├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── _bin │ └── tikz2svg ├── _static │ ├── feedforward_layers.svg │ ├── feedforward_layers.tikz │ ├── feedforward_neuron.svg │ ├── feedforward_neuron.tikz │ ├── mnist-digits-small.png │ ├── mnist-digits.png │ └── style-tweaks.css ├── _templates │ └── gitwidgets.html ├── api │ ├── activations.rst │ ├── layers.rst │ ├── losses.rst │ ├── models.rst │ ├── reference.rst │ ├── regularizers.rst │ ├── trainers.rst │ └── utils.rst ├── conf.py ├── examples │ ├── mnist-classifier.rst │ └── recurrent-memory.rst ├── guide.rst ├── index.rst ├── make.bat └── requirements.txt ├── examples ├── cifar-autoencoder.py ├── lstm-chime.py ├── mnist-autoencoder.py ├── mnist-classifier.py ├── mnist-convolution.py ├── mnist-deep-autoencoder.py ├── mnist-deep-classifier.py ├── mnist-rica.py ├── recurrent-addition.py ├── recurrent-autoencoder.py ├── recurrent-memory.py ├── recurrent-sinusoid.py ├── recurrent-text.py ├── utils.py ├── weighted-classification.py └── xor-classifier.py ├── scripts └── theanets-char-rnn ├── setup.cfg ├── setup.py ├── test ├── activations_test.py ├── convolution_test.py ├── feedforward_test.py ├── graph_test.py ├── layers_test.py ├── losses_test.py ├── main_test.py ├── recurrent_test.py ├── regularizers_test.py ├── sparse_test.py ├── trainer_test.py ├── util.py └── util_test.py └── theanets ├── __init__.py ├── activations.py ├── convolution.py ├── feedforward.py ├── graph.py ├── layers ├── __init__.py ├── base.py ├── convolution.py ├── feedforward.py └── recurrent.py ├── losses.py ├── main.py ├── recurrent.py ├── regularizers.py ├── trainer.py └── util.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/.coveragerc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Downloaded files for examples 2 | mnist.pkl.gz 3 | hf.py 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Packages 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | bin 18 | var 19 | sdist 20 | develop-eggs 21 | .installed.cfg 22 | lib 23 | lib64 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 28 | # Unit test / coverage reports 29 | .coverage 30 | .tox 31 | nosetests.xml 32 | 33 | # Translations 34 | *.mo 35 | 36 | # Mr Developer 37 | .mr.developer.cfg 38 | .project 39 | .pydevproject 40 | 41 | # Sphinx 42 | docs/_build 43 | docs/api/generated 44 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | cache: apt 2 | sudo: false 3 | language: python 4 | python: 5 | - "2.7" 6 | - "3.4" 7 | addons: 8 | apt: 9 | packages: 10 | - libatlas-dev 11 | - libatlas-base-dev 12 | - liblapack-dev 13 | - gfortran 14 | before_install: 15 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 16 | - bash miniconda.sh -b -p $HOME/miniconda 17 | - export PATH="$HOME/miniconda/bin:$PATH" 18 | - conda update --yes conda 19 | install: 20 | - conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy 21 | - pip install pytest-pep8 pytest-cov python-coveralls 22 | - python setup.py develop 23 | script: 24 | - THEANO_FLAGS=floatX=float32 py.test -v --pep8 --cov=theanets 
--cov-report=term-missing 25 | after_success: 26 | - coveralls 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2015 lmjohns3 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " zip to make standalone HTML files and zip them up" 23 | @echo " dirhtml to make HTML files named index.html in directories" 24 | @echo " singlehtml to make a single large HTML file" 25 | @echo " pickle to make pickle files" 26 | @echo " json to make JSON files" 27 | @echo " htmlhelp to make HTML files and a HTML help project" 28 | @echo " qthelp to make HTML files and a qthelp project" 29 | @echo " devhelp to make HTML files and a Devhelp project" 30 | @echo " epub to make an epub" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " text to make text files" 34 | @echo " man to make manual pages" 35 | @echo " texinfo to make Texinfo files" 36 | @echo " info to make Texinfo files and run them through makeinfo" 37 | @echo " gettext to make PO message catalogs" 38 | @echo " changes to make an overview of all changed/added/deprecated items" 39 | @echo " linkcheck to check all external links for integrity" 40 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 41 | 42 | clean: 43 | -rm -rf $(BUILDDIR)/* 44 | -rm docs.zip 45 | 46 | zip: html 47 | cd $(BUILDDIR)/html && zip -r docs.zip . && mv docs.zip ../.. 48 | 49 | html: 50 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 53 | 54 | dirhtml: 55 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 58 | 59 | singlehtml: 60 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 61 | @echo 62 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 63 | 64 | pickle: 65 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 66 | @echo 67 | @echo "Build finished; now you can process the pickle files." 68 | 69 | json: 70 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 71 | @echo 72 | @echo "Build finished; now you can process the JSON files." 73 | 74 | htmlhelp: 75 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 76 | @echo 77 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 78 | ".hhp project file in $(BUILDDIR)/htmlhelp." 79 | 80 | qthelp: 81 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 82 | @echo 83 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 84 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 85 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/theanets.qhcp" 86 | @echo "To view the help file:" 87 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/theanets.qhc" 88 | 89 | devhelp: 90 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 91 | @echo 92 | @echo "Build finished." 93 | @echo "To view the help file:" 94 | @echo "# mkdir -p $$HOME/.local/share/devhelp/theanets" 95 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/theanets" 96 | @echo "# devhelp" 97 | 98 | epub: 99 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 100 | @echo 101 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
102 | 103 | latex: 104 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 105 | @echo 106 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 107 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 108 | "(use \`make latexpdf' here to do that automatically)." 109 | 110 | latexpdf: 111 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 112 | @echo "Running LaTeX files through pdflatex..." 113 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 114 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 115 | 116 | text: 117 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 118 | @echo 119 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 120 | 121 | man: 122 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 123 | @echo 124 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 125 | 126 | texinfo: 127 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 128 | @echo 129 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 130 | @echo "Run \`make' in that directory to run these through makeinfo" \ 131 | "(use \`make info' here to do that automatically)." 132 | 133 | info: 134 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 135 | @echo "Running Texinfo files through makeinfo..." 136 | make -C $(BUILDDIR)/texinfo info 137 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 138 | 139 | gettext: 140 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 141 | @echo 142 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 143 | 144 | changes: 145 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 146 | @echo 147 | @echo "The overview file is in $(BUILDDIR)/changes." 148 | 149 | linkcheck: 150 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 151 | @echo 152 | @echo "Link check complete; look for any errors in the above output " \ 153 | "or in $(BUILDDIR)/linkcheck/output.txt." 154 | 155 | doctest: 156 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 157 | @echo "Testing of doctests in the sources finished, look at the " \ 158 | "results in $(BUILDDIR)/doctest/output.txt." 
159 | -------------------------------------------------------------------------------- /docs/_bin/tikz2svg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # author: github.com/jbenet 4 | # license: MIT 5 | # 6 | # tikz2svg: convert tikz input into svg 7 | # depends on: 8 | # - pdflatex: comes with your tex dist 9 | # - pdf2svg: brew install pdf2svg 10 | 11 | import fileinput 12 | import os 13 | import shutil 14 | import subprocess 15 | import sys 16 | import tempfile 17 | 18 | PDFLATEX = 'pdflatex --shell-escape -file-line-error -interaction=nonstopmode --' 19 | PDF2SVG = 'pdf2svg texput.pdf out.svg' 20 | 21 | LATEX = r''' 22 | \documentclass[border=2bp]{standalone} 23 | \usepackage{tikz} 24 | \usepackage{pgfplots} 25 | \usetikzlibrary{arrows} 26 | \begin{document} 27 | \begingroup 28 | \tikzset{every picture/.style={scale=1}} 29 | \begin{tikzpicture}%(content)s\end{tikzpicture} 30 | \endgroup 31 | \end{document} 32 | ''' 33 | 34 | def run(cmd, stdin=None, exit_on_error=True): 35 | CECI = subprocess.PIPE 36 | p = subprocess.Popen(cmd, shell=True, stdin=CECI, stdout=CECI, stderr=CECI) 37 | if stdin: 38 | p.stdin.write(stdin) 39 | p.stdin.close() 40 | p.wait() 41 | if p.returncode != 0 and exit_on_error: 42 | print('>', cmd) 43 | print('Error.') 44 | print('-' * 20, 'STDIN') 45 | print(stdin) 46 | print('-' * 20, 'STDOUT') 47 | print(p.stdout.read()) 48 | print('-' * 20, 'STDERR') 49 | print(p.stderr.read()) 50 | sys.exit(p.returncode) 51 | return p.stdout.read() 52 | 53 | 54 | def tikz2tex(tikz): 55 | return LATEX % dict(content=tikz) 56 | 57 | def tex2pdf(tex): 58 | with open('figure.tex', 'w') as h: 59 | h.write(tex) 60 | return run(PDFLATEX.split(' '), stdin=tex.encode('utf8')) 61 | 62 | def pdf2svg(pdf): 63 | run(PDF2SVG) 64 | with open('out.svg') as f: 65 | return f.read() 66 | 67 | def tikz2svg(tikz): 68 | return pdf2svg(tex2pdf(tikz2tex(tikz))) 69 | 70 | 71 | if __name__ == '__main__': 72 | # move to tmp because latex litters :( 73 | tmp = tempfile.mkdtemp() 74 | cwd = os.getcwd() 75 | os.chdir(tmp) 76 | print(tikz2svg(''.join(fileinput.input()))) 77 | os.chdir(cwd) 78 | shutil.rmtree(tmp) 79 | -------------------------------------------------------------------------------- /docs/_static/feedforward_layers.tikz: -------------------------------------------------------------------------------- 1 | [thick,->,>=stealth',rectangle,minimum size=10mm,node distance=25mm,rounded corners=3mm] 2 | \node (dots) at (0, 0) {$\dots$}; 3 | \node[draw] (h1) [left of=dots] {Layer 1} edge (dots); 4 | \node[draw] (input) [left of=h1] {Input} edge (h1); 5 | \node[draw] (hkm1) [right of=dots] {Layer $k-1$} edge[<-] (dots); 6 | \node[draw] (output) [right of=hkm1] {Output} edge[<-] (hkm1); 7 | -------------------------------------------------------------------------------- /docs/_static/feedforward_neuron.tikz: -------------------------------------------------------------------------------- 1 | [thick,->,>=stealth',circle,minimum size=10mm,node distance=10mm,below,near start] 2 | \node[draw] (z) at (0, 0) {$\sum$}; 3 | \node[draw] (x) at (20mm, 1.5mm) {$z_i^k$} edge[<-] (z); 4 | \node[draw] (b) at (-30mm, 0) {$z_j^{k-1}$} edge node {$w^k_{ji}$} (z); 5 | \node (adots) [above of=b] {$\vdots$}; 6 | \node[draw] (a) [above of=adots] {$z_1^{k-1}$} edge node {$w^k_{1i}$} (z); 7 | \node (cdots) [below of=b] {$\vdots$}; 8 | \node[draw] (c) [below of=cdots] {$z_{n_{k-1}}^{k-1}$} edge node [midway] {$w^k_{n_{k-1}i}$} (z); 9 | \node[draw] (bias) at 
(0, -20mm) {$b^k_i$} edge (z); 10 | -------------------------------------------------------------------------------- /docs/_static/mnist-digits-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/docs/_static/mnist-digits-small.png -------------------------------------------------------------------------------- /docs/_static/mnist-digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/docs/_static/mnist-digits.png -------------------------------------------------------------------------------- /docs/_static/style-tweaks.css: -------------------------------------------------------------------------------- 1 | a, a:visited { color: #258; } 2 | a tt, a:visited tt, a:active tt { color: #258; } 3 | 4 | .banana { float: right; max-width: 45%; } 5 | .banana img { width: 100%; } 6 | 7 | pre { font-size: 0.9rem; line-height: 1.25; } 8 | span.pre { background: #eee; font-size: 0.95rem; padding: 0.1rem 0.2rem; } 9 | 10 | a.internal span.pre { 11 | background: inherit; 12 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 13 | font-size: inherit; 14 | padding: inherit; 15 | } 16 | 17 | th.field-name { background: #ffd; } 18 | 19 | dl.method dt { background: #def; } 20 | dl.attribute dt { background: #efd; } 21 | dl.classmethod dt { background: #fed; } 22 | 23 | .rubric { font-size: 2rem; font-weight: bold; } 24 | 25 | .sphinxsidebar ul + ul:before { content: 'Examples'; font-weight: bold; } 26 | .sphinxsidebar ul + ul + ul:before { content: 'API'; } 27 | -------------------------------------------------------------------------------- /docs/_templates/gitwidgets.html: -------------------------------------------------------------------------------- 1 |
9 | -------------------------------------------------------------------------------- /docs/api/activations.rst: -------------------------------------------------------------------------------- 1 | .. _activations: 2 | 3 | ==================== 4 | Activation Functions 5 | ==================== 6 | 7 | An activation function (sometimes also called a transfer function) specifies how 8 | the final output of a layer is computed from the weighted sums of the inputs. 9 | 10 | By default, hidden layers in ``theanets`` use a rectified linear activation 11 | function: :math:`g(z) = \max(0, z)`. 12 | 13 | Output layers in :class:`theanets.Regressor ` 14 | and :class:`theanets.Autoencoder ` models use 15 | linear activations (i.e., the output is just the weighted sum of the inputs from 16 | the previous layer: :math:`g(z) = z`), and the output layer in 17 | :class:`theanets.Classifier ` models uses a 18 | softmax activation: :math:`g(z) = \exp(z) / \sum\exp(z)`. 19 | 20 | To specify a different activation function for a layer, include an activation 21 | key chosen from the table below, or :ref:`create a custom activation 22 | `. As described in :ref:`guide-creating-specifying-layers`, 23 | the activation key can be included in your model specification either using the 24 | ``activation`` keyword argument in a layer dictionary, or by including the key 25 | in a tuple with the layer size: 26 | 27 | .. code:: python 28 | 29 | net = theanets.Regressor([10, (10, 'tanh'), 10]) 30 | 31 | The activations that ``theanets`` provides are: 32 | 33 | ========= ============================ =============================================== 34 | Key Description :math:`g(z) =` 35 | ========= ============================ =============================================== 36 | linear linear :math:`z` 37 | sigmoid logistic sigmoid :math:`(1 + \exp(-z))^{-1}` 38 | logistic logistic sigmoid :math:`(1 + \exp(-z))^{-1}` 39 | tanh hyperbolic tangent :math:`\tanh(z)` 40 | softplus smooth relu approximation :math:`\log(1 + \exp(z))` 41 | softmax categorical distribution :math:`\exp(z) / \sum\exp(z)` 42 | relu rectified linear :math:`\max(0, z)` 43 | rect:min truncation :math:`\min(1, z)` 44 | rect:max rectification :math:`\max(0, z)` 45 | rect:minmax truncation and rectification :math:`\max(0, \min(1, z))` 46 | norm:mean mean-normalization :math:`z - \bar{z}` 47 | norm:max max-normalization :math:`z / \max |z|` 48 | norm:std variance-normalization :math:`z / \mathbb{E}[(z-\bar{z})^2]` 49 | norm:z z-score normalization :math:`(z-\bar{z}) / \mathbb{E}[(z-\bar{z})^2]` 50 | prelu_ relu with parametric leak :math:`\max(0, z) - \max(0, -rz)` 51 | lgrelu_ relu with leak and gain :math:`\max(0, gz) - \max(0, -rz)` 52 | maxout_ piecewise linear :math:`\max_i m_i z` 53 | ========= ============================ =============================================== 54 | 55 | .. _prelu: generated/theanets.activations.Prelu.html 56 | .. _lgrelu: generated/theanets.activations.LGrelu.html 57 | .. _maxout: generated/theanets.activations.Maxout.html 58 | 59 | Composition 60 | =========== 61 | 62 | Activation functions can also be composed by concatenating multiple function 63 | names togather using a ``+``. For example, to create a layer that uses a 64 | batch-normalized hyperbolic tangent activation: 65 | 66 | .. code:: python 67 | 68 | net = theanets.Regressor([10, (10, 'tanh+norm:z'), 10]) 69 | 70 | Just like function composition, the order of the components matters! 
Unlike the 71 | notation for mathematical function composition, the functions will be applied 72 | from left-to-right. 73 | 74 | .. _activations-custom: 75 | 76 | Custom Activations 77 | ================== 78 | 79 | To define a custom activation, create a subclass of :class:`theanets.Activation 80 | `, and implement the ``__call__`` method to 81 | make the class instance callable. The callable will be given one argument, the 82 | array of layer outputs to activate. 83 | 84 | .. code:: python 85 | 86 | class ThresholdedLinear(theanets.Activation): 87 | def __call__(self, x): 88 | return x * (x > 1) 89 | 90 | This example activation returns 0 if a layer output is less than 1, or the 91 | output value itself otherwise. In effect it is a linear activation for "large" 92 | outputs (i.e., greater than 1) and zero otherwise. To use it in a model, give 93 | the name of the activation: 94 | 95 | .. code:: python 96 | 97 | net = theanets.Regressor([10, (10, 'thresholdedlinear'), 10]) 98 | -------------------------------------------------------------------------------- /docs/api/layers.rst: -------------------------------------------------------------------------------- 1 | .. _layers: 2 | 3 | ====== 4 | Layers 5 | ====== 6 | 7 | .. image:: ../_static/feedforward_neuron.svg 8 | 9 | In a standard feedforward neural network layer, each node :math:`i` in layer 10 | :math:`k` receives inputs from all nodes in layer :math:`k-1`, then transforms 11 | the weighted sum of these inputs: 12 | 13 | .. math:: 14 | z_i^k = \sigma\left( b_i^k + \sum_{j=1}^{n_{k-1}} w^k_{ji} z_j^{k-1} \right) 15 | 16 | where :math:`\sigma: \mathbb{R} \to \mathbb{R}` is an :ref:`activation function 17 | `. 18 | 19 | In addition to standard feedforward layers, other types of layers are also 20 | commonly used: 21 | 22 | - For recurrent models, :mod:`recurrent layers ` 23 | permit a cycle in the computation graph that depends on a previous time step. 24 | 25 | - For models that process images, :mod:`convolution layers 26 | ` are common. 27 | 28 | - For some types of autoencoder models, it is common to :class:`tie layer weights to 29 | another layer `. 30 | 31 | .. _layers-available: 32 | 33 | Available Layers 34 | ================ 35 | 36 | .. automodule:: theanets.layers.base 37 | :no-members: 38 | :no-inherited-members: 39 | 40 | .. autosummary:: 41 | :toctree: generated/ 42 | 43 | Layer 44 | Input 45 | Concatenate 46 | Flatten 47 | Product 48 | Reshape 49 | 50 | Feedforward 51 | ----------- 52 | 53 | .. automodule:: theanets.layers.feedforward 54 | :no-members: 55 | :no-inherited-members: 56 | 57 | .. autosummary:: 58 | :toctree: generated/ 59 | 60 | Classifier 61 | Feedforward 62 | Tied 63 | 64 | Convolution 65 | ----------- 66 | 67 | .. automodule:: theanets.layers.convolution 68 | :no-members: 69 | :no-inherited-members: 70 | 71 | .. autosummary:: 72 | :toctree: generated/ 73 | 74 | Conv1 75 | Conv2 76 | Pool1 77 | Pool2 78 | 79 | Recurrent 80 | --------- 81 | 82 | .. automodule:: theanets.layers.recurrent 83 | :no-members: 84 | :no-inherited-members: 85 | 86 | .. autosummary:: 87 | :toctree: generated/ 88 | 89 | RNN 90 | RRNN 91 | MUT1 92 | GRU 93 | LSTM 94 | MRNN 95 | SCRN 96 | Clockwork 97 | Bidirectional 98 | 99 | .. _layers-attributes: 100 | 101 | Layer Attributes 102 | ================ 103 | 104 | Now that we've seen how to specify values for the attributes of each layer in 105 | your model, we'll look at the available attributes that can be customized. 
For 106 | many of these settings, you'll want to use a dictionary (or create a 107 | :class:`theanets.Layer ` instance yourself) to 108 | specify non-default values. 109 | 110 | - ``size``: The number of "neurons" in the layer. This value must be specified 111 | by the modeler when creating the layer. It can be specified by providing an 112 | integer, or as a tuple that contains an integer. 113 | 114 | - ``form``: A string specifying the type of layer to use (see above). This 115 | defaults to "feedforward" but can be the name of any existing 116 | :class:`theanets.Layer ` subclass (including 117 | :ref:`layers-custom` that you have defined). 118 | 119 | - ``name``: A string name for the layer. If this isn't provided when creating a 120 | layer, the layer will be assigned a default name. The default names for the 121 | first and last layers in a network are ``'in'`` and ``'out'`` respectively, 122 | and the layers in between are assigned the name "hidN" where N is the number 123 | of existing layers. 124 | 125 | If you create a layer instance manually, the default name is ``'layerN'`` 126 | where N is the number of existing layers. 127 | 128 | - ``activation``: A string describing the :ref:`activation function 129 | ` to use for the layer. This defaults to ``'relu'``. 130 | 131 | - ``inputs``: An integer or dictionary describing the sizes of the inputs that 132 | this layer expects. This is normally optional and defaults to the size of the 133 | preceding layer in a chain-like model. However, providing a dictionary here 134 | permits arbitrary layer interconnections. See :ref:`guide-advanced-graphs` for 135 | more details. 136 | 137 | - ``mean``: A float specifying the mean of the initial parameter values to use 138 | in the layer. Defaults to 0. This value applies to all parameters in the model 139 | that don't have mean values specified for them directly. 140 | 141 | - ``mean_ABC``: A float specifying the mean of the initial parameter values to 142 | use in the layer's ``'ABC'`` parameter. Defaults to 0. This can be used to 143 | specify the mean of the initial values used for a specific parameter in the 144 | model. 145 | 146 | - ``std``: A float specifying the standard deviation of the initial parameter 147 | values to use in the layer. Defaults to 1. This value applies to all 148 | parameters in the model that don't have standard deviations specified 149 | directly. 150 | 151 | - ``std_ABC``: A float specifying the standard deviation of the initial 152 | parameter values to use in the layer's ``'ABC'`` parameter. Defaults to 1. 153 | This can be used to specify the standard deviation of the initial values used 154 | for a specific parameter in the model. 155 | 156 | - ``sparsity``: A float giving the proportion of parameter values in the layer 157 | that should be initialized to zero. Nonzero values in the parameters will be 158 | drawn from a Gaussian with the specified mean and standard deviation as above, 159 | and then an appropriate number of these parameter values will randomly be 160 | reset to zero to make the parameter "sparse." 161 | 162 | - ``sparsity_ABC``: A float or vector of floats used to initialize the 163 | parameters in the layer's ``'ABC'`` parameter. This can be used to set the 164 | initial sparsity level for a particular parameter in the layer. 165 | 166 | - ``diagonal``: A float or vector of floats used to initialize the parameters in 167 | the layer. 
If this is provided, weight matrices in the layer will be 168 | initialized to all zeros, with this value or values placed along the diagonal. 169 | 170 | - ``diagonal_ABC``: A float or vector of floats used to initialize the 171 | parameters in the layer's ``'ABC'`` parameter. If this is provided, the 172 | relevant weight matrix in the layer will be initialized to all zeros, with 173 | this value or values placed along the diagonal. 174 | 175 | - ``rng``: An integer or ``numpy`` random number generator. If specified the 176 | given random number generator will be used to create the initial values for 177 | the parameters in the layer. This can be useful for repeatable runs of a 178 | model. 179 | 180 | In addition to these configuration values, each layer can also be provided with 181 | keyword arguments specific to that layer. For example, the :class:`MRNN 182 | ` recurrent layer type requires a ``factors`` 183 | argument, and the :class:`Conv1 ` 1D 184 | convolutional layer requires a ``filter_size`` argument. 185 | 186 | .. _layers-custom: 187 | 188 | Custom Layers 189 | ============= 190 | 191 | Layers are the real workhorse in ``theanets``; custom layers can be created to 192 | do all sorts of fun stuff. To create a custom layer, just create a subclass of 193 | :class:`theanets.Layer ` and give it the 194 | functionality you want. 195 | 196 | As a very simple example, let's suppose you wanted to create a normal 197 | feedforward layer but did not want to include a bias term: 198 | 199 | .. code:: python 200 | 201 | import theanets 202 | import theano.tensor as TT 203 | 204 | class NoBias(theanets.Layer): 205 | def transform(self, inputs): 206 | return TT.dot(inputs, self.find('w')) 207 | 208 | def setup(self): 209 | self.add_weights('w', nin=self.input_size, nout=self.output_size) 210 | 211 | Once you've set up your new layer class, it will automatically be registered and 212 | available in :func:`theanets.Layer.build ` 213 | using the name of your class: 214 | 215 | .. code:: python 216 | 217 | layer = theanets.Layer.build('nobias', size=4) 218 | 219 | or, while creating a model: 220 | 221 | .. code:: python 222 | 223 | net = theanets.Autoencoder( 224 | layers=(4, (3, 'nobias', 'linear'), (4, 'tied', 'linear')), 225 | ) 226 | 227 | This example shows how fast it is to create a PCA-like model that will learn the 228 | subspace of your dataset that spans the most variance---the same subspace 229 | spanned by the principal components. 230 | -------------------------------------------------------------------------------- /docs/api/losses.rst: -------------------------------------------------------------------------------- 1 | .. _losses: 2 | 3 | ============== 4 | Loss Functions 5 | ============== 6 | 7 | A loss function is used to optimize the parameter values in a neural network 8 | model. Loss functions map a set of parameter values for the network onto a 9 | scalar value that indicates how well those parameter accomplish the task the 10 | network is intended to do. 11 | 12 | There are several common loss functions provided by ``theanets``. These losses 13 | often measure the :class:`squared ` or 14 | :class:`absolute ` error between a network's 15 | output and some target or desired output. Other loss functions are designed 16 | specifically for classification models; the :class:`cross-entropy 17 | ` is a common loss designed to minimize the 18 | distance between the network's distribution over class labels and the 19 | distribution that the dataset defines. 
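As a concrete (if simplified) illustration of what these quantities measure, the
sketch below computes a squared error, an absolute error, and a cross-entropy
directly with NumPy on made-up arrays. This is only a sketch of the arithmetic;
the actual ``theanets`` losses are built as Theano expressions by the classes
listed under :ref:`losses-predefined` below.

.. code:: python

   import numpy as np

   # Made-up regression outputs and targets for a batch of two examples.
   outputs = np.array([[0.2, 0.9], [1.5, -0.3]])
   targets = np.array([[0.0, 1.0], [1.0, 0.0]])

   mse = np.mean((outputs - targets) ** 2)   # mean squared error
   mae = np.mean(np.abs(outputs - targets))  # mean absolute error

   # Made-up classifier outputs: a distribution over three classes for each
   # example, plus the true integer labels. The cross-entropy is the mean
   # negative log-probability assigned to the correct labels.
   probs = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
   labels = np.array([0, 1])
   xent = -np.mean(np.log(probs[np.arange(len(labels)), labels]))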
20 | 21 | :ref:`models` in ``theanets`` have at least one loss to optimize during 22 | training. There are default losses for each of the built-in model types, but you 23 | can often override these defaults just by providing a non-default value for the 24 | ``loss`` keyword argument when creating your model. For example, to create a 25 | regression model with a mean absolute error loss: 26 | 27 | .. code:: python 28 | 29 | net = theanets.Regressor([10, 20, 3], loss='mae') 30 | 31 | This will create the regression model with the specified loss. 32 | 33 | .. _losses-predefined: 34 | 35 | Predefined Losses 36 | ================= 37 | 38 | .. automodule:: theanets.losses 39 | :no-members: 40 | :no-inherited-members: 41 | 42 | .. autosummary:: 43 | :toctree: generated/ 44 | 45 | Loss 46 | CrossEntropy 47 | GaussianLogLikelihood 48 | Hinge 49 | KullbackLeiblerDivergence 50 | MaximumMeanDiscrepancy 51 | MeanAbsoluteError 52 | MeanSquaredError 53 | 54 | .. _losses-multiple: 55 | 56 | Multiple Losses 57 | =============== 58 | 59 | A ``theanets`` model can actually have more than one loss that it attempts to 60 | optimize simultaneously, and these losses can change between successive calls to 61 | :func:`train() `. In fact, a model has a 62 | ``losses`` attribute that's just a list of :class:`theanets.Loss 63 | ` instances; these losses are weighted by a ``weight`` 64 | attribute, then summed and combined with any applicable :ref:`regularizers 65 | ` during each call to ``train()``. 66 | 67 | Let's say that you want to optimize a model using both the mean absolute and the 68 | mean squared error. You could first create a regular regression model: 69 | 70 | .. code:: python 71 | 72 | net = theanets.Regressor([10, 20, 3]) 73 | 74 | and then add a new loss to the model: 75 | 76 | .. code:: python 77 | 78 | net.add_loss('mse') 79 | 80 | Then, when you call: 81 | 82 | .. code:: python 83 | 84 | net.train(...) 85 | 86 | the model will attempt to minimize the sum of the two losses. 87 | 88 | You can specify the relative weight of the two losses by manipulating the 89 | ``weight`` attribute of each loss instance. For instance, if you want the MAE 90 | loss to be twice as strong as the MSE loss: 91 | 92 | .. code:: python 93 | 94 | net.losses[1].weight = 2 95 | net.train(...) 96 | 97 | Finally, if you want to reset the loss to the standard MSE: 98 | 99 | .. code:: python 100 | 101 | net.set_loss('mse', weight=1) 102 | 103 | (Here we've also shown how to specify the weight of the loss when adding or 104 | setting it to the model.) 105 | 106 | .. _losses-weighted: 107 | 108 | Using Weighted Targets 109 | ====================== 110 | 111 | By default, the network models available in ``theanets`` treat all inputs as 112 | equal when computing the loss for the model. For example, a regression model 113 | treats an error of 0.1 in component 2 of the output just the same as an error of 114 | 0.1 in component 3, and each example of a minibatch is treated with equal 115 | importance when training a classifier. 116 | 117 | However, there are times when all inputs to a neural network model are not to be 118 | treated equally. This is especially evident in recurrent models: sometimes, the 119 | inputs to a recurrent network might not contain the same number of time steps, 120 | but because the inputs are presented to the model using a rectangular minibatch 121 | array, all inputs must somehow be made to have the same size. 
One way to address 122 | this would be to cut off all inputs at the length of the shortest input, but 123 | then the network is not exposed to all input/output pairs during training. 124 | 125 | Weighted targets can be used for any model in ``theanets``. For example, an 126 | :class:`autoencoder ` could use an array of 127 | weights containing zeros and ones to solve a matrix completion task, where the 128 | input array contains some "unknown" values. In such a case, the network is 129 | required to reproduce the known values exactly (so these could be presented to 130 | the model with weight 1), while filling in the unknowns with statistically 131 | reasonable values (which could be presented to the model during training with 132 | weight 0). 133 | 134 | As another example, suppose a :class:`classifier 135 | ` model is being trained in a binary 136 | classification task where one of the classes---say, class A---is only present 137 | 0.1% of the time. In such a case, the network can achieve 99.9% accuracy by 138 | always predicting class B, so during training it might be important to ensure 139 | that errors in predicting A are "amplified" when computing the loss. You could 140 | provide a large weight for training examples in class A to encourage the model 141 | not to miss these examples. 142 | 143 | All of these cases are possible to model in ``theanets``; just include 144 | ``weighted=True`` when you create your model: 145 | 146 | .. code:: python 147 | 148 | net = theanets.recurrent.Autoencoder([3, (10, 'rnn'), 3], weighted=True) 149 | 150 | When training a weighted model, the training and validation datasets require an 151 | additional component: an array of floating-point values with the same shape as 152 | the expected output of the model. For example, a non-recurrent Classifier model 153 | would require a weight vector with each minibatch, of the same shape as the 154 | labels array, so that the training and validation datasets would each have three 155 | pieces: ``sample``, ``label``, and ``weight``. Each value in the weight array is 156 | used as the weight for the corresponding error when computing the loss. 157 | 158 | .. _losses-custom: 159 | 160 | Custom Losses 161 | ============= 162 | 163 | It's pretty straightforward to create models in ``theanets`` that use different 164 | losses from the predefined :class:`theanets.Classifier 165 | ` and :class:`theanets.Autoencoder 166 | ` and :class:`theanets.Regressor 167 | ` models. (The classifier uses categorical 168 | cross-entropy (XE) as its default loss, and the other two both use mean squared 169 | error, MSE.) 170 | 171 | To define a model with a new loss, just create a new :class:`theanets.Loss 172 | ` subclass and specify its name when you create your 173 | model. For example, to create a regression model that uses a step function 174 | averaged over all of the model inputs: 175 | 176 | .. code:: python 177 | 178 | class Step(theanets.Loss): 179 | def __call__(self, outputs): 180 | return (outputs[self.output_name] > 0).mean() 181 | 182 | net = theanets.Regressor([5, 6, 7], loss='step') 183 | 184 | Your loss function implementation must return a Theano expression that reflects 185 | the loss for your model. If you wish to make your loss work with weighted 186 | outputs, you will also need to include a case for having weights: 187 | 188 | .. 
code:: python 189 | 190 | class Step(theanets.Loss): 191 | def __call__(self, outputs): 192 | step = outputs[self.output_name] > 0 193 | if self._weights: 194 | return (self._weights * step).sum() / self._weights.sum() 195 | else: 196 | return step.mean() 197 | -------------------------------------------------------------------------------- /docs/api/models.rst: -------------------------------------------------------------------------------- 1 | .. _models: 2 | 3 | ====== 4 | Models 5 | ====== 6 | 7 | There are three major types of neural network models, each defined primarily by 8 | the :ref:`loss function ` that the model attempts to optimize. While 9 | other types of models are certainly possible, ``theanets`` only tries to handle 10 | the common cases with built-in model classes. If you want to define a new type 11 | of model, see :ref:`models-custom`. 12 | 13 | To describe the predefined models, we assume that a neural network has some set 14 | of parameters :math:`\theta`. In the feedforward pass, the network computes some 15 | function of an input vector :math:`x \in \mathbb{R}^n` using these parameters; 16 | we represent this feedforward function using the notation :math:`y = 17 | F_\theta(x)`. 18 | 19 | Autoencoder 20 | =========== 21 | 22 | An :class:`autoencoder ` takes an array of 23 | :math:`m` arbitrary data vectors :math:`X \in \mathbb{R}^{m \times n}` as input, 24 | transforms it in some way, and then attempts to recreate the original input as 25 | the output of the network. 26 | 27 | To evaluate the loss for an autoencoder, only the input data is required. The 28 | default autoencoder model computes the loss using the mean squared error between 29 | the network's output and the input: 30 | 31 | .. math:: 32 | \mathcal{L}(X, \theta) = \frac{1}{mn} \sum_{i=1}^m \left\| 33 | F_\theta(x_i) - x_i \right\|_2^2 + R(X, \theta) 34 | 35 | Autoencoders simply try to adjust their model parameters :math:`\theta` to 36 | minimize this squared error between the true inputs and the values that the 37 | network produces. 38 | 39 | In theory this could be trivial---if, for example, :math:`F_\theta(x) = x`---but 40 | in practice this doesn't actually happen very often. In addition, a 41 | :ref:`regularizer ` :math:`R(X, \theta)` 42 | can be added to the overall loss for the model to prevent this sort of trivial 43 | solution. 44 | 45 | To create an autoencoder in ``theanets``, just create an instance of the 46 | appropriate network subclass: 47 | 48 | .. code:: python 49 | 50 | net = theanets.Autoencoder() 51 | 52 | Of course you'll also need to specify which types of layers you'd like in your 53 | model; this is discussed in :ref:`guide-creating-specifying-layers`. 54 | 55 | Regression 56 | ========== 57 | 58 | A :class:`regression ` model is much like an 59 | autoencoder. Like an autoencoder, a regression model takes as input an array of 60 | arbitrary data :math:`X \in \mathbb{R}^{m \times n}`. However, at training time, 61 | a regression model also requires an array of expected target outputs :math:`Y 62 | \in \mathbb{R}^{m \times o}`. Like an autoencoder, the error between the 63 | network's output and the target is computed using the mean squared error: 64 | 65 | .. math:: 66 | \mathcal{L}(X, Y, \theta) = \frac{1}{mn} \sum_{i=1}^m \left\| 67 | F_\theta(x_i) - y_i \right\|_2^2 + R(X, \theta) 68 | 69 | The difference here is that instead of trying to produce the input, the 70 | regression model is trying to match the target output. 
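To make the array shapes in this loss concrete, here is a minimal sketch of
training a small regression model on random placeholder data. The layer sizes
and array values are arbitrary, and the call to ``train()`` relies on the
library's default training algorithm and settings:

.. code:: python

   import numpy as np
   import theanets

   # Random placeholder data: m=100 examples, n=10 inputs, o=3 targets.
   X = np.random.randn(100, 10).astype('f')
   Y = np.random.randn(100, 3).astype('f')

   net = theanets.Regressor([10, 20, 3])
   net.train([X, Y])          # minimizes the squared error described above
   print(net.predict(X[:5]))  # network outputs for a few examples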
71 | 72 | To create a regression model in theanets, just invoke the constructor: 73 | 74 | .. code:: python 75 | 76 | net = theanets.Regressor() 77 | 78 | Again, you'll need to specify which types of layers you'd like in your model; 79 | this is discussed in :ref:`guide-creating-specifying-layers`. 80 | 81 | Classification 82 | ============== 83 | 84 | A :class:`classification ` model takes as input 85 | some piece of data that you want to classify (e.g., the pixels of an image, word 86 | counts from a document, etc.) and outputs a probability distribution over 87 | available labels. 88 | 89 | At training time, this type of model requires an array of input data :math:`X 90 | \in \mathbb{R}^{m \times n}` and a corresponding set of integer labels :math:`Y 91 | \in \{1,\dots,k\}^m`. The error is then computed as the cross-entropy between 92 | the network output and the true target labels: 93 | 94 | .. math:: 95 | \mathcal{L}(X, Y, \theta) = -\frac{1}{m} \sum_{i=1}^m \sum_{j=1}^k 96 | \delta_{j,y_i} \log F_\theta(x_i)_j + R(X, \theta) 97 | 98 | where :math:`\delta{a,b}` is the Kronecker delta, which is 1 if :math:`a=b` and 99 | 0 otherwise. 100 | 101 | To create a classifier model in ``theanets``, invoke its constructor: 102 | 103 | .. code:: python 104 | 105 | net = theanets.Classifier() 106 | 107 | As with the other models, you'll need to specify which types of layers you'd 108 | like in your model; this is discussed in 109 | :ref:`guide-creating-specifying-layers`. 110 | 111 | Recurrent Models 112 | ================ 113 | 114 | The three predefined models described above also exist in recurrent 115 | formulations. In recurrent networks, time is an explicit part of the model. In 116 | ``theanets``, if you wish to include recurrent layers in your model, you must 117 | use a model class from the :mod:`theanets.recurrent` module; this is because 118 | recurrent models require input and output data matrices with an additional 119 | dimension to represent time. In general, 120 | 121 | - the data shapes required for a recurrent layer are all one 122 | dimension larger than the corresponding shapes for a feedforward network, 123 | 124 | - the extra dimension represents time, and 125 | 126 | - the extra dimension is located on: 127 | 128 | - the first (0) axis in ``theanets`` versions through 0.6, or 129 | - the second (1) axis in ``theanets`` versions 0.7 and up. 130 | 131 | .. warning:: 132 | 133 | Starting with release 0.7.0 of ``theanets``, recurrent models have changed 134 | the expected axis ordering for data arrays! The axis ordering before version 135 | 0.7.0 was ``(time, batch, variables)``, and the axis ordering starting in the 136 | 0.7.0 release is ``(batch, time, variables)``. 137 | 138 | The new ordering is more consistent with other models in ``theanets``. 139 | Starting in the 0.7 release, the first axis (index 0) of data arrays for all 140 | model types represents the examples in a batch, and the last axis (index -1) 141 | represents the input variables. For recurrent models, the axis in the middle 142 | of a batch (index 1) represents time. 143 | 144 | .. note:: 145 | 146 | In recurrent models, the batch size is currently required to be greater than 147 | one. If you wish to run a recurrent model on a single sample, just create a 148 | batch with two copies of the same sample. 
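As a sketch of the data layout described above (using the ordering from version
0.7 and later), the following builds random placeholder arrays with a batch
axis, a time axis, and a variable axis. The layer specification and training
call follow the same patterns as the feedforward examples and assume the
default trainer settings:

.. code:: python

   import numpy as np
   import theanets

   # (batch, time, variables) ordering, as in theanets 0.7 and later.
   m, t, n, o = 8, 20, 3, 2   # batch size must be greater than one
   X = np.random.randn(m, t, n).astype('f')
   Y = np.random.randn(m, t, o).astype('f')

   net = theanets.recurrent.Regressor([n, (10, 'rnn'), o])
   net.train([X, Y])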
149 | 150 | Autoencoding 151 | ------------ 152 | 153 | A :class:`recurrent autoencoder `, just like its 154 | feedforward counterpart, takes as input a single array of data :math:`X \in 155 | \mathbb{R}^{m \times t \times n}` and attempts to recreate the same data at the 156 | output, under a squared-error loss. 157 | 158 | To create a model of this type, just invoke its constructor: 159 | 160 | .. code:: python 161 | 162 | net = theanets.recurrent.Autoencoder() 163 | 164 | Regression 165 | ---------- 166 | 167 | A :class:`recurrent regression ` model is also 168 | just like its feedforward counterpart. It requires two inputs at training time: 169 | an array of input data :math:`X \in \mathbb{R}^{m \times t \times n}` and a 170 | corresponding array of output data :math:`Y \in \mathbb{R}^{m \times t \times 171 | o}`. Like the feedforward regression models, the recurrent version attempts to 172 | produce the target outputs under a squared-error loss. 173 | 174 | To create a model of this type, just invoke its constructor: 175 | 176 | .. code:: python 177 | 178 | net = theanets.recurrent.Regressor() 179 | 180 | Classification 181 | -------------- 182 | 183 | A :class:`recurrent classification ` model is 184 | like a feedforward classifier in that it takes as input some piece of data that 185 | you want to classify (e.g., the pixels of an image, word counts from a document, 186 | etc.) and outputs a probability distribution over available labels. Computing 187 | the error for this type of model requires an input dataset :math:`X \in 188 | \mathbb{R}^{m \times t \times n}` and a corresponding set of integer labels 189 | :math:`Y \in \mathbb{Z}^{t \times m}`; the error is then computed as the 190 | cross-entropy between the network output and the target labels. 191 | 192 | To create a model of this type, just invoke its constructor: 193 | 194 | .. code:: python 195 | 196 | net = theanets.recurrent.Classifier() 197 | 198 | .. _models-custom: 199 | 200 | Custom Models 201 | ============= 202 | 203 | To create a custom model, just define a new subclass of :class:`theanets.Network 204 | `. 205 | 206 | For instance, the :class:`feedforward autoencoder 207 | ` model is defined basically like this: 208 | 209 | .. code:: python 210 | 211 | class Autoencoder(theanets.Network): 212 | def __init__(self, layers=(), loss='mse', weighted=False): 213 | super(Autoencoder, self).__init__( 214 | layers=layers, loss=loss, weighted=weighted) 215 | 216 | Essentially this model just defines a default loss on top of the functionality 217 | in :class:`theanets.Network ` for creating and managing 218 | layers and loss functions, training the model, making predictions, and so on. 219 | 220 | By defining a custom model class, you can also implement whatever helper 221 | functionality you think will be useful for your task. With the programming power 222 | of Python, the sky's the limit! 223 | -------------------------------------------------------------------------------- /docs/api/reference.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Reference 3 | ========= 4 | 5 | .. automodule:: theanets 6 | :no-members: 7 | :no-inherited-members: 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | theanets.activations.Activation 13 | theanets.activations.LGrelu 14 | theanets.activations.Maxout 15 | theanets.activations.Prelu 16 | theanets.activations.build 17 | theanets.feedforward.Autoencoder 18 | theanets.feedforward.Classifier 19 | theanets.feedforward.Regressor 20 | theanets.graph.Network 21 | theanets.layers.base.Concatenate 22 | theanets.layers.base.Flatten 23 | theanets.layers.base.Input 24 | theanets.layers.base.Layer 25 | theanets.layers.base.Product 26 | theanets.layers.base.Reshape 27 | theanets.layers.convolution.Conv1 28 | theanets.layers.feedforward.Classifier 29 | theanets.layers.feedforward.Feedforward 30 | theanets.layers.feedforward.Tied 31 | theanets.layers.recurrent.Bidirectional 32 | theanets.layers.recurrent.Clockwork 33 | theanets.layers.recurrent.GRU 34 | theanets.layers.recurrent.LSTM 35 | theanets.layers.recurrent.MRNN 36 | theanets.layers.recurrent.MUT1 37 | theanets.layers.recurrent.RNN 38 | theanets.layers.recurrent.RRNN 39 | theanets.layers.recurrent.SCRN 40 | theanets.losses.CrossEntropy 41 | theanets.losses.GaussianLogLikelihood 42 | theanets.losses.Hinge 43 | theanets.losses.KullbackLeiblerDivergence 44 | theanets.losses.Loss 45 | theanets.losses.MaximumMeanDiscrepancy 46 | theanets.losses.MeanAbsoluteError 47 | theanets.losses.MeanSquaredError 48 | theanets.recurrent.Autoencoder 49 | theanets.recurrent.Classifier 50 | theanets.recurrent.Regressor 51 | theanets.recurrent.Text 52 | theanets.recurrent.batches 53 | theanets.regularizers.BernoulliDropout 54 | theanets.regularizers.Contractive 55 | theanets.regularizers.GaussianNoise 56 | theanets.regularizers.HiddenL1 57 | theanets.regularizers.Regularizer 58 | theanets.regularizers.RecurrentNorm 59 | theanets.regularizers.RecurrentState 60 | theanets.regularizers.WeightL1 61 | theanets.regularizers.WeightL2 62 | theanets.trainer.DownhillTrainer 63 | theanets.trainer.SampleTrainer 64 | theanets.trainer.SupervisedPretrainer 65 | theanets.trainer.UnsupervisedPretrainer 66 | -------------------------------------------------------------------------------- /docs/api/regularizers.rst: -------------------------------------------------------------------------------- 1 | .. _regularizers: 2 | 3 | ============ 4 | Regularizers 5 | ============ 6 | 7 | The goal of training a neural network model is to minimize the loss function by 8 | making adjustments to the model parameters. In most practical applications, the 9 | loss is not known a priori, but an estimate of it is computed using a set of 10 | data (the "training data") that has been gathered from the problem being 11 | modeled. 12 | 13 | If a model has many parameters compared with the size of the training dataset, 14 | then many machine learning models exhibit a phenomenon called *overfitting*: the 15 | model may learn to predict the training data with no measurable error, but then 16 | if it is applied to a new dataset, it makes lots of mistakes. In such a case, 17 | the model has essentially memorized the training data at the cost of not being 18 | able to *generalize* to new and unseen, yet similar, datasets. The risk of 19 | overfitting usually increases with the size of the model and decreases with the 20 | size of the training dataset. 21 | 22 | A heuristic that can prevent models from overfitting on small datasets is based 23 | on the observation that "good" parameter values in most models are typically 24 | small: large parameter values often indicate overfitting. 
25 | 26 | One way to encourage a model to use small parameter values is to assume that the 27 | parameter values are sampled from some prior distribution, rather than assuming 28 | that all parameter values in the model are equally likely. In this way of 29 | thinking about parameters, we can manipulate the prior distribution of the 30 | parameter values to express our knowledge as modelers of the problem at hand. 31 | 32 | In ``theanets``, regularization hyperparameters are provided when you train your 33 | model: 34 | 35 | .. code:: python 36 | 37 | net = theanets.Classifier(layers=[784, 1000, 784]) 38 | net.train(..., hidden_l1=0.1) 39 | 40 | Here we've specified that our model has a single, overcomplete hidden layer, and 41 | then when we train it, we specify that the activity of the hidden units in the 42 | network will be penalized with a 0.1 coefficient. The rest of this section 43 | details the built-in regularizers that are available in ``theanets``. 44 | 45 | Decay 46 | ===== 47 | 48 | Using "weight decay," we assume that parameters in a model are drawn from a 49 | zero-mean Gaussian distribution with an isotropic, modeler-specified standard 50 | deviation. In terms of loss functions, this equates to adding a term to the loss 51 | function that computes the :math:`L_2` norm of the parameter values in the 52 | model: 53 | 54 | .. math:: 55 | \mathcal{L}(\cdot) = \dots + \lambda \| \theta \|_2^2 56 | 57 | If the loss :math:`\mathcal{L}(\cdot)` represents some approximation to the 58 | log-posterior distribution of the model parameters given the data 59 | 60 | .. math:: 61 | \mathcal{L}(\cdot) = \log p(\theta|x) \propto \dots + \lambda \| \theta \|_2^2 62 | 63 | then the term with the :math:`L_2` norm on the parameters is like an unscaled 64 | Gaussian distribution. 65 | 66 | This type of regularization is specified using the ``weight_l2`` keyword 67 | argument during training: 68 | 69 | .. code:: python 70 | 71 | net.train(..., weight_l2=1e-4) 72 | 73 | The value of the argument is the strength of the regularizer in the loss for the 74 | model. Larger values create more pressure for small model weights. 75 | 76 | Sparsity 77 | ======== 78 | 79 | Sparse models have been shown to capture regularities seen in the mammalian 80 | visual cortex. In addition, sparse models in machine learning are often more 81 | performant than "dense" models (i.e., models without restriction on the hidden 82 | representation). Furthermore, sparse models tend to yield latent representations 83 | that are easier for humans to interpret than dense models. 84 | 85 | There are two main types of sparsity regularizers provided with ``theanets``: 86 | parameter sparsity and representation sparsity. 87 | 88 | The first type of sparse regularizer is just like weight decay, but instead of 89 | assuming that weights are drawn from a Gaussian distribution, here we assume 90 | that weights in the model are drawn from a distribution with a taller peak at 91 | zero and heavier tails, like a Laplace distribution. In terms of loss function, 92 | this regularizer adds a term with an :math:`L_1` norm to the model: 93 | 94 | .. math:: 95 | \mathcal{L}(\cdot) = \dots + \lambda \| \theta \|_1 96 | 97 | If the loss :math:`\mathcal{L}(\cdot)` represents some approximation to the 98 | log-posterior distribution of the model parameters given the data 99 | 100 | .. math:: 101 | \mathcal{L}(\cdot) = \log p(\theta|x) \propto \dots + \lambda \| \theta \|_1 102 | 103 | then this term is like an unscaled Laplace distribution. 
In practice, this 104 | regularizer encourages many of the model *parameters* to be zero. 105 | 106 | In ``theanets``, this sparse parameter regularization is specified using the 107 | ``weight_l1`` keyword argument during training: 108 | 109 | .. code:: python 110 | 111 | net.train(..., weight_l1=1e-4) 112 | 113 | The value of the argument is the strength of the regularizer in the loss for the 114 | model. The larger the regularization parameter, the more pressure for 115 | zero-valued weights. 116 | 117 | The second type of sparsity regularization puts pressure on the model to develop 118 | hidden *representations* that are mostly zero-valued. In this type of 119 | regularization, the model weights are penalized indirectly, since the hidden 120 | representation (i.e., the values of the hidden layer neurons in the network) are 121 | functions of both the model weights and the input data. In terms of loss 122 | functions, this regularizer adds a term to the loss that penalizes the 123 | :math:`L_1` norm of the hidden layer activations 124 | 125 | .. math:: 126 | \mathcal{L}(\cdot) = \dots + \lambda \sum_{i=2}^{N-1} \| f_i(x) \|_1 127 | 128 | where :math:`f_i(x)` represents the neuron activations of hidden layer 129 | :math:`i`. 130 | 131 | Sparse hidden activations have shown much promise in computational neural 132 | networks. In ``theanets`` this type of regularization is specified using the 133 | ``hidden_l1`` keyword argument during training: 134 | 135 | .. code:: python 136 | 137 | net.train(..., hidden_l1=0.1) 138 | 139 | The value of the argument is the strength of the regularizer in the loss for the 140 | model. Large values create more pressure for hidden representations that use 141 | mostly zeros. 142 | 143 | Noise 144 | ===== 145 | 146 | Another way of regularizing a model to prevent overfitting is to inject noise 147 | into the data or the representations during training. While noise could always 148 | be injected into the training batches manually, ``theanets`` provides two types 149 | of noise regularizers: additive Gaussian noise and multiplicative dropout 150 | (binary) noise. 151 | 152 | In one method, zero-mean Gaussian noise is added to the input data or hidden 153 | representations. These are specified during training using the ``input_noise`` 154 | and ``hidden_noise`` keyword arguments, respectively: 155 | 156 | .. code:: python 157 | 158 | net.train(..., input_noise=0.1) 159 | net.train(..., hidden_noise=0.1) 160 | 161 | The value of the argument specifies the standard deviation of the noise. 162 | 163 | In the other input regularization method, some of the inputs are randomly set to 164 | zero during training (this is sometimes called "dropout" or "multiplicative 165 | masking noise"). This type of noise is specified using the ``input_dropout`` and 166 | ``hidden_dropout`` keyword arguments, respectively: 167 | 168 | .. code:: python 169 | 170 | net.train(..., input_dropout=0.3) 171 | net.train(..., hidden_dropout=0.3) 172 | 173 | The value of the argument specifies the fraction of values in each input or 174 | hidden activation that are randomly set to zero. 175 | 176 | Instead of adding additional terms like the other regularizers, the noise 177 | regularizers can be seen as modifying the original loss for a model. For 178 | instance, consider an autoencoder model with two hidden layers: 179 | 180 | .. 
code:: python 181 | 182 | net = theanets.Autoencoder([ 183 | 100, 184 | dict(size=50, name='a'), 185 | dict(size=80, name='b'), 186 | dict(size=100, name='o')]) 187 | 188 | The loss for this model, without regularization, can be written as: 189 | 190 | .. math:: 191 | \mathcal{L}(X, \theta_a, \theta_b, \theta_o) = \frac{1}{mn} \sum_{i=1}^m \left\| 192 | \sigma_b(\sigma_a(x_i\theta_a)\theta_b)\theta_o - x_i \right\|_2^2 193 | 194 | where we've ignored the bias terms, and :math:`\theta_a`, :math:`\theta_b`, and 195 | :math:`\theta_o` are the parameters for layers a, b, and o, respectively. Also, 196 | :math:`\sigma_a` and :math:`\sigma_b` are the activation functions for their 197 | respective hidden layers. 198 | 199 | If we train this model using input and hidden noise: 200 | 201 | .. code:: python 202 | 203 | net.train(..., input_noise=q, hidden_noise=r) 204 | 205 | then the loss becomes: 206 | 207 | .. math:: 208 | \mathcal{L}(X, \theta_a, \theta_b, \theta_o) = \frac{1}{mn} \sum_{i=1}^m \left\| 209 | \left( \sigma_b\left( 210 | (\sigma_a((x_i+\epsilon_q)\theta_a)+\epsilon_r)\theta_b \right) + 211 | \epsilon_r \right)\theta_o - x_i \right\|_2^2 212 | 213 | where :math:`\epsilon_q` is white Gaussian noise drawn from 214 | :math:`\mathcal{N}(0, qI)` and :math:`\epsilon_r` is white Gaussian noise drawn 215 | separately for each hidden layer from :math:`\mathcal{N}(0, rI)`. The additive 216 | noise pushes the data and the representations off of their respective manifolds, 217 | but the loss is computed with respect to the uncorrupted input. This is thought 218 | to encourage the model to develop representations that push towards the true 219 | manifold of the data. 220 | 221 | Predefined Regularizers 222 | ======================= 223 | 224 | .. automodule:: theanets.regularizers 225 | :no-members: 226 | :no-inherited-members: 227 | 228 | .. autosummary:: 229 | :toctree: generated/ 230 | 231 | Regularizer 232 | HiddenL1 233 | WeightL1 234 | WeightL2 235 | Contractive 236 | RecurrentNorm 237 | RecurrentState 238 | BernoulliDropout 239 | GaussianNoise 240 | 241 | .. _regularizers-custom: 242 | 243 | Custom Regularizers 244 | =================== 245 | 246 | To create a custom regularizer in ``theanets``, you need to create a custom 247 | subclass of the :class:`theanets.Regularizer 248 | ` class, and then provide this regularizer 249 | when you run your model. 250 | 251 | To illustrate, let's suppose you created a linear autoencoder model that had a 252 | larger hidden layer than your dataset: 253 | 254 | .. code:: python 255 | 256 | net = theanets.Autoencoder([4, (8, 'linear'), (4, 'tied')]) 257 | 258 | Then, at least in theory, you risk learning an uninteresting "identity" model 259 | such that some hidden units are never used, and the ones that are have weights 260 | equal to the identity matrix. To prevent this from happening, you can impose a 261 | sparsity penalty when you train your model: 262 | 263 | .. code:: python 264 | 265 | net.train(..., hidden_l1=0.001) 266 | 267 | But then you might run into a situation where the sparsity penalty drives some 268 | of the hidden units in the model to zero, to "save" loss during training. 269 | Zero-valued features are probably not so interesting, so we can introduce 270 | another penalty to prevent feature weights from going to zero: 271 | 272 | .. 
code:: python 273 | 274 | class WeightInverse(theanets.Regularizer): 275 | def loss(self, layers, outputs): 276 | return sum((1 / (p * p).sum(axis=0)).sum() 277 | for l in layers for p in l.params 278 | if p.ndim == 2) 279 | 280 | net = theanets.Autoencoder([4, (8, 'linear'), (4, 'tied')]) 281 | net.train(..., hidden_l1=0.001, weightinverse=0.001) 282 | 283 | This code adds a new regularizer that penalizes the inverse of the squared 284 | length of each of the weights in the model's layers. Here we detect weights by 285 | only including parameters with 2 dimensions. 286 | -------------------------------------------------------------------------------- /docs/api/trainers.rst: -------------------------------------------------------------------------------- 1 | .. _trainers: 2 | 3 | ======== 4 | Trainers 5 | ======== 6 | 7 | The most common method for training a neural network model is to use a 8 | stochastic gradient-based optimizer. In ``theanets`` many of these algorithms 9 | are available by interfacing with the ``downhill`` package: 10 | 11 | - ``sgd``: `Stochastic gradient descent`_ 12 | - ``nag``: `Nesterov's accelerated gradient`_ 13 | - ``rprop``: `Resilient backpropagation`_ 14 | - ``rmsprop``: RMSProp_ 15 | - ``adadelta``: ADADELTA_ 16 | - ``esgd``: `Equilibrated SGD`_ 17 | - ``adam``: Adam_ 18 | 19 | .. _Stochastic gradient descent: http://downhill.readthedocs.org/en/stable/generated/downhill.first_order.SGD.html 20 | .. _Nesterov's accelerated gradient: http://downhill.readthedocs.org/en/stable/generated/downhill.first_order.NAG.html 21 | .. _Resilient backpropagation: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.RProp.html 22 | .. _RMSProp: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.RMSProp.html 23 | .. _ADADELTA: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.ADADELTA.html 24 | .. _Equilibrated SGD: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.ESGD.html 25 | .. _Adam: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.Adam.html 26 | 27 | In addition to the optimization algorithms provided by ``downhill``, 28 | ``theanets`` defines a few algorithms that are more specific to neural networks. 29 | These trainers tend to take advantage of the layered structure of the loss 30 | function for a network. 31 | 32 | - ``sample``: :class:`Sample trainer ` 33 | 34 | This trainer sets model parameters directly to samples drawn from the training 35 | data. This is a very fast "training" algorithm since all updates take place at 36 | once; however, often features derived directly from the training data require 37 | further tuning to perform well. 38 | 39 | - ``layerwise``: :class:`Layerwise (supervised) pretrainer ` 40 | 41 | Greedy supervised layerwise pre-training: This trainer applies RMSProp to each 42 | layer sequentially. 43 | 44 | - ``pretrain``: :class:`Unsupervised pretrainer ` 45 | 46 | Greedy unsupervised layerwise pre-training: This trainer applies RMSProp to a 47 | tied-weights "shadow" autoencoder using an unlabeled dataset, and then transfers 48 | the learned autoencoder weights to the model being trained. 49 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utilities: 2 | 3 | ========= 4 | Utilities 5 | ========= 6 | 7 | Recurrent helpers 8 | ================= 9 | 10 | .. 
automodule:: theanets.recurrent 11 | :no-members: 12 | :no-inherited-members: 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | 17 | batches 18 | Text 19 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import better 5 | 6 | extensions = [ 7 | 'sphinx.ext.autodoc', 8 | 'sphinx.ext.autosummary', 9 | 'sphinx.ext.intersphinx', 10 | 'sphinx.ext.mathjax', 11 | 'sphinx.ext.viewcode', 12 | 'numpydoc', 13 | ] 14 | autosummary_generate = True 15 | autodoc_default_flags = ['members'] 16 | numpydoc_show_class_members = False 17 | numpydoc_show_inherited_class_members = True 18 | source_suffix = '.rst' 19 | source_encoding = 'utf-8-sig' 20 | master_doc = 'index' 21 | project = u'Theanets' 22 | copyright = u'2015, Leif Johnson' 23 | version = '0.8' 24 | release = '0.8.0pre' 25 | exclude_patterns = ['_build'] 26 | templates_path = ['_templates'] 27 | pygments_style = 'tango' 28 | 29 | html_theme = 'better' 30 | html_theme_path = [better.better_theme_path] 31 | html_theme_options = dict( 32 | rightsidebar=False, 33 | inlinecss='', 34 | cssfiles=['_static/style-tweaks.css'], 35 | showheader=True, 36 | showrelbartop=True, 37 | showrelbarbottom=True, 38 | linktotheme=True, 39 | sidebarwidth='15rem', 40 | textcolor='#111', 41 | headtextcolor='#333', 42 | footertextcolor='#333', 43 | ga_ua='', 44 | ga_domain='', 45 | ) 46 | html_short_title = 'Home' 47 | html_static_path = ['_static'] 48 | 49 | 50 | def h(xs): 51 | return ['{}.html'.format(x) for x in xs.split()] 52 | html_sidebars = { 53 | 'index': h('gitwidgets globaltoc sourcelink searchbox'), 54 | '**': h('gitwidgets localtoc sourcelink searchbox'), 55 | } 56 | 57 | intersphinx_mapping = { 58 | 'python': ('https://docs.python.org/3.4/', None), 59 | 'downhill': ('http://downhill.readthedocs.org/en/stable/', None), 60 | 'numpy': ('http://docs.scipy.org/doc/numpy/', None), 61 | 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), 62 | } 63 | -------------------------------------------------------------------------------- /docs/examples/mnist-classifier.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | Classifying MNIST Digits 3 | ======================== 4 | 5 | A standard benchmark for neural network :class:`classification 6 | ` is the `MNIST digits dataset 7 | `_, a set of 70,000 28×28 images of 8 | hand-written digits. Each MNIST digit is labeled with the correct digit class 9 | (0, 1, ... 9). This example shows how to use ``theanets`` to create and train a 10 | model that can perform this task. 11 | 12 | .. image:: ../_static/mnist-digits-small.png 13 | 14 | Networks for classification map a layer of continuous-valued inputs, through one 15 | or more hidden layers, to an output layer that is activated through the `softmax 16 | function`_. The softmax generates output that can be treated as a categorical 17 | distribution over the digit labels given the input image. 18 | 19 | .. _softmax function: http://en.wikipedia.org/wiki/Softmax_function 20 | 21 | Defining the model 22 | ------------------ 23 | 24 | Now that you know which model to use for this task, you'll need to define some 25 | hyperparameters that determine the structure of your network. The most important 26 | of these is choosing a set of layer sizes that you want in your model. 
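The sizes can also be read directly off your data arrays rather than hard-coded; here is a purely hypothetical sketch (``samples`` and ``labels`` are assumed placeholder names, and the required sizes are explained in detail just below)::

    # Hypothetical sketch: derive the layer sizes from the data arrays.
    n_inputs = samples.shape[1]          # 784 for vectorized MNIST pixels
    n_classes = len(np.unique(labels))   # 10 for the digit classes
    net = theanets.Classifier(layers=[n_inputs, 100, n_classes])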
27 | 28 | The first ("input") and last ("output") layers in your network must match the 29 | size of the data you'll be providing. For an MNIST classification task, this 30 | means your network must have 784 inputs (one for each image pixel) and 10 31 | outputs (one for each class). 32 | 33 | Between the input and output layers, on the other hand, can be any number of 34 | so-called "hidden" layers, in almost any configuration. Models with more than 35 | about two hidden layers are commonly called "deep" models and have been quite 36 | popular recently due to their success on a variety of difficult machine learning 37 | problems. For now, though, to keep things simple, let's start out with a model 38 | that just has one hidden layer with 100 units. 39 | 40 | Once you've chosen the layers you want in your model, you typically pass the 41 | layers to the model constructor:: 42 | 43 | net = theanets.Classifier(layers=[784, 100, 10]) 44 | 45 | This is all that's required to get started. There are many different 46 | hyperparameters that can also be useful when constructing a model; see 47 | :ref:`guide-creating` for more information. Particularly useful to know will be 48 | the different ways of creating layers; see 49 | :ref:`guide-creating-specifying-layers` for details. 50 | 51 | Preparing the data 52 | ------------------ 53 | 54 | In ``theanets``, the parameters of a model are initialized randomly. To improve 55 | the model's performance on the task, you'll need to train the model parameters. 56 | This training process requires a dataset to compute gradient and loss function 57 | values. 58 | 59 | In the case of the MNIST digits, our classifier model will consume a dataset 60 | consisting of two parts---"samples" (image pixels) and corresponding "labels" 61 | (integer class values). Each of these parts is provided as a ``numpy`` array: 62 | the samples are a two-dimensional array, with vectorized MNIST pixels arranged 63 | along the first axis and pixel data arranged along the second axis; the labels 64 | are a one-dimensional array, with one integer value per MNIST image. 65 | 66 | For easy access to the MNIST digits dataset, we'll use the ``skdata`` package 67 | and write a little bit of glue code to get the data into the desired format:: 68 | 69 | def load_mnist(): 70 | mnist = skdata.mnist.dataset.MNIST() 71 | mnist.meta # trigger download if needed. 72 | def arr(n, dtype): 73 | # convert an array to the proper shape and dtype 74 | arr = mnist.arrays[n] 75 | return arr.reshape((len(arr), -1)).astype(dtype) 76 | train_images = arr('train_images', 'f') / 255. 77 | train_labels = arr('train_labels', np.uint8) 78 | test_images = arr('test_images', 'f') / 255. 79 | test_labels = arr('test_labels', np.uint8) 80 | return ((train_images[:50000], train_labels[:50000, 0]), 81 | (train_images[50000:], train_labels[50000:, 0]), 82 | (test_images, test_labels[:, 0])) 83 | 84 | Here we've rescaled the image data so that each pixel lies in the interval [0, 85 | 1] instead of the default [0, 255]. (In general, it's a good idea to standardize 86 | the data for your problem so that each dimension has approximately the same 87 | scale.) We've also reshaped the data as described above. 88 | 89 | .. note:: 90 | 91 | Because ``theanets`` uses Theano for its computations, most datasets need to 92 | be cast to a value that is compatible with your setting for 93 | `Theano's "floatX" configuration parameter`_. 
Unless you have a really 94 | expensive GPU, this is likely to mean that you need to use 32-bit floats. 95 | 96 | .. _Theano's "floatX" configuration parameter: http://deeplearning.net/software/theano/library/config.html#config.floatX 97 | 98 | The load function returns a training split (the first 50000 examples), a 99 | validation split (the remainder of the training data from ``skdata``, containing 100 | 10000 examples), and a test split (the test split from ``skdata``, containing 101 | 10000 examples). The training dataset is used to compute parameter updates, and 102 | the validation dataset is used to determine when the model has stopped 103 | improving during training. 104 | 105 | There are other ways to provide data to your model during training; for a more 106 | complete description, see :ref:`guide-training-providing-data`. 107 | 108 | Training the model 109 | ------------------ 110 | 111 | Now that you have a model and some data, you're ready to train the model so that 112 | it performs the classification task as well as possible. Models are set up to 113 | handle training with fairly little work. 114 | 115 | The main decision to make during training is to choose the training algorithm to 116 | use, along with values for any associated hyperparameters. This is most 117 | naturally accomplished using the :func:`Network.train() 118 | ` method:: 119 | 120 | train, valid, test = load_mnist() 121 | 122 | net.train(train, 123 | valid, 124 | algo='nag', 125 | learning_rate=1e-3, 126 | momentum=0.9) 127 | 128 | The first positional argument to this method is the training dataset, and the 129 | second (if provided) is a validation dataset. If a validation dataset is not 130 | provided, the training dataset will be used for validation. 131 | 132 | The ``algo`` keyword argument specifies an algorithm to use for training. If you 133 | do not provide a value for this argument, :class:`RMSProp 134 | ` is currently used as the default training 135 | algorithm. Any subsequent keyword arguments will be passed to the training 136 | algorithm; these arguments typically specify hyperparameters of the algorithm 137 | like the learning rate and so forth. 138 | 139 | The available training methods are described in :ref:`trainers`; here we've 140 | specified Nesterov's Accelerated Gradient, a type of stochastic gradient descent 141 | with momentum. 142 | 143 | Visualizing features 144 | -------------------- 145 | 146 | Once you've trained a classification model for MNIST digits, it can be 147 | informative to visually inspect the features that the model has learned. Because 148 | the model was trained using the MNIST digits, you can reshape the learned 149 | features and visualize them as though they were 28×28 images:: 150 | 151 | img = np.zeros((28 * 10, 28 * 10), dtype='f') 152 | for i, pix in enumerate(net.find('hid1', 'w').get_value().T): 153 | r, c = divmod(i, 10) 154 | img[r * 28:(r+1) * 28, c * 28:(c+1) * 28] = pix.reshape((28, 28)) 155 | plt.imshow(img, cmap=plt.cm.gray) 156 | plt.show() 157 | 158 | In this example, the weights in layer 1 connect the inputs to the first hidden 159 | layer; these weights have one column of 784 values for each hidden node in the 160 | network, so we can iterate over the transpose and put each column---properly 161 | reshaped---into a giant image. 
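If the indexing here seems opaque, a quick shape check can help; this is a hedged aside (not part of the original example) that assumes the ``[784, 100, 10]`` model defined above::

    w = net.find('hid1', 'w').get_value()
    print(w.shape)   # (784, 100): one column of 784 pixel weights per hidden unit
    # iterating over w.T therefore yields one 784-element feature at a time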
162 | 163 | The trained model can also be used to predict the class for a new MNIST digit:: 164 | 165 | predicted_class = net.predict(new_digit) 166 | 167 | For more information on the things you can do with a model, see 168 | :ref:`guide-using`. 169 | -------------------------------------------------------------------------------- /docs/examples/recurrent-memory.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | Remembering Network Inputs 3 | ========================== 4 | 5 | Recurrent neural networks are a family of network models whose computation graph 6 | contains a cycle---that is, there are some layers in a recurrent network whose 7 | outputs at a certain time step depend not only on the inputs at that time step, 8 | but also on the state of the network at some previous time step as well. 9 | 10 | Recurrent networks, while often quite tricky to train, can be used to solve 11 | difficult modeling tasks. Thanks to recent advances in optimization algorithms, 12 | recurrent networks are enjoying a resurgence in popularity and have been shown 13 | to be quite effective at a number of different temporal modeling tasks. 14 | 15 | In this section we consider a classic task for a recurrent network: remembering 16 | data from past inputs. In this task, a network model receives one input value at 17 | each time step. The network is to remember the first :math:`k` values, then wait 18 | for :math:`t` time steps, and then reproduce the first :math:`k` values that it 19 | saw. Effectively the model must ignore the inputs after time step :math:`k` and 20 | start producing the desired output at time step :math:`k + t`. 21 | 22 | Defining the model 23 | ================== 24 | 25 | We'll set up a recurrent model by creating a :class:`recurrent regression 26 | ` instance:: 27 | 28 | net = theanets.recurrent.Regressor(layers=[1, ('lstm', 10), 1]) 29 | 30 | Our network has three layers: the first just has one input unit, the next is a 31 | Long Short-Term Memory (LSTM) recurrent layer with ten units, and the output is 32 | a linear layer with just one output unit. This is just one way of specifying 33 | layers in a network; for more details see 34 | :ref:`guide-creating-specifying-layers`. 35 | 36 | Training the model 37 | ================== 38 | 39 | The most difficult part of training this model is creating the required data. To 40 | compute the loss for a recurrent regression model in ``theanets``, we need to 41 | provide two arrays of data---one input array, and one target output array. Each 42 | of these arrays must have three dimensions: the first is time, the second is the 43 | batch size, and the third is the number of inputs/outputs in the dataset. 44 | 45 | For the memory task, we can easily create random arrays with the appropriate 46 | shape. We just need to make sure that the last :math:`k` time steps of the 47 | output are set to the first :math:`k` time steps of the input:: 48 | 49 | T = 20 50 | K = 3 51 | BATCH_SIZE = 32 52 | 53 | def generate(): 54 | s, t = np.random.randn(2, T, BATCH_SIZE, 1).astype('f') 55 | s[:K] = t[-K:] = np.random.randn(K, BATCH_SIZE, 1) 56 | return [s, t] 57 | 58 | In ``theanets``, data can be provided to a trainer in several ways; here we've 59 | used a callable that generates batches of data for us. See 60 | :ref:`guide-training-providing-data` for more information. 
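As a quick check (an added aside, not part of the original example), calling the generator once and printing the shapes confirms the (time, batch size, inputs) layout described above::

    s, t = generate()
    print(s.shape, t.shape)   # both (20, 32, 1): T time steps, 32 examples, 1 value each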
61 | 62 | Having set up a way to create training data, we just need to pass this along to 63 | our training algorithm:: 64 | 65 | net.train(generate, algo='rmsprop') 66 | 67 | This process will adjust the weights in the model so that the outputs of the 68 | model, given the inputs, will be closer and closer to the targets that we 69 | provide. 70 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | THEANETS 3 | ======== 4 | 5 | The ``theanets`` package is a deep learning and neural network toolkit. It is 6 | written in Python to interoperate with excellent tools like ``numpy`` and 7 | ``scikit-learn``, and it uses Theano_ to accelerate computations when possible 8 | using your GPU. The package aims to provide: 9 | 10 | - a simple API for building and training common types of neural network models; 11 | - thorough documentation; 12 | - easy-to-read code; 13 | - and, under the hood, a fully expressive graph computation framework. 14 | 15 | The package strives to "make the easy things easy and the difficult things 16 | possible." Please try it out, and let us know what you think! 17 | 18 | The source code for ``theanets`` lives at http://github.com/lmjohns3/theanets, 19 | the documentation lives at http://theanets.readthedocs.org, and announcements 20 | and discussion happen on the `mailing list`_. 21 | 22 | .. _Theano: http://deeplearning.net/software/theano/ 23 | .. _mailing list: https://groups.google.com/forum/#!forum/theanets 24 | 25 | Quick Start: Classification 26 | =========================== 27 | 28 | Suppose you want to create a classifier and train it on some 100-dimensional 29 | data points that you've classified into 10 categories. No problem! With just a 30 | few lines you can (a) provide some data, (b) build and (c) train a model, 31 | and (d) evaluate the model:: 32 | 33 | import theanets 34 | from sklearn.datasets import make_classification 35 | from sklearn.metrics import confusion_matrix 36 | 37 | # Create a classification dataset. 38 | X, y = make_classification( 39 | n_samples=3000, n_features=100, n_classes=10, n_informative=10) 40 | X = X.astype('f') 41 | y = y.astype('i') 42 | cut = int(len(X) * 0.8) # training / validation split 43 | train = X[:cut], y[:cut] 44 | valid = X[cut:], y[cut:] 45 | 46 | # Build a classifier model with 100 inputs and 10 outputs. 47 | net = theanets.Classifier([100, 10]) 48 | 49 | # Train the model using SGD with momentum. 50 | net.train(train, valid, algo='sgd', learning_rate=1e-4, momentum=0.9) 51 | 52 | # Show confusion matrices on the training/validation splits. 53 | for label, (X, y) in (('training:', train), ('validation:', valid)): 54 | print(label) 55 | print(confusion_matrix(y, net.predict(X))) 56 | 57 | Layers 58 | ------ 59 | 60 | The model above is quite simplistic! Make it a bit more sophisticated by adding 61 | a hidden layer:: 62 | 63 | net = theanets.Classifier([100, 1000, 10]) 64 | 65 | In fact, you can just as easily create 3 (or any number of) hidden layers:: 66 | 67 | net = theanets.Classifier([ 68 | 100, 1000, 1000, 1000, 10]) 69 | 70 | By default, hidden layers use the relu transfer function. By passing a tuple 71 | instead of just an integer, you can change some of these layers to use different 72 | :mod:`activations `:: 73 | 74 | maxout = (1000, 'maxout:4') # maxout with 4 pieces. 
75 | net = theanets.Classifier([ 76 | 100, 1000, maxout, (1000, 'tanh'), 10]) 77 | 78 | By passing a dictionary instead, you can specify even more attributes of each 79 | :mod:`layer `, like how its parameters are initialized:: 80 | 81 | # Sparsely-initialized layer with large nonzero weights. 82 | foo = dict(name='foo', size=1000, std=1, sparsity=0.9) 83 | net = theanets.Classifier([ 84 | 100, foo, (1000, 'maxout:4'), (1000, 'tanh'), 10]) 85 | 86 | Specifying layers is the heart of building models in ``theanets``. Read more 87 | about this in :ref:`guide-creating-specifying-layers`. 88 | 89 | Regularization 90 | -------------- 91 | 92 | Adding regularizers is easy, too! Just pass them to the training method. For 93 | instance, you can train up a sparse classification model with weight decay:: 94 | 95 | # Penalize hidden-unit activity (L1 norm) and weights (L2 norm). 96 | net.train(train, valid, hidden_l1=0.001, weight_l2=0.001) 97 | 98 | In ``theanets`` dropout is treated as a regularizer and can be set on many 99 | layers at once:: 100 | 101 | net.train(train, valid, hidden_dropout=0.5) 102 | 103 | or just on a specific layer:: 104 | 105 | net.train(train, valid, dropout={'foo:out': 0.5}) 106 | 107 | Similarly, you can add Gaussian noise to any of the layers (here, just to the 108 | input layer):: 109 | 110 | net.train(train, valid, input_noise=0.3) 111 | 112 | Optimization Algorithms 113 | ----------------------- 114 | 115 | You can optimize your model using any of the algorithms provided by downhill_ 116 | (SGD, NAG, RMSProp, ADADELTA, etc.), or additionally using a couple of 117 | :mod:`pretraining methods ` specific to neural networks. 118 | 119 | .. _downhill: http://downhill.readthedocs.org/ 120 | .. _pretraining methods: http://theanets.readthedocs.org/en/latest/reference.html#module-theanets.trainer 121 | 122 | You can also make as many successive calls to :func:`train() 123 | ` as you like. Each call can include different 124 | training algorithms:: 125 | 126 | net.train(train, valid, algo='rmsprop') 127 | net.train(train, valid, algo='nag') 128 | 129 | different learning hyperparameters:: 130 | 131 | net.train(train, valid, algo='rmsprop', learning_rate=0.1) 132 | net.train(train, valid, algo='rmsprop', learning_rate=0.01) 133 | 134 | and different regularization hyperparameters:: 135 | 136 | net.train(train, valid, input_noise=0.7) 137 | net.train(train, valid, input_noise=0.3) 138 | 139 | Training models is a bit more art than science, but ``theanets`` tries to make 140 | it easy to evaluate different training approaches. Read more about this in 141 | :ref:`guide-training`. 142 | 143 | Quick Start: Recurrent Models 144 | ============================= 145 | 146 | Recurrent neural networks are becoming quite important for many sequence-based 147 | tasks in machine learning; one popular toy example for recurrent models is to 148 | generate text that's similar to some body of training text. 149 | 150 | In these models, a recurrent classifier is set up to predict the identity of the 151 | next character in a sequence of text, given all of the preceding characters. The 152 | inputs to the model are the one-hot encodings of a sequence of characters from 153 | the text, and the corresponding outputs are the class labels of the subsequent 154 | character. 
The ``theanets`` code has a :class:`Text ` 155 | helper class that provides easy encoding and decoding of text to and from 156 | integer classes; using the helper makes the top-level code look like:: 157 | 158 | import numpy as np, re, theanets 159 | 160 | chars = re.sub(r'\s+', ' ', open('corpus.txt').read().lower()) 161 | txt = theanets.recurrent.Text(chars, min_count=10) 162 | A = 1 + len(txt.alpha) # of letter classes 163 | 164 | # create a model to train: input -> gru -> relu -> softmax. 165 | net = theanets.recurrent.Classifier([ 166 | A, (100, 'gru'), (1000, 'relu'), A]) 167 | 168 | # train the model iteratively; draw a sample after every epoch. 169 | seed = txt.encode(txt.text[300017:300050]) 170 | for tm, _ in net.itertrain(txt.classifier_batches(100, 32), momentum=0.9): 171 | print('{}|{} ({:.1f}%)'.format( 172 | txt.decode(seed), 173 | txt.decode(net.predict_sequence(seed, 40)), 174 | 100 * tm['acc'])) 175 | 176 | This example uses several features of ``theanets`` that make modeling neural 177 | networks fun and interesting. The model uses a layer of :class:`Gated Recurrent 178 | Units ` to capture the temporal dependencies in 179 | the data. It also `uses a callable`_ to provide data to the model, and takes 180 | advantage of `iterative training`_ to sample an output from the model after each 181 | training epoch. 182 | 183 | .. _uses a callable: http://downhill.readthedocs.org/en/stable/guide.html#data-using-callables 184 | .. _iterative training: http://downhill.readthedocs.org/en/stable/guide.html#iterative-optimization 185 | 186 | To run this example, download a text you'd like to model (e.g., Herman 187 | Melville's *Moby Dick*) and save it in ``corpus.txt``:: 188 | 189 | curl http://www.gutenberg.org/cache/epub/2701/pg2701.txt > corpus.txt 190 | 191 | Then when you run the script, the output might look something like this 192 | (abbreviated to show patterns):: 193 | 194 | used for light, but only as an oi|pr vgti ki nliiariiets-a, o t.;to niy , (16.6%) 195 | used for light, but only as an oi|s bafsvim-te i"eg nadg tiaraiatlrekls tv (20.2%) 196 | used for light, but only as an oi|vetr uob bsyeatit is-ad. agtat girirole, (28.5%) 197 | used for light, but only as an oi|siy thinle wonl'th, in the begme sr"hey (29.9%) 198 | used for light, but only as an oi|nr. bonthe the tuout honils ohe thib th (30.5%) 199 | used for light, but only as an oi|kg that mand sons an, of,rtopit bale thu (31.0%) 200 | used for light, but only as an oi|nsm blasc yan, ang theate thor wille han (32.1%) 201 | used for light, but only as an oi|b thea mevind, int amat ars sif istuad p (33.3%) 202 | used for light, but only as an oi|msenge bie therale hing, aik asmeatked s (34.1%) 203 | used for light, but only as an oi|ge," rrermondy ghe e comasnig that urle (35.5%) 204 | used for light, but only as an oi|s or thartich comase surt thant seaiceng (36.1%) 205 | used for light, but only as an oi|s lot fircennor, unding dald bots trre i (37.1%) 206 | used for light, but only as an oi|st onderass noptand. "peles, suiondes is (38.2%) 207 | used for light, but only as an oi|gnith. s. lited, anca! 
stobbease so las, (39.3%) 208 | used for light, but only as an oi|chics fleet dong berieribus armor has or (40.1%) 209 | used for light, but only as an oi|cs and quirbout detom tis glome dold pco (41.1%) 210 | used for light, but only as an oi|nht shome wand, the your at movernife lo (42.0%) 211 | used for light, but only as an oi|r a reald hind the, with of the from sti (43.0%) 212 | used for light, but only as an oi|t beftect. how shapellatgen the fortower (44.0%) 213 | used for light, but only as an oi|rtucated fanns dountetter from fom to wi (45.2%) 214 | used for light, but only as an oi|r the sea priised tay queequings hearhou (46.8%) 215 | used for light, but only as an oi|ld, wode, i long ben! but the gentived. (48.0%) 216 | used for light, but only as an oi|r wide-no nate was him. "a king to had o (49.1%) 217 | used for light, but only as an oi|l erol min't defositanable paring our. 4 (50.0%) 218 | used for light, but only as an oi|l the motion ahab, too, and relay in aha (51.0%) 219 | used for light, but only as an oi|n dago, and contantly used the coil; but (52.3%) 220 | used for light, but only as an oi|l starbuckably happoss of the fullies ti (52.4%) 221 | used for light, but only as an oi|led-bubble most disinuan into the mate-- (53.3%) 222 | used for light, but only as an oi|len. ye?' 'tis though moby starbuck, and (53.6%) 223 | used for light, but only as an oi|l, and the pequodeers. but was all this: (53.9%) 224 | used for light, but only as an oi|ling his first repore to the pequod, sym (54.4%) 225 | used for light, but only as an oi|led escried; we they like potants--old s (54.3%) 226 | used for light, but only as an oi|l-ginqueg! i save started her supplain h (54.3%) 227 | used for light, but only as an oi|l is, the captain all this mildly bounde (54.9%) 228 | 229 | Here, the seed text is shown left of the pipe character, and the randomly 230 | sampled sequence follows. In parantheses are the per-character accuracy values 231 | on the training set while training the model. The pattern of learning proceeds 232 | from almost-random character generation, to producing groups of letters 233 | separated by spaces, to generating words that seem like they might belong in 234 | *Moby Dick*, things like "captain," "ahab, too," and "constantly used the coil." 235 | 236 | Much amusement can be derived from a temporal model extending itself forward in 237 | this way. After all, how else would we ever think of "Pequodeers," 238 | "Starbuckably," or "Ginqueg"?! 239 | 240 | User Guide 241 | ========== 242 | 243 | .. toctree:: 244 | :maxdepth: 2 245 | 246 | guide 247 | 248 | Examples 249 | ======== 250 | 251 | .. toctree:: 252 | :maxdepth: 2 253 | :glob: 254 | 255 | examples/* 256 | 257 | API Documentation 258 | ================= 259 | 260 | .. toctree:: 261 | :maxdepth: 2 262 | :glob: 263 | 264 | api/models 265 | api/layers 266 | api/activations 267 | api/losses 268 | api/regularizers 269 | api/trainers 270 | api/utils 271 | 272 | .. 
toctree:: 273 | :hidden: 274 | 275 | api/reference 276 | 277 | Indices and tables 278 | ================== 279 | 280 | - :ref:`genindex` 281 | - :ref:`modindex` 282 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 
100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\theanets.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\theanets.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | sphinx-better-theme 3 | -------------------------------------------------------------------------------- /examples/cifar-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import click 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import theanets 7 | 8 | from utils import load_cifar, plot_layers, plot_images 9 | 10 | K = 655 # this value of K retains 99% of the variance in the cifar images. 
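# (Added aside, not part of the original script.) A cutoff like K can be
# derived from the eigenvalue spectrum of the data covariance rather than
# hard-coded; a hedged sketch, for a mean-centered data matrix x:
#
#     vals = np.linalg.eigvalsh(np.dot(x.T, x) / len(x))[::-1]
#     frac = np.cumsum(vals) / vals.sum()
#     K = int(np.searchsorted(frac, 0.99)) + 1   # smallest K reaching 99% variance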
11 | 12 | def pca(dataset): 13 | mean = dataset[:3000].mean(axis=0) 14 | 15 | theanets.log('computing whitening transform') 16 | x = dataset[:3000] - mean 17 | vals, vecs = np.linalg.eigh(np.dot(x.T, x) / len(x)) 18 | vals = vals[::-1] 19 | vecs = vecs[:, ::-1] 20 | 21 | vals = np.sqrt(vals[:K]) 22 | vecs = vecs[:, :K] 23 | 24 | def whiten(x): 25 | return np.dot(x, np.dot(vecs, np.diag(1. / vals))) 26 | 27 | def color(z): 28 | return np.dot(z, np.dot(np.diag(vals), vecs.T)) 29 | 30 | return whiten, color 31 | 32 | 33 | @click.command() 34 | @click.option('--features', default=None, type=int, metavar='N', 35 | help='Train a model with NxN hidden features.') 36 | def main(features): 37 | train, valid, _ = load_cifar() 38 | 39 | whiten, color = pca(train[0]) 40 | 41 | feat = features or int(np.sqrt(2 * K)) 42 | n = theanets.Autoencoder([K, feat ** 2, K]) 43 | n.train(whiten(train), whiten(valid), input_noise=1, train_batches=313) 44 | 45 | plot_layers([ 46 | color(n.find('hid1', 'w').get_value().T).T, 47 | color(n.find('out', 'w').get_value())], channels=3) 48 | plt.tight_layout() 49 | plt.show() 50 | 51 | valid = whiten(valid[:100]) 52 | plot_images(color(valid), 121, 'Sample data', channels=3) 53 | plot_images(color(n.predict(valid)), 122, 54 | 'Reconstructed data', channels=3) 55 | plt.tight_layout() 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /examples/lstm-chime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''Theanets example using a deep bidirectional LSTM for phoneme classification. 4 | 5 | This example loads an audio classification benchmark from github, defines a 6 | callable for extracting batches from the downloaded dataset, and trains a deep 7 | classifier network on the data. The network that is evaluated as part of the 8 | benchmarks is a three-layer bidirectional LSTM. Typically the model exceeds 90% 9 | accuracy on the training set, but reaches only about 70% accuracy on the 10 | validation set. Clearly overtraining is a critical issue here. 11 | 12 | This example only works with Python 2 at the moment. 13 | ''' 14 | 15 | import io 16 | import numpy as np 17 | import theanets 18 | import scipy.io 19 | import os 20 | import tempfile 21 | import urllib 22 | import zipfile 23 | 24 | BATCH_SIZE = 32 25 | TRAIN_NC = os.path.join(tempfile.gettempdir(), 'chime1_train.nc') 26 | VALID_NC = os.path.join(tempfile.gettempdir(), 'chime1_valid.nc') 27 | ZIPURL = 'https://github.com/craffel/lstm_benchmarks/archive/master.zip' 28 | 29 | # If needed, get the data files from https://github.com/craffel/lstm_benchmarks. 30 | if not os.path.isfile(TRAIN_NC) or not os.path.isfile(VALID_NC): 31 | theanets.log('attempting data copy from url: {}', ZIPURL) 32 | z = zipfile.ZipFile(io.BytesIO(urllib.urlopen(ZIPURL).read())) 33 | with open(TRAIN_NC, 'wb') as savefile: 34 | savefile.write(z.read('lstm_benchmarks-master/data/train_1_speaker.nc')) 35 | with open(VALID_NC, 'wb') as savefile: 36 | savefile.write(z.read('lstm_benchmarks-master/data/val_1_speaker.nc')) 37 | z.close() 38 | 39 | 40 | def batch_at(features, labels, seq_begins, seq_lengths): 41 | '''Extract a single batch of data to pass to the model being trained. 42 | 43 | Parameters 44 | ---------- 45 | features, labels : ndarray 46 | Arrays of the input features and target labels. 
47 | seq_begins : ndarray 48 | Array of the start offsets of the speech segments to include. 49 | seq_lengths : ndarray 50 | Array of the lengths of the speech segments to include in the batch. 51 | 52 | Returns 53 | ------- 54 | features, labels, mask : ndarrays 55 | A triple of arrays for training a network. The first element contains 56 | input features, the second contains target labels, and the third 57 | contains a "mask" consisting of ones where there is valid data and zeros 58 | everywhere else. 59 | ''' 60 | length = seq_lengths.max() 61 | feat = np.zeros((BATCH_SIZE, length, features.shape[-1]), 'f') 62 | labl = np.zeros((BATCH_SIZE, length), 'i') 63 | mask = np.zeros((BATCH_SIZE, length), 'f') 64 | for b, (begin, length) in enumerate(zip(seq_begins, seq_lengths)): 65 | feat[b, :length] = features[begin:begin+length] 66 | labl[b, :length] = labels[begin:begin+length] 67 | mask[b, :length] = 1 68 | return [feat, labl, mask] 69 | 70 | 71 | def batches(dataset): 72 | '''Returns a callable that chooses sequences from netcdf data.''' 73 | seq_lengths = dataset.variables['seqLengths'].data 74 | seq_begins = np.concatenate(([0], np.cumsum(seq_lengths)[:-1])) 75 | 76 | def sample(): 77 | chosen = np.random.choice( 78 | list(range(len(seq_lengths))), BATCH_SIZE, replace=False) 79 | return batch_at(dataset.variables['inputs'].data, 80 | dataset.variables['targetClasses'].data, 81 | seq_begins[chosen], 82 | seq_lengths[chosen]) 83 | 84 | return sample 85 | 86 | 87 | # Now that we can load data, we construct a recurrent classifier model and then 88 | # train it up! Training progress will be displayed on the console. This example 89 | # can take a good while to run, especially the first time it is run (it takes 90 | # about 20min to compile the model from scratch, but only a few minutes if all 91 | # of the compiler targets are cached). 92 | 93 | def layer(n): 94 | '''Helper for building a bidirectional LSTM layer with n cells.''' 95 | return dict(form='bidirectional', worker='lstm', size=n) 96 | 97 | n = theanets.recurrent.Classifier( 98 | layers=(39, layer(156), layer(300), layer(102), (51, 'softmax')), 99 | weighted=True, 100 | ) 101 | 102 | n.train( 103 | batches(scipy.io.netcdf_file(TRAIN_NC)), 104 | batches(scipy.io.netcdf_file(VALID_NC)), 105 | algo='rmsprop', 106 | learning_rate=0.0001, 107 | momentum=0.9, 108 | max_gradient_clip=1, 109 | input_noise=0.6, 110 | train_batches=30, 111 | valid_batches=3, 112 | batch_size=BATCH_SIZE, 113 | ) 114 | -------------------------------------------------------------------------------- /examples/mnist-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''Single-layer autoencoder example using MNIST digit data. 4 | 5 | This example shows one way to train a single-layer autoencoder model using the 6 | handwritten MNIST digits. 7 | 8 | This example also shows the use of command-line arguments. 9 | ''' 10 | 11 | import click 12 | import matplotlib.pyplot as plt 13 | import theanets 14 | 15 | from utils import load_mnist, plot_layers, plot_images 16 | 17 | @click.command() 18 | @click.option('--features', default=16, type=int, metavar='N', 19 | help='Train a model with NxN hidden features.') 20 | def main(features): 21 | # load up the MNIST digit dataset. 
22 | train, valid, _ = load_mnist() 23 | 24 | net = theanets.Autoencoder([784, features ** 2, 784]) 25 | net.train(train, valid, 26 | train_batches=100, 27 | input_noise=0.1, 28 | weight_l2=0.0001, 29 | algo='rmsprop', 30 | momentum=0.9, 31 | min_improvement=0.1) 32 | 33 | plot_layers([net.find('hid1', 'w'), net.find('out', 'w')]) 34 | plt.tight_layout() 35 | plt.show() 36 | 37 | v = valid[0][:100] 38 | plot_images(v, 121, 'Sample data') 39 | plot_images(net.predict(v), 122, 'Reconstructed data') 40 | plt.tight_layout() 41 | plt.show() 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /examples/mnist-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers 7 | 8 | 9 | train, valid, _ = load_mnist(labels=True) 10 | 11 | N = 10 12 | 13 | net = theanets.Classifier([784, N * N, ('softmax', 10)]) 14 | net.train(train, valid, min_improvement=0.001, train_batches=100) 15 | 16 | plot_layers([net.find('hid1', 'w'), net.find('out', 'w')]) 17 | plt.tight_layout() 18 | plt.show() 19 | -------------------------------------------------------------------------------- /examples/mnist-convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_filters 7 | 8 | 9 | SHAPE = (28, 28, 1) 10 | 11 | train, valid, _ = load_mnist(labels=True) 12 | 13 | X, y = train 14 | train = X.reshape((-1, ) + SHAPE), y 15 | 16 | X, y = valid 17 | valid = X.reshape((-1, ) + SHAPE), y 18 | 19 | net = theanets.convolution.Classifier([ 20 | SHAPE, dict(form='conv2', size=100, filter_size=(14, 14)), 'flatten', 10]) 21 | net.train(train, valid, train_batches=100, valid_batches=100) 22 | 23 | plot_filters(net.find('hid1', 'w')) 24 | plt.tight_layout() 25 | plt.show() 26 | -------------------------------------------------------------------------------- /examples/mnist-deep-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers, plot_images 7 | 8 | 9 | train, valid, _ = load_mnist() 10 | 11 | net = theanets.Autoencoder( 12 | layers=(784, 256, 100, 64, ('tied', 100), ('tied', 256), ('tied', 784)), 13 | ) 14 | net.train(train, valid, 15 | algo='layerwise', 16 | patience=1, 17 | min_improvement=0.05, 18 | train_batches=100) 19 | net.train(train, valid, min_improvment=0.01, train_batches=100) 20 | 21 | plot_layers([net.find(i, 'w') for i in (1, 2, 3)], tied_weights=True) 22 | plt.tight_layout() 23 | plt.show() 24 | 25 | valid = valid[0][:100] 26 | plot_images(valid, 121, 'Sample data') 27 | plot_images(net.predict(valid), 122, 'Reconstructed data') 28 | plt.tight_layout() 29 | plt.show() 30 | -------------------------------------------------------------------------------- /examples/mnist-deep-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers, plot_images 7 | 8 | 9 | net = theanets.Classifier( 10 | layers=(784, 1024, 256, 64, ('softmax', 10)), 11 | ) 12 | 13 | # 
first, run an unsupervised layerwise pretrainer. 14 | train, valid, _ = load_mnist() 15 | net.train(train, valid, 16 | algo='pretrain', 17 | patience=1, 18 | min_improvement=0.1, 19 | train_batches=100) 20 | 21 | # second, run a supervised trainer on the classifier model. 22 | train, valid, _ = load_mnist(labels=True) 23 | net.train(train, valid, min_improvement=0.01, train_batches=100) 24 | 25 | plot_layers([net.find(i, 'w') for i in (1, 2, 3)]) 26 | plt.tight_layout() 27 | plt.show() 28 | -------------------------------------------------------------------------------- /examples/mnist-rica.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import theanets 6 | 7 | from utils import load_mnist, plot_layers, plot_images 8 | 9 | 10 | class WeightInverse(theanets.Regularizer): 11 | def loss(self, layers, outputs): 12 | return sum((1 / (w * w).sum(axis=0)).sum() 13 | for l in layers for w in l.params 14 | if w.ndim > 1) 15 | 16 | 17 | (train, ), (valid, ), _ = load_mnist() 18 | 19 | # mean-center the digits and compute a pca whitening transform. 20 | 21 | m = train.mean(axis=0) 22 | train -= m 23 | valid -= m 24 | 25 | theanets.log('computing whitening transform') 26 | vals, vecs = np.linalg.eigh(np.dot(train.T, train) / len(train)) 27 | vals = vals[::-1] 28 | vecs = vecs[:, ::-1] 29 | 30 | K = 197 # this retains 99% of the variance in the digit data. 31 | vals = np.sqrt(vals[:K]) 32 | vecs = vecs[:, :K] 33 | 34 | 35 | def whiten(x): 36 | return np.dot(x, np.dot(vecs, np.diag(1. / vals))) 37 | 38 | 39 | def color(z): 40 | return np.dot(z, np.dot(np.diag(vals), vecs.T)) 41 | 42 | # now train our model on the whitened dataset. 43 | 44 | N = 20 45 | 46 | net = theanets.Autoencoder([K, (N * N, 'linear'), (K, 'tied')]) 47 | 48 | net.train(whiten(train), 49 | whiten(valid), 50 | hidden_l1=0.5, 51 | weightinverse=1e-6, 52 | train_batches=300, 53 | monitors={'hid1:out': (-0.9, -0.1, 0.1, 0.9)}) 54 | 55 | # color the network weights so they are viewable as digits. 
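# (Added note.) find('hid1', 'w') is K x (N*N) in the whitened space: the
# transpose gives one K-dimensional feature per row, color() maps each feature
# back to 784 pixel values, and the final transpose leaves one column per
# feature, matching how the other MNIST examples pass weights to plot_layers.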
56 | plot_layers([color(net.find('hid1', 'w').get_value().T).T], tied_weights=True) 57 | plt.tight_layout() 58 | plt.show() 59 | 60 | plot_images(valid[:N*N], 121, 'Sample data') 61 | plot_images(color(net.predict(whiten(valid[:N*N]))), 122, 'Reconstructed data') 62 | plt.tight_layout() 63 | plt.show() 64 | -------------------------------------------------------------------------------- /examples/recurrent-addition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import numpy.random as rng 6 | import theanets 7 | 8 | BATCH_SIZE = 32 9 | STEPS = 20 10 | 11 | weight = np.zeros((STEPS, BATCH_SIZE, 1), 'f') 12 | weight[-1:] = 1 13 | 14 | 15 | def examples(): 16 | x, z = rng.uniform(0, 1, size=(2, STEPS, BATCH_SIZE, 1)) 17 | y = np.zeros((STEPS, BATCH_SIZE, 1)) 18 | idx = list(range(STEPS - 1)) 19 | for b in range(BATCH_SIZE): 20 | rng.shuffle(idx) 21 | y[idx[0], b] = 1 22 | y[idx[1], b] = 1 23 | z[-1, b] = x[idx[0], b] + x[idx[1], b] 24 | return np.concatenate([x, y], axis=2).astype('f'), z.astype('f'), weight 25 | 26 | src, tgt, wgt = examples() 27 | theanets.log('data batches: {} -> {} @ {}', src.shape, tgt.shape, wgt.shape) 28 | 29 | e = theanets.Experiment( 30 | theanets.recurrent.Regressor, 31 | layers=(2, dict(form='rnn', activation='relu', size=100, radius=1), 1), 32 | weighted=True) 33 | e.train(examples) 34 | prd = e.network.transform(src) 35 | -------------------------------------------------------------------------------- /examples/recurrent-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy.random as rng 4 | import theanets 5 | 6 | TIME = 10 7 | BATCH_SIZE = 32 8 | 9 | e = theanets.Experiment( 10 | theanets.recurrent.Autoencoder, 11 | layers=(3, ('rnn', 10), 3), 12 | batch_size=BATCH_SIZE) 13 | 14 | 15 | def generate(): 16 | return [rng.randn(TIME, BATCH_SIZE, 3).astype('f')] 17 | 18 | batch = generate() 19 | theanets.log('data batches: {}', batch[0].shape) 20 | 21 | e.train(generate) 22 | -------------------------------------------------------------------------------- /examples/recurrent-memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''This example examines recurrent performance in a memory task. 4 | 5 | In the memory task, a network is supposed to read in T frames of n-dimensional 6 | data and reproduce the first t frames at the output of the network, after 7 | reading in T - t additional frames of n-dimensional data. 8 | 9 | This task is quite difficult for most neural network models, since the hidden 10 | layer in the network must effectively store the first inputs somewhere, preserve 11 | those values for an arbitrary amount of time (while also discarding any new 12 | inputs), and then reproduce the stored inputs in the proper order. 13 | 14 | This example uses a vanilla RNN to perform this task, but the network 15 | configuration can easily be changed to test the performance of different 16 | layer types (or even multilayer architectures). 17 | ''' 18 | 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | import theanets 22 | 23 | TIME = 10 # Total numer of time steps. 24 | BITS = 3 # Number of steps to remember/reproduce. 25 | BATCH_SIZE = 32 26 | 27 | 28 | # Here we create a mask that will be used to weight the target outputs for the 29 | # network. 
These weights are zero everywhere except for the last BITS time 30 | # steps, which forces the network to do anything it can to reproduce the input 31 | # pattern at the end of the output. 32 | mask = np.ones((BATCH_SIZE, TIME, 1), bool) 33 | mask[:, :TIME - BITS - 1] = 0 34 | 35 | 36 | # We use a callable to generate a batch of random input data to present to our 37 | # network model. Each batch consists of a random input pattern, a random output 38 | # pattern whose final BITS elements correspond to the initial BITS elements of 39 | # the input, and the fixed weight mask from above. 40 | def generate(): 41 | s, t = np.random.randn(2, BATCH_SIZE, TIME, 1).astype('f') 42 | s[:, :BITS] = t[:, -BITS:] = np.random.randn(BATCH_SIZE, BITS, 1) 43 | return s, t, mask 44 | 45 | src, tgt, msk = generate() 46 | theanets.log('data batches: {} -> {} @ {}', src.shape, tgt.shape, msk.shape) 47 | 48 | 49 | # Create a new recurrent regression model and train it up. 50 | net = theanets.recurrent.Regressor( 51 | layers=(1, dict(form='rnn', activation='relu', size=10, diagonal=1), 1), 52 | weighted=True) 53 | 54 | net.train(generate, 55 | batch_size=BATCH_SIZE, 56 | algorithm='rmsprop', 57 | max_gradient_norm=1, 58 | learning_rate=0.001, 59 | momentum=0.9, 60 | monitor_gradients=True) 61 | 62 | 63 | # Now we plot the results. Our plot contains two rows. On the top row, a random 64 | # batch of input values are shown -- time is on the y-axis, and the examples are 65 | # laid out along the x-axis. On the bottom row, the outputs from the network 66 | # model are shown -- again, time and example are on the y- and x-axes, 67 | # respectively. 68 | def plot(n, z, label, rectangle): 69 | ax = plt.subplot(2, 1, n) 70 | ax.set_frame_on(False) 71 | for loc, spine in ax.spines.items(): 72 | spine.set_color('none') 73 | ax.imshow(z, cmap='gray', vmin=-vm, vmax=vm) 74 | ax.fill_between([-0.5, BATCH_SIZE - 0.5], 75 | rectangle - 0.5, 76 | rectangle + BITS - 0.5, 77 | lw=0, color='#17becf', alpha=0.3) 78 | ax.set_xticks([]) 79 | ax.set_yticks([]) 80 | if n == 2: 81 | ax.set_xlabel('Example') 82 | ax.set_ylabel(label) 83 | 84 | out = net.predict(src)[:, :, 0].T 85 | vm = max(abs(src[:, :BITS]).max(), abs(out[:, -BITS]).max()) 86 | 87 | plot(1, src[:, :, 0].T, 'Input', 0) 88 | plot(2, out, 'Output', TIME - BITS) 89 | 90 | plt.show() 91 | -------------------------------------------------------------------------------- /examples/recurrent-sinusoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''This example compares recurrent layer performance on a sine-generation task. 4 | 5 | The task is to generate a complex sine wave that is constructed as a 6 | superposition of a small set of pure frequencies. All networks are constructed 7 | with one input (which receives all zero values), one recurrent hidden layer, and 8 | one output (which is tasked with matching the target sine wave). Each model is 9 | trained and then its predicted output is plotted for easy visual comparison of 10 | the behavior of the different layer models. 11 | 12 | For this task, the clockwork RNN layer tends to perform the best of the layer 13 | models, even though the clockwork layer uses the simplest activation (linear) 14 | and has the fewest parameters (~2000 for a 64-node hidden layer, versus ~4000 15 | for a vanilla RNN and ~17000 for an LSTM). The vanilla RNN layer tends to do the 16 | worst, or at the least is the most sensitive to the initialization of the 17 | parameters. 
The other layer models fall somewhere in the middle but tend only to 18 | match the dominant frequency in the target wave. 19 | ''' 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import theanets 24 | 25 | COLORS = ['#d62728', '#1f77b4', '#2ca02c', '#9467bd', '#ff7f0e', 26 | '#e377c2', '#8c564b', '#bcbd22', '#7f7f7f', '#17becf'] 27 | 28 | BATCH_SIZE = 2 29 | 30 | 31 | # Construct a complex sine wave as a sum of pure-frequency waves. 32 | TAU = 2 * np.pi 33 | T = np.linspace(0, TAU, 256) 34 | SIN = sum(c * np.sin(TAU * f * T) for c, f in ((2, 1.5), (3, 1.8), (4, 1.1))) 35 | 36 | 37 | # Create an input dataset consisting of all zeros, and an output dataset 38 | # containing the target sine wave. We have to stack the target sine wave here 39 | # because recurrent models expect a tensor with three dimensions, and the batch 40 | # size for recurrent networks must be greater than 1. 41 | ZERO = np.zeros((BATCH_SIZE, len(T), 1), 'f') 42 | WAVES = np.concatenate([SIN[None, :, None]] * BATCH_SIZE, axis=0).astype('f') 43 | 44 | 45 | # Set up plotting axes to show the output result and learning curves. 46 | _, (wave_ax, learn_ax) = plt.subplots(2, 1) 47 | 48 | # Plot the target wave. 49 | wave_ax.plot(T, SIN, ':', label='Target', alpha=0.7, color='#111111') 50 | 51 | 52 | # For each layer type, train a model containing that layer, and plot its 53 | # predicted output. 54 | for i, layer in enumerate(( 55 | dict(form='rnn', activation='linear', diagonal=0.5), 56 | dict(form='rnn', activation='relu', diagonal=0.5), 57 | dict(form='rrnn', activation='relu', rate='vector', diagonal=0.5), 58 | dict(form='scrn', activation='elu'), 59 | dict(form='gru', activation='relu'), 60 | dict(form='lstm', activation='tanh'), 61 | dict(form='clockwork', activation='linear', periods=(1, 4, 16, 64)))): 62 | name = '{form}+{activation}'.format(**layer) 63 | layer['size'] = 64 64 | theanets.log('training {} model', name) 65 | net = theanets.recurrent.Regressor([1, layer, 1]) 66 | losses = [] 67 | for tm, _ in net.itertrain([ZERO, WAVES], 68 | monitor_gradients=True, 69 | batch_size=BATCH_SIZE, 70 | algorithm='rmsprop', 71 | learning_rate=0.0001, 72 | momentum=0.9, 73 | min_improvement=0.01): 74 | losses.append(tm['loss']) 75 | prd = net.predict(ZERO) 76 | wave_ax.plot(T, prd[0, :, 0].flatten(), label=name, alpha=0.7, color=COLORS[i]) 77 | learn_ax.plot(losses, label=name, alpha=0.7, color=COLORS[i]) 78 | 79 | 80 | # Make the plots look nice. 
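# Both panels get the same cosmetic treatment below: ticks only on the bottom and
# left, the top and right spines hidden, and the remaining spines offset outward
# so the traces are easier to read.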
81 | for ax in [wave_ax, learn_ax]: 82 | ax.xaxis.tick_bottom() 83 | ax.yaxis.tick_left() 84 | ax.spines['top'].set_color('none') 85 | ax.spines['right'].set_color('none') 86 | ax.spines['bottom'].set_position(('outward', 6)) 87 | ax.spines['left'].set_position(('outward', 6)) 88 | 89 | wave_ax.set_ylabel('Amplitude') 90 | wave_ax.set_xlabel('Time') 91 | 92 | learn_ax.set_ylabel('Loss') 93 | learn_ax.set_xlabel('Training Epoch') 94 | learn_ax.grid(True) 95 | 96 | plt.legend() 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/recurrent-text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import theanets 6 | 7 | import utils 8 | 9 | COLORS = ['#d62728', '#1f77b4', '#2ca02c', '#9467bd', '#ff7f0e', 10 | '#e377c2', '#8c564b', '#bcbd22', '#7f7f7f', '#17becf'] 11 | 12 | URL = 'http://www.gutenberg.org/cache/epub/2701/pg2701.txt' 13 | 14 | with open(utils.find('moby.txt', URL)) as handle: 15 | text = theanets.recurrent.Text(handle.read().lower().replace('\n', ' ')) 16 | 17 | seed = text.encode(text.text[200000:200010]) 18 | for i, layer in enumerate(( 19 | dict(form='rnn', activation='sigmoid', diagonal=0.99), 20 | dict(form='gru', activation='sigmoid'), 21 | dict(form='scrn', activation='sigmoid'), 22 | dict(form='bcrnn', activation='sigmoid', num_modules=5), 23 | dict(form='lstm'), 24 | dict(form='mrnn', activation='sigmoid', factors=len(text.alpha)), 25 | dict(form='clockwork', activation='sigmoid', periods=(1, 2, 4, 8, 16)))): 26 | losses = [] 27 | layer.update(size=100) 28 | net = theanets.recurrent.Classifier([ 29 | 1 + len(text.alpha), layer, 1000, 1 + len(text.alpha)]) 30 | for tm, _ in net.itertrain(text.classifier_batches(30, 16), 31 | min_improvement=0.99, 32 | validate_every=50, 33 | patience=0, 34 | algo='adam', 35 | max_gradient_norm=1, 36 | learning_rate=0.01): 37 | if np.isnan(tm['loss']): 38 | break 39 | print('{}|{} ({:.1f}%)'.format( 40 | text.decode(seed), 41 | text.decode(net.predict_sequence(seed, 30)), 42 | 100 * tm['acc'])) 43 | losses.append(tm['loss']) 44 | 45 | plt.plot(losses, label=layer['form'], alpha=0.7, color=COLORS[i]) 46 | 47 | plt.gca().xaxis.tick_bottom() 48 | plt.gca().yaxis.tick_left() 49 | plt.gca().spines['top'].set_color('none') 50 | plt.gca().spines['right'].set_color('none') 51 | plt.gca().spines['bottom'].set_position(('outward', 6)) 52 | plt.gca().spines['left'].set_position(('outward', 6)) 53 | 54 | plt.gca().set_ylabel('Loss') 55 | plt.gca().set_xlabel('Training Epoch') 56 | plt.gca().grid(True) 57 | 58 | plt.legend() 59 | plt.show() 60 | -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import numpy as np 3 | import os 4 | import pickle 5 | import sys 6 | import tarfile 7 | import tempfile 8 | import urllib 9 | 10 | try: 11 | import matplotlib.pyplot as plt 12 | except ImportError: 13 | raise RuntimeError('please install matplotlib to run the examples!') 14 | 15 | DATASETS = os.path.join(tempfile.gettempdir(), 'theanets-datasets') 16 | 17 | 18 | def find(dataset, url): 19 | '''Find the location of a dataset on disk, downloading if needed.''' 20 | fn = os.path.join(DATASETS, dataset) 21 | dn = os.path.dirname(fn) 22 | if not os.path.exists(dn): 23 | print('creating dataset directory: %s', dn) 24 | os.makedirs(dn) 
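# Download the file only if a cached copy is not already on disk, using the
# urlretrieve call appropriate for the running Python version.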
25 | if not os.path.exists(fn): 26 | if sys.version_info < (3, ): 27 | urllib.urlretrieve(url, fn) 28 | else: 29 | urllib.request.urlretrieve(url, fn) 30 | return fn 31 | 32 | 33 | def load_mnist(flatten=True, labels=False): 34 | '''Load the MNIST digits dataset.''' 35 | fn = find('mnist.pkl.gz', 'http://deeplearning.net/data/mnist/mnist.pkl.gz') 36 | h = gzip.open(fn, 'rb') 37 | if sys.version_info < (3, ): 38 | (timg, tlab), (vimg, vlab), (simg, slab) = pickle.load(h) 39 | else: 40 | (timg, tlab), (vimg, vlab), (simg, slab) = pickle.load(h, encoding='bytes') 41 | h.close() 42 | if not flatten: 43 | timg = timg.reshape((-1, 28, 28, 1)) 44 | vimg = vimg.reshape((-1, 28, 28, 1)) 45 | simg = simg.reshape((-1, 28, 28, 1)) 46 | if labels: 47 | return ((timg, tlab.astype('i')), 48 | (vimg, vlab.astype('i')), 49 | (simg, slab.astype('i'))) 50 | return (timg, ), (vimg, ), (simg, ) 51 | 52 | 53 | def load_cifar(flatten=True, labels=False): 54 | '''Load the CIFAR10 image dataset.''' 55 | def extract(name): 56 | print('extracting data from {}'.format(name)) 57 | h = tar.extractfile(name) 58 | if sys.version_info < (3, ): 59 | d = pickle.load(h) 60 | else: 61 | d = pickle.load(h, encoding='bytes') 62 | for k in list(d): 63 | d[k.decode('utf8')] = d[k] 64 | h.close() 65 | img = d['data'].reshape( 66 | (-1, 3, 32, 32)).transpose((0, 2, 3, 1)).astype('f') / 128 - 1 67 | if flatten: 68 | img = img.reshape((-1, 32 * 32 * 3)) 69 | d['data'] = img 70 | return d 71 | 72 | fn = find('cifar10.tar.gz', 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz') 73 | tar = tarfile.open(fn) 74 | 75 | imgs = [] 76 | labs = [] 77 | for i in range(1, 6): 78 | d = extract('cifar-10-batches-py/data_batch_{}'.format(i)) 79 | imgs.extend(d['data']) 80 | labs.extend(d['labels']) 81 | timg = np.asarray(imgs[:40000]) 82 | tlab = np.asarray(labs[:40000], 'i') 83 | vimg = np.asarray(imgs[40000:]) 84 | vlab = np.asarray(labs[40000:], 'i') 85 | 86 | d = extract('cifar-10-batches-py/test_batch') 87 | simg = d['data'] 88 | slab = d['labels'] 89 | 90 | tar.close() 91 | 92 | if labels: 93 | return (timg, tlab), (vimg, vlab), (simg, slab) 94 | return (timg, ), (vimg, ), (simg, ) 95 | 96 | 97 | def plot_images(imgs, loc, title=None, channels=1): 98 | '''Plot an array of images. 99 | 100 | We assume that we are given a matrix of data whose shape is (n*n, s*s*c) -- 101 | that is, there are n^2 images along the first axis of the array, and each 102 | image is c squares measuring s pixels on a side. Each row of the input will 103 | be plotted as a sub-region within a single image array containing an n x n 104 | grid of images. 105 | ''' 106 | n = int(np.sqrt(len(imgs))) 107 | assert n * n == len(imgs), 'images array must contain a square number of rows!' 108 | s = int(np.sqrt(len(imgs[0]) / channels)) 109 | assert s * s == len(imgs[0]) / channels, 'images must be square!' 
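# Tile the individual images into a single n-by-n grid with a one-pixel gap
# between neighbors, then rescale the pixel values to [0, 1] for display.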
110 | 111 | img = np.zeros(((s+1) * n - 1, (s+1) * n - 1, channels), dtype=imgs[0].dtype) 112 | for i, pix in enumerate(imgs): 113 | r, c = divmod(i, n) 114 | img[r * (s+1):(r+1) * (s+1) - 1, 115 | c * (s+1):(c+1) * (s+1) - 1] = pix.reshape((s, s, channels)) 116 | 117 | img -= img.min() 118 | img /= img.max() 119 | 120 | ax = plt.gcf().add_subplot(loc) 121 | ax.xaxis.set_visible(False) 122 | ax.yaxis.set_visible(False) 123 | ax.set_frame_on(False) 124 | ax.imshow(img.squeeze(), cmap=plt.cm.gray) 125 | if title: 126 | ax.set_title(title) 127 | 128 | 129 | def plot_layers(weights, tied_weights=False, channels=1): 130 | '''Create a plot of weights, visualized as "bottom-level" pixel arrays.''' 131 | if hasattr(weights[0], 'get_value'): 132 | weights = [w.get_value() for w in weights] 133 | k = min(len(weights), 9) 134 | imgs = np.eye(weights[0].shape[0]) 135 | for i, weight in enumerate(weights[:-1]): 136 | imgs = np.dot(weight.T, imgs) 137 | plot_images(imgs, 138 | 100 + 10 * k + i + 1, 139 | channels=channels, 140 | title='Layer {}'.format(i+1)) 141 | weight = weights[-1] 142 | n = weight.shape[1] / channels 143 | if int(np.sqrt(n)) ** 2 != n: 144 | return 145 | if tied_weights: 146 | imgs = np.dot(weight.T, imgs) 147 | plot_images(imgs, 148 | 100 + 10 * k + k, 149 | channels=channels, 150 | title='Layer {}'.format(k)) 151 | else: 152 | plot_images(weight, 153 | 100 + 10 * k + k, 154 | channels=channels, 155 | title='Decoding weights') 156 | 157 | 158 | def plot_filters(filters): 159 | '''Create a plot of conv filters, visualized as pixel arrays.''' 160 | imgs = filters.get_value() 161 | 162 | N, channels, x, y = imgs.shape 163 | n = int(np.sqrt(N)) 164 | assert n * n == N, 'filters must contain a square number of rows!' 165 | assert channels == 1 or channels == 3, 'can only plot grayscale or rgb filters!' 
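# As in plot_images, tile the filters into an n-by-n grid (transposing each one
# to channels-last order) and normalize the result before displaying it.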
166 | 167 | img = np.zeros(((y+1) * n - 1, (x+1) * n - 1, channels), dtype=imgs[0].dtype) 168 | for i, pix in enumerate(imgs): 169 | r, c = divmod(i, n) 170 | img[r * (y+1):(r+1) * (y+1) - 1, 171 | c * (x+1):(c+1) * (x+1) - 1] = pix.transpose((1, 2, 0)) 172 | 173 | img -= img.min() 174 | img /= img.max() 175 | 176 | ax = plt.gcf().add_subplot(111) 177 | ax.xaxis.set_visible(False) 178 | ax.yaxis.set_visible(False) 179 | ax.set_frame_on(False) 180 | ax.imshow(img.squeeze(), cmap=plt.cm.gray) 181 | -------------------------------------------------------------------------------- /examples/weighted-classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn.datasets 3 | import sklearn.metrics 4 | import theanets 5 | 6 | samples, labels = sklearn.datasets.make_classification( 7 | n_samples=10000, 8 | n_features=100, 9 | n_informative=30, 10 | n_redundant=30, 11 | n_repeated=0, 12 | n_classes=2, 13 | n_clusters_per_class=3, 14 | weights=[0.99, 0.01], 15 | flip_y=0.01, 16 | ) 17 | 18 | weights = np.ones_like(labels) 19 | weights[labels.nonzero()] *= 10 20 | 21 | 22 | def split(a, b): 23 | return [samples[a:b].astype('float32'), 24 | labels[a:b].astype('int32'), 25 | weights[a:b].astype('float32')] 26 | 27 | train = split(0, 9000) 28 | valid = split(9000, 10000) 29 | 30 | net = theanets.Classifier( 31 | layers=(100, 10, 2), 32 | weighted=True, 33 | ) 34 | 35 | net.train(train, valid) 36 | 37 | truth = valid[1] 38 | theanets.log('# of true 1s: {}', truth.sum()) 39 | 40 | guess = net.predict(valid[0]) 41 | theanets.log('# of predicted 1s: {}', guess.sum()) 42 | 43 | cm = sklearn.metrics.confusion_matrix(truth, guess) 44 | theanets.log('confusion matrix (true class = rows, predicted class = cols):') 45 | theanets.log(str(cm)) 46 | -------------------------------------------------------------------------------- /examples/xor-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | '''Example using the theanets package for learning the XOR relation.''' 5 | 6 | import numpy as np 7 | import theanets 8 | 9 | X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='f') 10 | Y = np.array([[0], [1], [1], [0]], dtype='f') 11 | 12 | net = theanets.Regressor([dict(size=2, input_noise=0.3), 2, 1]) 13 | net.train([X, Y], algo='rmsprop', patience=10, batch_size=4) 14 | 15 | theanets.log('Input: {}', [list(x) for x in X]) 16 | theanets.log('XOR output: {}', Y.T) 17 | theanets.log('NN XOR predictions: {}', net.predict(X.astype('f')).T.round(2)) 18 | -------------------------------------------------------------------------------- /scripts/theanets-char-rnn: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import click 4 | import logging 5 | import numpy as np 6 | import theanets 7 | 8 | @click.command() 9 | @click.option('-d', '--data', multiple=True, metavar='FILE', 10 | help='load text from FILE') 11 | @click.option('-t', '--time', default=100, type=int, metavar='T', 12 | help='train on sequences of T characters') 13 | @click.option('-a', '--alphabet', default='', metavar='CHARS', 14 | help='use CHARS for alphabet; defaults to all chars in text') 15 | @click.option('-A', '--exclude-alphabet', default='', metavar='CHARS', 16 | help='discard CHARS from alphabet') 17 | @click.option('-l', '--layers', multiple=True, type=int, default=[100], metavar='N', 18 | help='construct a 
network with layers of size N1, N2, ...') 19 | @click.option('-L', '--layer-type', default='lstm', metavar='{rnn|gru|lstm|clockwork}', 20 | help='construct a network with this RNN layer type') 21 | @click.option('-g', '--activation', default='relu', metavar='FUNC', 22 | help='function for hidden unit activations') 23 | @click.option('-O', '--algorithm', default=['nag'], multiple=True, metavar='ALGO', 24 | help='train with the given optimization algorithm(s)') 25 | @click.option('-p', '--patience', type=int, default=4, metavar='N', 26 | help='stop training if less than --min-improvement for N validations') 27 | @click.option('-v', '--validate-every', type=int, default=10, metavar='N', 28 | help='validate the model every N updates') 29 | @click.option('-b', '--batch-size', type=int, default=64, metavar='N', 30 | help='train with mini-batches of size N') 31 | @click.option('-B', '--train-batches', type=int, metavar='N', 32 | help='use at most N batches during gradient computations') 33 | @click.option('-V', '--valid-batches', type=int, metavar='N', 34 | help='use at most N batches during validation') 35 | @click.option('-i', '--min-improvement', type=float, default=0, metavar='R', 36 | help='train until relative improvement is less than R') 37 | @click.option('-x', '--max-gradient-norm', type=float, default=1, metavar='V', 38 | help='clip gradient norm to the interval [0, V]') 39 | @click.option('-r', '--learning-rate', type=float, default=1e-4, metavar='V', 40 | help='train the network with a learning rate of V') 41 | @click.option('-m', '--momentum', type=float, default=0.9, metavar='V', 42 | help='train the network with momentum of V') 43 | @click.option('-n', '--nesterov/--no-nesterov', help='use Nesterov momentum') 44 | @click.option('-s', '--save-progress', metavar='FILE', 45 | help='save the model periodically to FILE') 46 | @click.option('-S', '--save-every', type=float, default=0, metavar='N', 47 | help='save the model every N iterations or -N minutes') 48 | @click.option('--input-noise', type=float, default=0, metavar='S', 49 | help='add noise to network inputs drawn from N(0, S)') 50 | @click.option('--input-dropouts', type=float, default=0, metavar='R', 51 | help='randomly set fraction R of input activations to 0') 52 | @click.option('--hidden-noise', type=float, default=0, metavar='S', 53 | help='add noise to hidden activations drawn from N(0, S)') 54 | @click.option('--hidden-dropouts', type=float, default=0, metavar='R', 55 | help='randomly set fraction R of hidden activations to 0') 56 | @click.option('--hidden-l1', type=float, default=0, metavar='K', 57 | help='regularize hidden activity with K on the L1 term') 58 | @click.option('--hidden-l2', type=float, default=0, metavar='K', 59 | help='regularize hidden activity with K on the L2 term') 60 | @click.option('--weight-l1', type=float, default=0, metavar='K', 61 | help='regularize network weights with K on the L1 term') 62 | @click.option('--weight-l2', type=float, default=0, metavar='K', 63 | help='regularize network weights with K on the L2 term') 64 | @click.option('--rms-halflife', type=float, default=5, metavar='N', 65 | help='use a half-life of N for RMS exponential moving averages') 66 | @click.option('--rms-regularizer', type=float, default=1e-8, metavar='N', 67 | help='regularize RMS exponential moving averages by N') 68 | def main(**kwargs): 69 | corpus = [] 70 | for f in kwargs['data']: 71 | corpus.append(open(f).read()) 72 | logging.info('%s: loaded training document', f) 73 | logging.info('loaded %d training 
documents', len(corpus)) 74 | 75 | alpha = set(kwargs['alphabet']) 76 | if not alpha: 77 | for c in corpus: 78 | alpha |= set(c) 79 | alpha -= set(kwargs['exclude_alphabet']) 80 | alpha = sorted(alpha) 81 | logging.info('character alphabet: %s', alpha) 82 | 83 | # encode document chars as integer alphabet index values. 84 | encoded = [np.array([alpha.index(c) for c in doc]) for doc in corpus] 85 | 86 | def batch(): 87 | T, B = kwargs['time'], kwargs['batch_size'] 88 | inputs = np.zeros((T, B, len(alpha)), 'f') 89 | outputs = np.zeros((T, B), 'i') 90 | enc = np.random.choice(encoded) 91 | for b in range(B): 92 | o = np.random.randint(len(enc) - T - 1) 93 | inputs[np.arange(T), b, enc[o:o+T]] = 1 94 | outputs[np.arange(T), b] = enc[o+1:o+T+1] 95 | return [inputs, outputs] 96 | 97 | layers = [len(alpha)] 98 | for l in kwargs['layers']: 99 | layers.append(dict(size=l, 100 | form=kwargs['layer_type'], 101 | activation=kwargs['activation'])) 102 | layers.append(len(alpha)) 103 | 104 | exp = theanets.Experiment(theanets.recurrent.Classifier, layers=layers) 105 | 106 | exp.train( 107 | batch, 108 | algo=kwargs['algorithm'], 109 | patience=kwargs['patience'], 110 | min_improvement=kwargs['min_improvement'], 111 | validate_every=kwargs['validate_every'], 112 | batch_size=kwargs['batch_size'], 113 | train_batches=kwargs['train_batches'], 114 | valid_batches=kwargs['valid_batches'], 115 | learning_rate=kwargs['learning_rate'], 116 | momentum=kwargs['momentum'], 117 | nesterov=kwargs['nesterov'], 118 | save_progress=kwargs['save_progress'], 119 | save_every=kwargs['save_every'], 120 | weight_l1=kwargs['weight_l1'], 121 | weight_l2=kwargs['weight_l2'], 122 | hidden_l2=kwargs['hidden_l2'], 123 | hidden_l1=kwargs['hidden_l1'], 124 | input_noise=kwargs['input_noise'], 125 | input_dropouts=kwargs['input_dropouts'], 126 | hidden_noise=kwargs['hidden_noise'], 127 | hidden_dropouts=kwargs['hidden_dropouts'], 128 | ) 129 | 130 | if kwargs['save_progress']: 131 | exp.save(kwargs['save_progress']) 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pytest] 2 | #pep8ignore = E226,E302,E41 3 | pep8maxlinelength = 90 4 | 5 | [bdist_wheel] 6 | universal = 1 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | README = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'README.rst') 5 | 6 | setuptools.setup( 7 | name='theanets', 8 | version='0.8.0pre', 9 | packages=setuptools.find_packages(), 10 | author='lmjohns3', 11 | author_email='theanets@googlegroups.com', 12 | description='Feedforward and recurrent neural nets using Theano', 13 | long_description=open(README).read(), 14 | license='MIT', 15 | url='http://github.com/lmjohns3/theanets', 16 | keywords=('machine-learning ' 17 | 'neural-network ' 18 | 'deep-neural-network ' 19 | 'recurrent-neural-network ' 20 | 'autoencoder ' 21 | 'sparse-autoencoder ' 22 | 'classifier ' 23 | 'theano ' 24 | ), 25 | install_requires=['click', 'downhill', 'theano', 26 | # TODO(leif): remove when theano is fixed. 
27 | 'nose-parameterized'], 28 | classifiers=[ 29 | 'Development Status :: 3 - Alpha', 30 | 'Intended Audience :: Science/Research', 31 | 'License :: OSI Approved :: MIT License', 32 | 'Operating System :: OS Independent', 33 | 'Topic :: Scientific/Engineering', 34 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /test/activations_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import io 4 | import numpy as np 5 | import pytest 6 | import theanets 7 | import theano 8 | 9 | PROBE = np.array([-10, -1, -0.1, 0, 0.1, 1, 10], 'f') 10 | 11 | 12 | @pytest.mark.parametrize('activation, expected', [ 13 | ('linear', PROBE), 14 | ('logistic', 1 / (1 + np.exp(-PROBE))), 15 | ('sigmoid', 1 / (1 + np.exp(-PROBE))), 16 | ('softmax', np.exp(PROBE) / sum(np.exp(PROBE))), 17 | ('softplus', np.log1p(np.exp(PROBE))), 18 | ('relu', np.clip(PROBE, 0, 100)), 19 | ('rect:max', np.clip(PROBE, 0, 100)), 20 | ('rect:min', np.clip(PROBE, -100, 1)), 21 | ('rect:minmax', np.clip(np.clip(PROBE, 0, 100), -100, 1)), 22 | ('norm:mean', PROBE - PROBE.mean()), 23 | ('norm:max', PROBE / abs(PROBE).max()), 24 | ('norm:std', PROBE / PROBE.std()), 25 | ('norm:z', (PROBE - PROBE.mean()) / PROBE.std()), 26 | 27 | # values based on random initial parameters using seed below 28 | ('elu', np.array([ 29 | -1.15013397, -0.74292195, -0.0999504, 0, 0.1, 1, 10], 'f')), 30 | ('prelu', np.array([ 31 | -11.50186157, -1.17528522, -0.10503119, 0, 0.1, 1, 10], 'f')), 32 | ('lgrelu', np.array([ 33 | -10.52778435, -1.04052365, -0.11633276, 0, 0.10640667, 34 | 1.04642045, 10.21983242], 'f')), 35 | ('maxout:3', np.array([ 36 | 16.60424042, 1.80405843, 1.99347568, 0.3595323, -0.513098, 37 | 2.77195668, 0.61599374], 'f')), 38 | 39 | # combo burgers 40 | ('relu+tanh', np.tanh(np.clip(PROBE, 0, 100))), 41 | ('softplus+norm:z', ((np.log1p(np.exp(PROBE)) - 42 | np.log1p(np.exp(PROBE)).mean()) / 43 | np.log1p(np.exp(PROBE)).std())), 44 | ]) 45 | def test_activation(activation, expected): 46 | layer = theanets.layers.Feedforward(inputs='in', size=7, rng=13) 47 | layer.bind(theanets.Network([3])) 48 | f = theanets.activations.build(activation, layer) 49 | actual = f(theano.shared(PROBE)) 50 | if hasattr(actual, 'eval'): 51 | actual = actual.eval() 52 | assert np.allclose(actual, expected) 53 | 54 | 55 | def test_build(): 56 | layer = theanets.layers.Feedforward(inputs='in', size=3, activation='relu') 57 | layer.bind(theanets.Network([3])) 58 | a = layer.activate 59 | assert callable(a) 60 | assert a.name == 'relu' 61 | assert a.params == [] 62 | 63 | 64 | def test_build_composed(): 65 | layer = theanets.layers.Feedforward( 66 | inputs='in', size=3, activation='relu+norm:z') 67 | layer.bind(theanets.Network([3])) 68 | a = layer.activate 69 | assert callable(a) 70 | assert a.name == 'norm:z(relu)', a.name 71 | assert a.params == [] 72 | 73 | 74 | def test_save_load_composed(): 75 | model = theanets.Network([3, (4, 'relu+norm:z')]) 76 | handle = io.BytesIO() 77 | model.save(handle) 78 | handle.seek(0) 79 | second = theanets.Network.load(handle) 80 | assert second.layers[1].activate.f.name == model.layers[1].activate.f.name 81 | assert second.layers[1].activate.g.name == model.layers[1].activate.g.name 82 | 83 | 84 | @pytest.mark.parametrize('activation, expected', [ 85 | ('prelu', ['l.leak']), 86 | ('lgrelu', ['l.gain', 'l.leak']), 87 | ('maxout:4', 
['l.intercept', 'l.slope']), 88 | ]) 89 | def test_parameters(activation, expected): 90 | layer = theanets.layers.Feedforward( 91 | inputs='in', size=3, activation=activation, name='l') 92 | layer.bind(theanets.Network([3, layer])) 93 | assert sorted(p.name for p in layer.activate.params) == expected 94 | -------------------------------------------------------------------------------- /test/convolution_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | REG_LAYERS = [ 8 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, u.NUM_INPUTS), 9 | dict(size=u.NUM_HID1, form='conv2', filter_size=u.CNN.FILTER_SIZE), 10 | dict(size=u.NUM_HID2, form='conv2', filter_size=u.CNN.FILTER_SIZE), 11 | 'flat', 12 | u.NUM_OUTPUTS] 13 | 14 | CLF_LAYERS = [ 15 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, u.NUM_INPUTS), 16 | dict(size=u.NUM_HID1, form='conv2', filter_size=u.CNN.FILTER_SIZE), 17 | dict(size=u.NUM_HID2, form='conv2', filter_size=u.CNN.FILTER_SIZE), 18 | 'flat', 19 | u.NUM_CLASSES] 20 | 21 | 22 | def assert_shape(actual, width, height, channels): 23 | assert actual == (u.NUM_EXAMPLES, width, height, channels) 24 | 25 | 26 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 27 | (theanets.convolution.Regressor, REG_LAYERS, False, u.CNN.REG_DATA), 28 | (theanets.convolution.Classifier, CLF_LAYERS, False, u.CNN.CLF_DATA), 29 | (theanets.convolution.Regressor, REG_LAYERS, True, u.CNN.WREG_DATA), 30 | (theanets.convolution.Classifier, CLF_LAYERS, True, u.CNN.WCLF_DATA), 31 | ]) 32 | def test_sgd(Model, layers, weighted, data): 33 | u.assert_progress(Model(layers, weighted=weighted), data) 34 | 35 | 36 | @pytest.mark.parametrize('Model, layers, output', [ 37 | (theanets.convolution.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 38 | (theanets.convolution.Classifier, CLF_LAYERS, (u.NUM_EXAMPLES, )), 39 | ]) 40 | def test_predict(Model, layers, output): 41 | u.assert_shape(Model(layers).predict(u.CNN.INPUTS).shape, output) 42 | 43 | 44 | @pytest.mark.parametrize('Model, layers, target, score', [ 45 | (theanets.convolution.Regressor, REG_LAYERS, u.OUTPUTS, -16.850263595581055), 46 | (theanets.convolution.Classifier, CLF_LAYERS, u.CLASSES, 0.171875), 47 | ]) 48 | def test_score(Model, layers, target, score): 49 | assert Model(layers).score(u.CNN.INPUTS, target) == score 50 | 51 | 52 | @pytest.mark.parametrize('Model, layers, target', [ 53 | (theanets.convolution.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 54 | (theanets.convolution.Classifier, CLF_LAYERS, u.NUM_CLASSES), 55 | ]) 56 | def test_predict(Model, layers, target): 57 | outs = Model(layers).feed_forward(u.CNN.INPUTS) 58 | assert len(list(outs)) == 8 59 | W, H = u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT 60 | w, h = u.CNN.FILTER_WIDTH, u.CNN.FILTER_HEIGHT 61 | assert_shape(outs['in:out'].shape, W, H, u.NUM_INPUTS) 62 | assert_shape(outs['hid1:out'].shape, W - w + 1, H - h + 1, u.NUM_HID1) 63 | assert_shape(outs['hid2:out'].shape, W - 2 * w + 2, H - 2 * h + 2, u.NUM_HID2) 64 | u.assert_shape(outs['out:out'].shape, target) 65 | 66 | 67 | class TestClassifier: 68 | @pytest.fixture 69 | def net(self): 70 | return theanets.convolution.Classifier(CLF_LAYERS) 71 | 72 | def test_predict_proba(self, net): 73 | u.assert_shape(net.predict_proba(u.CNN.INPUTS).shape, u.NUM_CLASSES) 74 | 75 | def test_predict_logit(self, net): 76 | u.assert_shape(net.predict_logit(u.CNN.INPUTS).shape, u.NUM_CLASSES) 77 | 78 | def test_score(self, net): 79 | w = 0.5 * 
np.ones(u.CLASSES.shape, 'f') 80 | assert 0 <= net.score(u.CNN.INPUTS, u.CLASSES, w) <= 1 81 | -------------------------------------------------------------------------------- /test/feedforward_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | 8 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 9 | (theanets.Regressor, u.REG_LAYERS, False, u.REG_DATA), 10 | (theanets.Classifier, u.CLF_LAYERS, False, u.CLF_DATA), 11 | (theanets.Autoencoder, u.AE_LAYERS, False, u.AE_DATA), 12 | (theanets.Regressor, u.REG_LAYERS, True, u.WREG_DATA), 13 | (theanets.Classifier, u.CLF_LAYERS, True, u.WCLF_DATA), 14 | (theanets.Autoencoder, u.AE_LAYERS, True, u.WAE_DATA), 15 | ]) 16 | def test_sgd(Model, layers, weighted, data): 17 | u.assert_progress(Model(layers, weighted=weighted), data) 18 | 19 | 20 | @pytest.mark.parametrize('Model, layers, output', [ 21 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 22 | (theanets.Classifier, u.CLF_LAYERS, (u.NUM_EXAMPLES, )), 23 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 24 | ]) 25 | def test_predict(Model, layers, output): 26 | u.assert_shape(Model(layers).predict(u.INPUTS).shape, output) 27 | 28 | 29 | @pytest.mark.parametrize('Model, layers, target, score', [ 30 | (theanets.Regressor, u.REG_LAYERS, u.OUTPUTS, -1.0473043918609619), 31 | (theanets.Classifier, u.CLF_LAYERS, u.CLASSES, 0.171875), 32 | (theanets.Autoencoder, u.AE_LAYERS, u.INPUTS, 15.108331680297852), 33 | ]) 34 | def test_score(Model, layers, target, score): 35 | assert Model(layers).score(u.INPUTS, target) == score 36 | 37 | 38 | @pytest.mark.parametrize('Model, layers, target', [ 39 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 40 | (theanets.Classifier, u.CLF_LAYERS, u.NUM_CLASSES), 41 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 42 | ]) 43 | def test_feed_forward(Model, layers, target): 44 | outs = Model(layers).feed_forward(u.INPUTS) 45 | assert len(list(outs)) == 7 46 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 47 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 48 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 49 | u.assert_shape(outs['out:out'].shape, target) 50 | 51 | 52 | def test_decode_from_multiple_layers(): 53 | net = theanets.Regressor([u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, dict( 54 | size=u.NUM_OUTPUTS, inputs=('hid2:out', 'hid1:out'))]) 55 | outs = net.feed_forward(u.INPUTS) 56 | assert len(list(outs)) == 7 57 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 58 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 59 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 60 | u.assert_shape(outs['out:out'].shape, u.NUM_OUTPUTS) 61 | 62 | 63 | class TestClassifier: 64 | @pytest.fixture 65 | def net(self): 66 | return theanets.Classifier(u.CLF_LAYERS) 67 | 68 | def test_predict_proba(self, net): 69 | u.assert_shape(net.predict_proba(u.INPUTS).shape, u.NUM_CLASSES) 70 | 71 | def test_predict_logit(self, net): 72 | u.assert_shape(net.predict_logit(u.INPUTS).shape, u.NUM_CLASSES) 73 | 74 | def test_score(self, net): 75 | w = 0.5 * np.ones(u.CLASSES.shape, 'f') 76 | assert 0 <= net.score(u.INPUTS, u.CLASSES, w) <= 1 77 | 78 | 79 | class TestAutoencoder: 80 | @pytest.fixture 81 | def net(self): 82 | return theanets.Autoencoder(u.AE_LAYERS) 83 | 84 | def test_encode_hid1(self, net): 85 | z = net.encode(u.INPUTS, 'hid1') 86 | u.assert_shape(z.shape, u.NUM_HID1) 87 | 88 | def test_encode_hid2(self, net): 89 
| z = net.encode(u.INPUTS, 'hid2') 90 | u.assert_shape(z.shape, u.NUM_HID2) 91 | 92 | def test_decode_hid1(self, net): 93 | x = net.decode(net.encode(u.INPUTS)) 94 | u.assert_shape(x.shape, u.NUM_INPUTS) 95 | 96 | def test_decode_hid2(self, net): 97 | x = net.decode(net.encode(u.INPUTS, 'hid2'), 'hid2') 98 | u.assert_shape(x.shape, u.NUM_INPUTS) 99 | 100 | def test_score(self, net): 101 | labels = np.random.randint(0, 2, size=u.INPUTS.shape) 102 | assert net.score(u.INPUTS, labels) < 0 103 | -------------------------------------------------------------------------------- /test/graph_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pytest 4 | import theanets 5 | 6 | try: 7 | from itertools import izip as zip 8 | except ImportError: # python3 9 | pass 10 | 11 | import util as u 12 | 13 | 14 | class TestNetwork: 15 | def test_layer_ints(self): 16 | model = theanets.Regressor((1, 2, 3)) 17 | assert len(model.layers) == 3 18 | 19 | @pytest.mark.parametrize('layers', [ 20 | (1, (2, 'relu'), 3), 21 | (1, dict(size=2, activation='relu', form='rnn'), 3), 22 | (1, 2, dict(size=3, inputs='hid1')), 23 | (1, 2, dict(size=3, inputs=('in', 'hid1'))), 24 | (1, 2, (1, 'tied')), 25 | (1, 2, dict(size=1, form='tied', partner='hid1')), 26 | ]) 27 | def test_layer_tuples(self, layers): 28 | model = theanets.Regressor(layers) 29 | assert len(model.layers) == 3 30 | 31 | assert isinstance(model.layers[0], theanets.layers.Input) 32 | assert model.layers[0].kwargs['activation'] == 'linear' 33 | assert model.layers[0].output_shape == (1, ) 34 | 35 | assert model.layers[1].kwargs['activation'] == 'relu' 36 | spec = layers[1] 37 | if isinstance(spec, dict) and spec.get('form') == 'rnn': 38 | assert isinstance(model.layers[1], theanets.layers.RNN) 39 | else: 40 | assert isinstance(model.layers[1], theanets.layers.Feedforward) 41 | 42 | assert model.layers[2].kwargs['activation'] == 'linear' 43 | spec = layers[2] 44 | if (isinstance(spec, tuple) and 'tied' in spec) or \ 45 | (isinstance(spec, dict) and spec.get('form') == 'tied'): 46 | assert isinstance(model.layers[2], theanets.layers.Tied) 47 | assert model.layers[2].partner is model.layers[1] 48 | 49 | @pytest.mark.parametrize('layers', [ 50 | (1, 2, dict(size=3, inputs='hid2')), 51 | (1, (2, 'tied'), (2, 'tied'), (1, 'tied')), 52 | ]) 53 | def test_layers_raises(self, layers): 54 | with pytest.raises(theanets.util.ConfigurationError): 55 | theanets.Regressor(layers) 56 | 57 | @pytest.mark.parametrize('spec, cls, shape, act', [ 58 | (6, theanets.layers.Feedforward, (6, ), None), 59 | ((6, ), theanets.layers.Feedforward, (6, ), None), 60 | ((6, 7), theanets.layers.Feedforward, (6, 7), None), 61 | ((6, 'linear'), theanets.layers.Feedforward, (6, ), 'linear'), 62 | ((6, 'linear', 'classifier'), theanets.layers.Classifier, (6, ), 'softmax'), 63 | (dict(size=6), theanets.layers.Feedforward, (6, ), None), 64 | (dict(size=6, form='ff'), theanets.layers.Feedforward, (6, ), None), 65 | (dict(size=6, activation='linear'), theanets.layers.Feedforward, (6, ), 'linear'), 66 | (dict(shape=(6, 7)), theanets.layers.Feedforward, (6, 7), None), 67 | ]) 68 | def test_add_layer(self, spec, cls, shape, act): 69 | model = theanets.Regressor([3, spec, 4]) 70 | layer = model.layers[1] 71 | assert len(model.layers) == 3 72 | assert isinstance(layer, cls) 73 | assert layer.output_shape == shape 74 | if act is not None: 75 | assert layer.kwargs['activation'] == act 76 | 77 | 
@pytest.mark.parametrize('spec', [ 78 | (6, 'tied', 7), 79 | None, 80 | 'ff', 81 | 'tied', 82 | dict(form='ff'), 83 | dict(form='tied'), 84 | dict(form='tied', partner='hello'), 85 | dict(form='ff', inputs=('a', 'b')), 86 | ]) 87 | def test_add_layer_errors(self, spec): 88 | with pytest.raises(theanets.util.ConfigurationError): 89 | theanets.Network([dict(form='input', name='a', shape=(3, 5)), 90 | dict(form='input', name='b', shape=(4, 3)), 91 | spec, 92 | 4]) 93 | 94 | def test_updates(self): 95 | model = theanets.Regressor((15, 13)) 96 | assert not model.updates() 97 | 98 | def test_default_output_name(self): 99 | model = theanets.Regressor((1, 2, dict(size=1, form='tied', name='foo'))) 100 | assert model.losses[0].output_name == 'foo:out' 101 | model = theanets.Regressor((1, 2, 1)) 102 | assert model.losses[0].output_name == 'out:out' 103 | 104 | def test_find_number(self): 105 | model = theanets.Regressor((1, 2, 1)) 106 | p = model.find(1, 0) 107 | assert p.name == 'hid1.w' 108 | p = model.find(2, 0) 109 | assert p.name == 'out.w' 110 | 111 | def test_find_name(self): 112 | model = theanets.Regressor((1, 2, 1)) 113 | p = model.find('hid1', 'w') 114 | assert p.name == 'hid1.w' 115 | p = model.find('out', 'w') 116 | assert p.name == 'out.w' 117 | 118 | def test_find_missing(self): 119 | model = theanets.Regressor((1, 2, 1)) 120 | try: 121 | model.find('hid4', 'w') 122 | assert False 123 | except KeyError: 124 | pass 125 | try: 126 | model.find(0, 0) 127 | assert False 128 | except KeyError: 129 | pass 130 | try: 131 | model.find(1, 3) 132 | assert False 133 | except KeyError: 134 | pass 135 | 136 | def test_train(self): 137 | model = theanets.Regressor((1, 2, 1)) 138 | tm, vm = model.train([np.random.randn(100, 1).astype('f'), 139 | np.random.randn(100, 1).astype('f')]) 140 | assert tm['loss'] > 0 141 | 142 | 143 | class TestMonitors: 144 | @pytest.fixture 145 | def net(self): 146 | return theanets.Regressor((10, 15, 14, 13)) 147 | 148 | def assert_monitors(self, net, monitors, expected, sort=False): 149 | mon = [k for k, v in net.monitors(monitors=monitors)] 150 | if sort: 151 | mon = sorted(mon) 152 | assert mon == expected 153 | 154 | def test_dict(self, net): 155 | self.assert_monitors(net, {'hid1:out': 1}, ['err', 'hid1:out<1']) 156 | 157 | def test_list(self, net): 158 | self.assert_monitors(net, [('hid1:out', 1)], ['err', 'hid1:out<1']) 159 | 160 | def test_list_values(self, net): 161 | self.assert_monitors( 162 | net, {'hid1:out': [2, 1]}, ['err', 'hid1:out<2', 'hid1:out<1']) 163 | 164 | def test_dict_values(self, net): 165 | self.assert_monitors( 166 | net, {'hid1:out': dict(a=lambda e: e+1, b=lambda e: e+2)}, 167 | ['err', 'hid1:out:a', 'hid1:out:b'], sort=True) 168 | 169 | def test_not_found(self, net): 170 | self.assert_monitors(net, {'hid10:out': 1}, ['err']) 171 | 172 | def test_param(self, net): 173 | self.assert_monitors(net, {'hid1.w': 1}, ['err', 'hid1.w<1']) 174 | 175 | def test_wildcard(self, net): 176 | self.assert_monitors( 177 | net, {'*.w': 1}, ['err', 'hid1.w<1', 'hid2.w<1', 'out.w<1']) 178 | self.assert_monitors(net, {'hid?.w': 1}, ['err', 'hid1.w<1', 'hid2.w<1']) 179 | 180 | 181 | def test_save_every(tmpdir): 182 | net = theanets.Autoencoder((u.NUM_INPUTS, (3, 'prelu'), u.NUM_INPUTS)) 183 | p = tmpdir.mkdir('graph-test').join('model.pkl') 184 | fn = os.path.join(p.dirname, p.basename) 185 | train = net.itertrain([u.INPUTS], save_every=2, save_progress=fn) 186 | for i, _ in enumerate(zip(train, range(9))): 187 | if i == 3 or i == 5 or i == 7: 188 | assert 
p.check() 189 | else: 190 | assert not p.check() 191 | if p.check(): 192 | p.remove() 193 | -------------------------------------------------------------------------------- /test/layers_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | import theano.tensor as TT 5 | 6 | import util as u 7 | 8 | NI = u.NUM_INPUTS 9 | NH = u.NUM_HID1 10 | 11 | 12 | class TestFeedforward: 13 | @pytest.mark.parametrize('form, name, params, count, outputs', [ 14 | ('feedforward', 'feedforward', 'w b', 1 + NI, 'out pre'), 15 | ('ff', 'feedforward', 'w b', 1 + NI, 'out pre'), 16 | ('classifier', 'classifier', 'w b', 1 + NI, 'out pre'), 17 | ('flatten', 'flatten', '', 0, 'out'), 18 | ('flat', 'flatten', '', 0, 'out'), 19 | ('concatenate', 'concatenate', '', 0, 'out'), 20 | ('concat', 'concatenate', '', 0, 'out'), 21 | ('product', 'product', '', 0, 'out'), 22 | ('prod', 'product', '', 0, 'out'), 23 | ]) 24 | def test_build(self, form, name, params, count, outputs): 25 | layer = theanets.Layer.build(form, size=NI, name='l', inputs='in') 26 | layer.bind(theanets.Network([NI])) 27 | 28 | assert layer.__class__.__name__.lower() == name 29 | 30 | assert sorted(p.name for p in layer.params) == \ 31 | sorted('l.' + p for p in params.split()) 32 | 33 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == count * NI 34 | 35 | out, upd = layer.connect({'in:out': TT.matrix('x')}) 36 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 37 | assert sorted(upd) == [] 38 | 39 | assert layer.to_spec() == dict( 40 | form=name, name='l', size=NI, inputs='in', 41 | activation=layer.kwargs.get('activation', 'relu')) 42 | 43 | @pytest.mark.parametrize('layer', [ 44 | NH, 45 | dict(form='ff', inputs=('hid1', 'hid2'), size=NH), 46 | dict(form='tied', partner='hid1'), 47 | dict(form='prod', inputs=('hid1', 'hid2'), size=NH), 48 | dict(form='concat', inputs=('hid1', 'hid2'), size=2 * NH), 49 | ('flat', NH), 50 | ]) 51 | def test_predict(self, layer): 52 | net = theanets.Autoencoder([NI, NH, NH, layer, NI]) 53 | assert net.predict(u.INPUTS).shape == (u.NUM_EXAMPLES, NI) 54 | 55 | def test_multiple_inputs(self): 56 | layer = theanets.layers.Feedforward(inputs=('in', 'hid1'), size=NH, name='l') 57 | layer.bind(theanets.Network([NH, NH, NH])) 58 | 59 | total = sum(np.prod(p.get_value().shape) for p in layer.params) 60 | assert total == (1 + 2 * NH) * NH 61 | 62 | assert sorted(p.name for p in layer.params) == \ 63 | ['l.b', 'l.w_hid1:out', 'l.w_in:out'] 64 | 65 | assert layer.to_spec() == dict( 66 | form='feedforward', name='l', size=NH, activation='relu', 67 | inputs=('in', 'hid1')) 68 | 69 | def test_reshape(self): 70 | layer = theanets.layers.Reshape(inputs='in', shape=(4, 2), name='l') 71 | layer.bind(theanets.Network([8])) 72 | 73 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == 0 74 | 75 | assert sorted(p.name for p in layer.params) == [] 76 | 77 | assert layer.to_spec() == dict( 78 | form='reshape', name='l', shape=(4, 2), inputs='in', 79 | activation='relu') 80 | 81 | 82 | class TestRecurrent: 83 | @pytest.mark.parametrize('form, kwargs, count, params, outputs', [ 84 | ('rnn', {}, 1 + NI + NH, 'xh hh b', 'out pre'), 85 | ('clockwork', {'periods': (1, 2, 4, 8)}, 1 + NI + NH, 'xh hh b', 'out pre'), 86 | ('rrnn', {'rate': 'uniform'}, 1 + NI + NH, 'xh hh b', 'out pre rate hid'), 87 | ('rrnn', {'rate': 'log'}, 1 + NI + NH, 'xh hh b', 'out pre rate hid'), 88 | ('rrnn', {'rate': 'vector'}, 2 + 
NI + NH, 'xh hh b r', 'out pre rate hid'), 89 | ('rrnn', {'rate': 'matrix'}, 2 + NH + 2 * NI, 'xh hh b r xr', 'out pre rate hid'), 90 | ('gru', {}, 3 * (1 + NI + NH), 'b w hh hr hz', 'hid out pre rate'), 91 | ('mut1', {}, 3 + 3 * NI + 2 * NH, 'bh br bz hh hr xh xr xz', 'hid out pre rate'), 92 | ('scrn', {}, 2 * (1 + NI + 2 * NH), 'w ho so hh sh b r', 'out hid rate state'), 93 | ('lstm', {}, 7 + 4 * NH + 4 * NI, 'xh hh b cf ci co', 'out cell'), 94 | ('conv1', {'filter_size': 13}, 1 + 13 * NI, 'w b', 'pre out'), 95 | ('mrnn', {'factors': 3}, (7 + NI) * NH + 3 * NI, 'xh xf hf fh b', 96 | 'out pre factors'), 97 | ('bidirectional', {}, 1 + NI + NH // 2, 98 | 'l_bw.b l_bw.hh l_bw.xh l_fw.b l_fw.xh l_fw.hh', 99 | 'bw_out bw_pre fw_out fw_pre out pre'), 100 | ]) 101 | def test_build(self, form, kwargs, count, params, outputs): 102 | layer = theanets.Layer.build(form, size=NH, name='l', inputs='in', **kwargs) 103 | layer.bind(theanets.Network([dict(size=NI, ndim=3)])) 104 | 105 | assert layer.__class__.__name__.lower() == form 106 | 107 | expected = sorted('l.' + p for p in params.split()) 108 | if form == 'bidirectional': 109 | expected = sorted(params.split()) 110 | assert sorted(p.name for p in layer.params) == expected 111 | 112 | expected = count * NH 113 | if form == 'mrnn': 114 | expected = count 115 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == expected 116 | 117 | out, upd = layer.connect({'in:out': TT.tensor3('x')}) 118 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 119 | assert sorted(upd) == [] 120 | 121 | spec = {} 122 | if form == 'mrnn': 123 | spec['factors'] = 3 124 | if form == 'bidirectional': 125 | spec['worker'] = 'rnn' 126 | if form == 'clockwork': 127 | spec['periods'] = (1, 2, 4, 8) 128 | if form == 'scrn': 129 | spec['s_0'] = None 130 | spec['context_size'] = int(1 + np.sqrt(NH)) 131 | if form == 'lstm': 132 | spec['c_0'] = None 133 | if form not in ('bidirectional', 'conv1'): 134 | spec['h_0'] = None 135 | assert layer.to_spec() == dict( 136 | form=form, name='l', size=NH, inputs='in', 137 | activation=layer.kwargs.get('activation', 'relu'), **spec) 138 | 139 | @pytest.mark.parametrize('layer', [ 140 | (NH, 'rnn'), 141 | dict(size=NH, form='conv1', filter_size=13), 142 | ]) 143 | def test_predict(self, layer): 144 | T = u.RNN.NUM_TIMES 145 | if isinstance(layer, dict) and layer.get('form') == 'conv1': 146 | T -= layer['filter_size'] - 1 147 | net = theanets.recurrent.Autoencoder([NI, NH, NH, layer, NI]) 148 | assert net.predict(u.RNN.INPUTS).shape == (u.NUM_EXAMPLES, T, NI) 149 | 150 | 151 | class TestConvolution: 152 | @pytest.mark.parametrize('form, kwargs, count, params, outputs', [ 153 | ('conv2', {'filter_size': u.CNN.FILTER_SIZE}, 154 | 1 + NI * u.CNN.FILTER_HEIGHT * u.CNN.FILTER_WIDTH, 'w b', 'out pre'), 155 | ]) 156 | def test_build(self, form, kwargs, count, params, outputs): 157 | layer = theanets.Layer.build(form, size=NH, name='l', inputs='in', **kwargs) 158 | layer.bind(theanets.Network([dict(size=NI, ndim=4)])) 159 | 160 | assert layer.__class__.__name__.lower() == form 161 | 162 | expected = sorted('l.' 
+ p for p in params.split()) 163 | assert sorted(p.name for p in layer.params) == expected 164 | 165 | expected = count * NH 166 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == expected 167 | 168 | out, upd = layer.connect({'in:out': TT.tensor4('x')}) 169 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 170 | assert sorted(upd) == [] 171 | 172 | assert layer.to_spec() == dict( 173 | form=form, name='l', size=NH, inputs='in', activation='relu') 174 | 175 | @pytest.mark.parametrize('layer', [ 176 | dict(size=NH, form='conv2', filter_size=u.CNN.FILTER_SIZE), 177 | ]) 178 | def test_predict(self, layer): 179 | net = theanets.convolution.Regressor([ 180 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, NI), 181 | NH, layer, 'flat', u.NUM_OUTPUTS]) 182 | assert net.predict(u.CNN.INPUTS).shape == (u.NUM_EXAMPLES, u.NUM_OUTPUTS) 183 | -------------------------------------------------------------------------------- /test/losses_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | class TestBuild: 8 | def test_mse(self): 9 | l = theanets.Loss.build('mse', target=2) 10 | assert callable(l) 11 | assert len(l.variables) == 1 12 | 13 | def test_mse_weighted(self): 14 | l = theanets.Loss.build('mse', target=2, weighted=True) 15 | assert callable(l) 16 | assert len(l.variables) == 2 17 | 18 | 19 | @pytest.mark.parametrize('loss', ['xe', 'hinge']) 20 | def test_classification(loss): 21 | net = theanets.Classifier([ 22 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_CLASSES], loss=loss) 23 | u.assert_progress(net, u.CLF_DATA) 24 | 25 | 26 | @pytest.mark.parametrize('loss', ['mse', 'mae', 'mmd']) 27 | def test_regression(loss): 28 | net = theanets.Regressor([ 29 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_OUTPUTS], loss=loss) 30 | u.assert_progress(net, u.REG_DATA) 31 | 32 | 33 | def test_kl(): 34 | net = theanets.Regressor([ 35 | u.NUM_INPUTS, u.NUM_HID1, (u.NUM_OUTPUTS, 'softmax')], loss='kl') 36 | u.assert_progress(net, [u.INPUTS, abs(u.OUTPUTS)]) 37 | 38 | 39 | def test_gll(): 40 | net = theanets.Regressor([ 41 | u.NUM_INPUTS, 42 | dict(name='hid', size=u.NUM_HID1), 43 | dict(name='covar', activation='relu', inputs='hid', size=u.NUM_OUTPUTS), 44 | dict(name='mean', activation='linear', inputs='hid', size=u.NUM_OUTPUTS), 45 | ]) 46 | net.set_loss('gll', target=2, mean_name='mean', covar_name='covar') 47 | u.assert_progress(net, u.REG_DATA) 48 | -------------------------------------------------------------------------------- /test/main_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import tempfile 4 | import theanets 5 | 6 | 7 | class TestExperiment: 8 | def test_save_load(self): 9 | exp = theanets.Experiment(theanets.Autoencoder, layers=(10, 3, 4, 10)) 10 | net = exp.network 11 | f, p = tempfile.mkstemp(suffix='pkl') 12 | os.close(f) 13 | os.unlink(p) 14 | try: 15 | exp.save(p) 16 | assert os.path.isfile(p) 17 | exp.load(p) 18 | assert exp.network is not net 19 | for lo, ln in zip(net.layers, exp.network.layers): 20 | assert lo.name == ln.name 21 | assert lo._input_shapes == ln._input_shapes 22 | for po, pn in zip(net.params, exp.network.params): 23 | assert po.name == pn.name 24 | assert np.allclose(po.get_value(), pn.get_value()) 25 | finally: 26 | if os.path.exists(p): 27 | os.unlink(p) 28 | -------------------------------------------------------------------------------- /test/recurrent_test.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | AE_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_INPUTS] 8 | CLF_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_CLASSES] 9 | REG_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_OUTPUTS] 10 | 11 | 12 | def assert_shape(actual, expected): 13 | if not isinstance(expected, tuple): 14 | expected = (u.NUM_EXAMPLES, u.RNN.NUM_TIMES, expected) 15 | assert actual == expected 16 | 17 | 18 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 19 | (theanets.recurrent.Regressor, REG_LAYERS, False, u.RNN.REG_DATA), 20 | (theanets.recurrent.Classifier, CLF_LAYERS, False, u.RNN.CLF_DATA), 21 | (theanets.recurrent.Autoencoder, AE_LAYERS, False, u.RNN.AE_DATA), 22 | (theanets.recurrent.Regressor, REG_LAYERS, True, u.RNN.WREG_DATA), 23 | (theanets.recurrent.Classifier, CLF_LAYERS, True, u.RNN.WCLF_DATA), 24 | (theanets.recurrent.Autoencoder, AE_LAYERS, True, u.RNN.WAE_DATA), 25 | ]) 26 | def test_sgd(Model, layers, weighted, data): 27 | u.assert_progress(Model(layers, weighted=weighted), data) 28 | 29 | 30 | @pytest.mark.parametrize('Model, layers, output', [ 31 | (theanets.recurrent.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 32 | (theanets.recurrent.Classifier, CLF_LAYERS, (u.NUM_EXAMPLES, u.RNN.NUM_TIMES)), 33 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.NUM_INPUTS), 34 | ]) 35 | def test_predict(Model, layers, output): 36 | assert_shape(Model(layers).predict(u.RNN.INPUTS).shape, output) 37 | 38 | 39 | @pytest.mark.parametrize('Model, layers, target, score', [ 40 | (theanets.recurrent.Regressor, REG_LAYERS, u.RNN.OUTPUTS, -0.73883247375488281), 41 | (theanets.recurrent.Classifier, CLF_LAYERS, u.RNN.CLASSES, 0.0020161290322580645), 42 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.RNN.INPUTS, 81.411415100097656), 43 | ]) 44 | def test_score(Model, layers, target, score): 45 | assert Model(layers).score(u.RNN.INPUTS, target) == score 46 | 47 | 48 | @pytest.mark.parametrize('Model, layers, target', [ 49 | (theanets.recurrent.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 50 | (theanets.recurrent.Classifier, CLF_LAYERS, u.NUM_CLASSES), 51 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.NUM_INPUTS), 52 | ]) 53 | def test_feed_forward(Model, layers, target): 54 | outs = Model(layers).feed_forward(u.RNN.INPUTS) 55 | assert len(list(outs)) == 7 56 | assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 57 | assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 58 | assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 59 | assert_shape(outs['out:out'].shape, target) 60 | 61 | 62 | def test_symbolic_initial_state(): 63 | net = theanets.recurrent.Regressor([ 64 | dict(size=u.NUM_INPUTS, form='input', name='h0', ndim=2), 65 | dict(size=u.NUM_INPUTS, form='input', name='in'), 66 | dict(size=u.NUM_HID1, form='rnn', name='rnn', h_0='h0'), 67 | dict(size=u.NUM_OUTPUTS, form='ff', name='out'), 68 | ]) 69 | H0 = np.random.randn(u.NUM_EXAMPLES, u.NUM_HID1).astype('f') 70 | u.assert_progress(net, [H0, u.RNN.INPUTS, u.RNN.OUTPUTS]) 71 | 72 | 73 | class TestClassifier: 74 | @pytest.fixture 75 | def net(self): 76 | return theanets.recurrent.Classifier(CLF_LAYERS) 77 | 78 | def test_predict_proba(self, net): 79 | assert_shape(net.predict_proba(u.RNN.INPUTS).shape, u.NUM_CLASSES) 80 | 81 | def test_predict_logit(self, net): 82 | assert_shape(net.predict_logit(u.RNN.INPUTS).shape, u.NUM_CLASSES) 83 | 84 | def test_score(self, net): 85 | w = 0.5 * np.ones(u.CLASSES.shape, 'f') 86 |
assert 0 <= net.score(u.RNN.INPUTS, u.CLASSES, w) <= 1 87 | 88 | def test_predict_sequence(self, net): 89 | assert list(net.predict_sequence([0, 1, 2], 5, rng=13)) == [4, 5, 1, 3, 1] 90 | 91 | 92 | class TestAutoencoder: 93 | @pytest.fixture 94 | def net(self): 95 | return theanets.recurrent.Autoencoder(AE_LAYERS) 96 | 97 | def test_encode_hid1(self, net): 98 | z = net.encode(u.RNN.INPUTS, 'hid1') 99 | assert_shape(z.shape, u.NUM_HID1) 100 | 101 | def test_encode_hid2(self, net): 102 | z = net.encode(u.RNN.INPUTS, 'hid2') 103 | assert_shape(z.shape, u.NUM_HID2) 104 | 105 | def test_decode_hid1(self, net): 106 | x = net.decode(net.encode(u.RNN.INPUTS)) 107 | assert_shape(x.shape, u.NUM_INPUTS) 108 | 109 | def test_decode_hid2(self, net): 110 | x = net.decode(net.encode(u.RNN.INPUTS, 'hid2'), 'hid2') 111 | assert_shape(x.shape, u.NUM_INPUTS) 112 | 113 | def test_score(self, net): 114 | labels = np.random.randint(0, 2, size=u.RNN.INPUTS.shape) 115 | assert net.score(u.RNN.INPUTS, labels) < 0 116 | 117 | 118 | class TestFunctions: 119 | @pytest.fixture 120 | def samples(self): 121 | return np.random.randn(2 * u.RNN.NUM_TIMES, u.NUM_INPUTS) 122 | 123 | @pytest.fixture 124 | def labels(self): 125 | return np.random.randn(2 * u.RNN.NUM_TIMES, u.NUM_OUTPUTS) 126 | 127 | def test_batches_labeled(self, samples, labels): 128 | f = theanets.recurrent.batches( 129 | [samples, labels], steps=u.RNN.NUM_TIMES, batch_size=u.NUM_EXAMPLES) 130 | assert len(f()) == 2 131 | assert_shape(f()[0].shape, u.NUM_INPUTS) 132 | assert_shape(f()[1].shape, u.NUM_OUTPUTS) 133 | 134 | def test_batches_unlabeled(self, samples): 135 | f = theanets.recurrent.batches( 136 | [samples], steps=u.RNN.NUM_TIMES, batch_size=u.NUM_EXAMPLES) 137 | assert len(f()) == 1 138 | assert_shape(f()[0].shape, u.NUM_INPUTS) 139 | 140 | 141 | class TestText: 142 | TXT = 'hello world, how are you!' 143 | 144 | def test_min_count(self): 145 | txt = theanets.recurrent.Text(self.TXT, min_count=2, unknown='_') 146 | assert txt.text == 'hello worl__ how _re _o__' 147 | assert txt.alpha == ' ehlorw' 148 | 149 | txt = theanets.recurrent.Text(self.TXT, min_count=3, unknown='_') 150 | assert txt.text == '__llo _o_l__ _o_ ___ _o__' 151 | assert txt.alpha == ' lo' 152 | 153 | @pytest.fixture 154 | def txt(self): 155 | return theanets.recurrent.Text(self.TXT, alpha='helo wrd,!', unknown='_') 156 | 157 | def test_alpha(self, txt): 158 | assert txt.text == 'hello world, how _re _o_!' 159 | assert txt.alpha == 'helo wrd,!' 160 | 161 | def test_encode(self, txt): 162 | assert txt.encode('hello!') == [1, 2, 3, 3, 4, 10] 163 | assert txt.encode('you!') == [0, 4, 0, 10] 164 | 165 | def test_decode(self, txt): 166 | assert txt.decode([1, 2, 3, 3, 4, 10]) == 'hello!' 167 | assert txt.decode([0, 4, 0, 10]) == '_o_!' 
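    # classifier_batches should yield (one-hot inputs, integer targets) pairs of
    # the requested shape, drawing a fresh random window of text on every call.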
168 | 169 | def test_classifier_batches(self, txt): 170 | b = txt.classifier_batches(steps=8, batch_size=5) 171 | assert len(b()) == 2 172 | assert b()[0].shape == (5, 8, 1 + len(txt.alpha)) 173 | assert b()[1].shape == (5, 8) 174 | assert not np.allclose(b()[0], b()[0]) 175 | -------------------------------------------------------------------------------- /test/regularizers_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | @pytest.fixture 8 | def exp(): 9 | return theanets.Regressor([u.NUM_INPUTS, 20, u.NUM_OUTPUTS], rng=115) 10 | 11 | 12 | def assert_progress(net, data=u.REG_DATA, **kwargs): 13 | start = best = None 14 | for _, val in net.itertrain( 15 | data, 16 | algorithm='sgd', 17 | patience=2, 18 | min_improvement=0.01, 19 | max_gradient_norm=1, 20 | batch_size=u.NUM_EXAMPLES, 21 | **kwargs): 22 | if start is None: 23 | start = best = val['loss'] 24 | if val['loss'] < best: 25 | best = val['loss'] 26 | assert best < start # should have made progress! 27 | 28 | 29 | def test_build_dict(exp): 30 | regs = theanets.regularizers.from_kwargs( 31 | exp, regularizers=dict(input_noise=0.01)) 32 | assert len(regs) == 1 33 | 34 | 35 | def test_build_list(exp): 36 | reg = theanets.regularizers.Regularizer.build('weight_l2', 0.01) 37 | regs = theanets.regularizers.from_kwargs(exp, regularizers=[reg]) 38 | assert len(regs) == 1 39 | 40 | 41 | @pytest.mark.parametrize('key, value', [ 42 | ('input_noise', 0.1), 43 | ('input_dropout', 0.2), 44 | ('hidden_noise', 0.1), 45 | ('hidden_dropout', 0.2), 46 | ('noise', {'*:out': 0.1}), 47 | ('dropout', {'hid?:out': 0.2}), 48 | ('hidden_l1', 0.1), 49 | ('weight_l1', 0.1), 50 | ('weight_l2', 0.01), 51 | ('contractive', 0.01), 52 | ]) 53 | def test_sgd(key, value, exp): 54 | assert_progress(exp, **{key: value}) 55 | 56 | 57 | class TestRNN: 58 | @pytest.fixture 59 | def net(self): 60 | return theanets.recurrent.Regressor([ 61 | u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), u.NUM_HID2, u.NUM_OUTPUTS]) 62 | 63 | def test_recurrent_matching(self, net): 64 | regs = theanets.regularizers.from_kwargs(net) 65 | outputs, _ = net.build_graph(regs) 66 | matches = theanets.util.outputs_matching(outputs, 'hid1:out') 67 | assert [n for n, e in matches] == ['hid1:out'] 68 | 69 | @pytest.mark.parametrize('key, value', [ 70 | ('recurrent_norm', dict(pattern='hid1:out', weight=0.1)), 71 | ('recurrent_state', dict(pattern='hid1:out', weight=0.1)), 72 | ]) 73 | def test_progress(self, key, value, net): 74 | assert_progress(net, data=u.RNN.REG_DATA, **{key: value}) 75 | 76 | @pytest.mark.parametrize('key, value', [ 77 | ('recurrent_norm', 0.1), 78 | ('recurrent_state', 0.1), 79 | ]) 80 | def test_raises(self, key, value, net): 81 | with pytest.raises(theanets.util.ConfigurationError): 82 | assert_progress(net, **{key: value}) 83 | -------------------------------------------------------------------------------- /test/sparse_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import scipy.sparse 4 | import theanets 5 | 6 | import util as u 7 | 8 | CSR = dict(form='input', size=u.NUM_INPUTS, sparse='csr', name='in') 9 | CSC = dict(form='input', size=u.NUM_INPUTS, sparse='csc', name='in') 10 | REG_LAYERS = dict(csr=[CSR] + u.REG_LAYERS[1:], csc=[CSC] + u.REG_LAYERS[1:]) 11 | CLF_LAYERS = dict(csr=[CSR] + u.CLF_LAYERS[1:], csc=[CSC] + u.CLF_LAYERS[1:]) 12 | AE_LAYERS = dict(csr=[CSR] + 
u.AE_LAYERS[1:], csc=[CSC] + u.AE_LAYERS[1:]) 13 | 14 | CSR = scipy.sparse.csr_matrix(u.INPUTS) 15 | CSC = scipy.sparse.csc_matrix(u.INPUTS) 16 | INPUTS = dict(csr=CSR, csc=CSC) 17 | REG_DATA = dict(csr=[CSR] + u.REG_DATA[1:], csc=[CSC] + u.REG_DATA[1:]) 18 | WREG_DATA = dict(csr=[CSR] + u.WREG_DATA[1:], csc=[CSC] + u.WREG_DATA[1:]) 19 | CLF_DATA = dict(csr=[CSR] + u.CLF_DATA[1:], csc=[CSC] + u.CLF_DATA[1:]) 20 | WCLF_DATA = dict(csr=[CSR] + u.WCLF_DATA[1:], csc=[CSC] + u.WCLF_DATA[1:]) 21 | 22 | 23 | @pytest.mark.parametrize('Model, layers, sparse, weighted, data', [ 24 | (theanets.Regressor, REG_LAYERS, 'csr', True, WREG_DATA), 25 | (theanets.Classifier, CLF_LAYERS, 'csr', True, WCLF_DATA), 26 | (theanets.Regressor, REG_LAYERS, 'csc', True, WREG_DATA), 27 | (theanets.Classifier, CLF_LAYERS, 'csc', True, WCLF_DATA), 28 | (theanets.Regressor, REG_LAYERS, 'csr', False, REG_DATA), 29 | (theanets.Classifier, CLF_LAYERS, 'csr', False, CLF_DATA), 30 | (theanets.Regressor, REG_LAYERS, 'csc', False, REG_DATA), 31 | (theanets.Classifier, CLF_LAYERS, 'csc', False, CLF_DATA), 32 | ]) 33 | def test_sgd(Model, layers, sparse, weighted, data): 34 | u.assert_progress(Model(layers[sparse], weighted=weighted), data[sparse]) 35 | 36 | 37 | @pytest.mark.parametrize('Model, layers, output', [ 38 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 39 | (theanets.Classifier, u.CLF_LAYERS, (u.NUM_EXAMPLES, )), 40 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 41 | ]) 42 | def test_predict(Model, layers, output): 43 | u.assert_shape(Model(layers).predict(u.INPUTS).shape, output) 44 | 45 | 46 | @pytest.mark.parametrize('Model, layers, target, score', [ 47 | (theanets.Regressor, u.REG_LAYERS, u.OUTPUTS, -1.0473043918609619), 48 | (theanets.Classifier, u.CLF_LAYERS, u.CLASSES, 0.171875), 49 | (theanets.Autoencoder, u.AE_LAYERS, u.INPUTS, 15.108331680297852), 50 | ]) 51 | def test_score(Model, layers, target, score): 52 | assert Model(layers).score(u.INPUTS, target) == score 53 | 54 | 55 | @pytest.mark.parametrize('Model, layers, sparse, target', [ 56 | (theanets.Regressor, REG_LAYERS, 'csr', u.NUM_OUTPUTS), 57 | (theanets.Classifier, CLF_LAYERS, 'csr', u.NUM_CLASSES), 58 | (theanets.Autoencoder, AE_LAYERS, 'csr', u.NUM_INPUTS), 59 | (theanets.Regressor, REG_LAYERS, 'csc', u.NUM_OUTPUTS), 60 | (theanets.Classifier, CLF_LAYERS, 'csc', u.NUM_CLASSES), 61 | (theanets.Autoencoder, AE_LAYERS, 'csc', u.NUM_INPUTS), 62 | ]) 63 | def test_feed_forward(Model, layers, sparse, target): 64 | outs = Model(layers[sparse]).feed_forward(INPUTS[sparse]) 65 | assert len(list(outs)) == 7 66 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 67 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 68 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 69 | u.assert_shape(outs['out:out'].shape, target) 70 | -------------------------------------------------------------------------------- /test/trainer_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | @pytest.fixture 8 | def ae(): 9 | return theanets.Autoencoder([ 10 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, u.NUM_INPUTS]) 11 | 12 | 13 | def test_downhill(ae): 14 | # this really tests that interaction with downhill works. 
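# ('downhill' here refers to the separate gradient-optimization package that
# theanets delegates training to; 'sgd' is simply one of its algorithms.)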
15 | u.assert_progress(ae, u.AE_DATA) 16 | 17 | 18 | def test_layerwise(ae): 19 | u.assert_progress(ae, u.AE_DATA, algo='layerwise') 20 | 21 | 22 | def test_layerwise_tied(): 23 | ae = theanets.Autoencoder([ 24 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, 25 | (u.NUM_HID1, 'tied'), (u.NUM_INPUTS, 'tied')]) 26 | u.assert_progress(ae, u.AE_DATA, algo='layerwise') 27 | 28 | 29 | def test_sample(ae): 30 | trainer = ae.itertrain(u.AE_DATA, algo='sample') 31 | train0, valid0 = next(trainer) 32 | # for this trainer, we don't measure the loss. 33 | assert train0['loss'] == 0 == valid0['loss'] 34 | 35 | 36 | def test_unsupervised_pretrainer(): 37 | u.assert_progress( 38 | theanets.Experiment(theanets.Classifier, u.CLF_LAYERS), 39 | u.AE_DATA, algo='pretrain') 40 | -------------------------------------------------------------------------------- /test/util.py: -------------------------------------------------------------------------------- 1 | '''Helper code for theanets unit tests.''' 2 | 3 | import numpy as np 4 | 5 | np.random.seed(13) 6 | 7 | NUM_EXAMPLES = 64 8 | NUM_INPUTS = 7 9 | NUM_HID1 = 8 10 | NUM_HID2 = 12 11 | NUM_OUTPUTS = 5 12 | NUM_CLASSES = 6 13 | 14 | INPUTS = np.random.randn(NUM_EXAMPLES, NUM_INPUTS).astype('f') 15 | INPUT_WEIGHTS = abs(np.random.randn(NUM_EXAMPLES, NUM_INPUTS)).astype('f') 16 | OUTPUTS = np.random.randn(NUM_EXAMPLES, NUM_OUTPUTS).astype('f') 17 | OUTPUT_WEIGHTS = abs(np.random.randn(NUM_EXAMPLES, NUM_OUTPUTS)).astype('f') 18 | CLASSES = np.random.randint(NUM_CLASSES, size=NUM_EXAMPLES).astype('i') 19 | CLASS_WEIGHTS = abs(np.random.rand(NUM_EXAMPLES)).astype('f') 20 | 21 | AE_DATA = [INPUTS] 22 | WAE_DATA = [INPUTS, INPUT_WEIGHTS] 23 | AE_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_INPUTS] 24 | 25 | CLF_DATA = [INPUTS, CLASSES] 26 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 27 | CLF_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_CLASSES] 28 | 29 | REG_DATA = [INPUTS, OUTPUTS] 30 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 31 | REG_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_OUTPUTS] 32 | 33 | 34 | class RNN: 35 | NUM_TIMES = 31 36 | 37 | INPUTS = np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_INPUTS).astype('f') 38 | INPUT_WEIGHTS = abs( 39 | np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_INPUTS)).astype('f') 40 | OUTPUTS = np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_OUTPUTS).astype('f') 41 | OUTPUT_WEIGHTS = abs( 42 | np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_OUTPUTS)).astype('f') 43 | CLASSES = np.random.randn(NUM_EXAMPLES, NUM_TIMES).astype('i') 44 | CLASS_WEIGHTS = abs(np.random.rand(NUM_EXAMPLES, NUM_TIMES)).astype('f') 45 | 46 | AE_DATA = [INPUTS] 47 | WAE_DATA = [INPUTS, INPUT_WEIGHTS] 48 | 49 | CLF_DATA = [INPUTS, CLASSES] 50 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 51 | 52 | REG_DATA = [INPUTS, OUTPUTS] 53 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 54 | 55 | 56 | class CNN: 57 | NUM_WIDTH = 13 58 | NUM_HEIGHT = 15 59 | 60 | FILTER_WIDTH = 4 61 | FILTER_HEIGHT = 3 62 | FILTER_SIZE = (FILTER_WIDTH, FILTER_HEIGHT) 63 | 64 | INPUTS = np.random.randn( 65 | NUM_EXAMPLES, NUM_WIDTH, NUM_HEIGHT, NUM_INPUTS).astype('f') 66 | 67 | CLF_DATA = [INPUTS, CLASSES] 68 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 69 | 70 | REG_DATA = [INPUTS, OUTPUTS] 71 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 72 | 73 | 74 | def assert_progress(model, data, algo='sgd'): 75 | trainer = model.itertrain( 76 | data, algo=algo, momentum=0.5, batch_size=3, max_gradient_norm=1) 77 | train0, valid0 = next(trainer) 78 | train1, valid1 = next(trainer) 79 | assert 
train1['loss'] < valid0['loss'] # should have made progress! 80 | assert valid1['loss'] == valid0['loss'] # no new validation occurred 81 | 82 | 83 | def assert_shape(actual, expected): 84 | if not isinstance(expected, tuple): 85 | expected = (NUM_EXAMPLES, expected) 86 | assert actual == expected 87 | -------------------------------------------------------------------------------- /test/util_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theanets 3 | import theanets.util 4 | 5 | 6 | class TestRandomMatrix: 7 | def test_sparsity(self): 8 | x = theanets.util.random_matrix(1000, 200, sparsity=0.1, rng=5) 9 | assert x.shape == (1000, 200) 10 | assert np.allclose(x.mean(), 0, atol=1e-2), x.mean() 11 | assert np.allclose(x.std(), 0.95, atol=1e-2), x.std() 12 | assert np.allclose((x == 0).mean(), 0.1, atol=1e-1), (x == 0).mean() 13 | 14 | def test_diagonal(self): 15 | x = theanets.util.random_matrix(1000, 200, diagonal=0.9, rng=4) 16 | assert x.shape == (1000, 200) 17 | assert np.allclose(np.diag(x), 0.9), np.diag(x) 18 | assert x.sum() == 180, x.sum() 19 | 20 | def test_radius(self): 21 | x = theanets.util.random_matrix(1000, 200, radius=2, rng=4) 22 | assert x.shape == (1000, 200) 23 | u, s, vT = np.linalg.svd(x) 24 | assert s[0] == 2, s 25 | assert s[1] < 2 26 | 27 | 28 | class TestRandomVector: 29 | def test_rng(self): 30 | x = theanets.util.random_vector(10000, rng=4) 31 | assert x.shape == (10000, ) 32 | assert np.allclose(x.mean(), 0, atol=1e-2) 33 | assert np.allclose(x.std(), 1, atol=1e-2) 34 | 35 | 36 | class TestMatching: 37 | def test_params_matching(self): 38 | net = theanets.Autoencoder([10, 20, 30, 10]) 39 | 40 | match = sorted(theanets.util.params_matching(net.layers, '*')) 41 | assert len(match) == 6 42 | assert [n for n, _ in match] == [ 43 | 'hid1.b', 'hid1.w', 'hid2.b', 'hid2.w', 'out.b', 'out.w'] 44 | 45 | match = sorted(theanets.util.params_matching(net.layers, '*.w')) 46 | assert len(match) == 3 47 | assert [n for n, _ in match] == ['hid1.w', 'hid2.w', 'out.w'] 48 | 49 | match = sorted(theanets.util.params_matching(net.layers, 'o*.?')) 50 | assert len(match) == 2 51 | assert [n for n, _ in match] == ['out.b', 'out.w'] 52 | 53 | def test_outputs_matching(self): 54 | outputs, _ = theanets.Autoencoder([10, 20, 30, 10]).build_graph() 55 | 56 | match = sorted(theanets.util.outputs_matching(outputs, '*')) 57 | assert len(match) == 7 58 | assert [n for n, _ in match] == [ 59 | 'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre', 60 | 'in:out', 'out:out', 'out:pre'] 61 | 62 | match = sorted(theanets.util.outputs_matching(outputs, 'hid?:*')) 63 | assert len(match) == 4 64 | assert [n for n, _ in match] == [ 65 | 'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre'] 66 | 67 | match = sorted(theanets.util.outputs_matching(outputs, '*:pre')) 68 | assert len(match) == 3 69 | assert [n for n, _ in match] == ['hid1:pre', 'hid2:pre', 'out:pre'] 70 | -------------------------------------------------------------------------------- /theanets/__init__.py: -------------------------------------------------------------------------------- 1 | '''This package groups together a bunch of Theano code for neural nets.''' 2 | 3 | from .activations import Activation 4 | from .feedforward import Autoencoder, Regressor, Classifier 5 | from .graph import Network 6 | from .layers import Layer 7 | from .losses import Loss 8 | from .main import Experiment 9 | from .regularizers import Regularizer 10 | from .util import log 11 | 12 | from . 
import convolution 13 | from . import recurrent 14 | from . import regularizers 15 | 16 | __version__ = '0.8.0pre' 17 | -------------------------------------------------------------------------------- /theanets/activations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | r'''Activation functions for network layers. 4 | 5 | Activation functions are normally constructed using the :func:`build` function. 6 | Commonly available functions are: 7 | 8 | - "linear" 9 | - "logistic" (or "sigmoid") 10 | - "tanh" 11 | - "softmax" (typically used for :class:`classifier ` 12 | output layers) 13 | - "relu" (or "rect:max") 14 | - "rect:min" 15 | - "rect:minmax" 16 | - "softplus" (continuous approximation of "relu") 17 | - "norm:mean": subtractive (mean) batch normalization 18 | - "norm:max": divisive (max) batch normalization 19 | - "norm:std": divisive (standard deviation) batch normalization 20 | - "norm:z": z-score batch normalization 21 | 22 | Additionally, the names of all classes defined in this module can be used as 23 | keys when building an activation function. 24 | ''' 25 | 26 | import functools 27 | import numpy as np 28 | import theano 29 | import theano.tensor as TT 30 | 31 | from . import util 32 | 33 | 34 | def _identity(x): return x 35 | 36 | 37 | def _relu(x): return (x + abs(x)) / 2 38 | 39 | 40 | def _trel(x): return (x + 1 - abs(x - 1)) / 2 41 | 42 | 43 | def _rect(x): return (abs(x) + 1 - abs(x - 1)) / 2 44 | 45 | 46 | def _norm_mean(x): return x - x.mean(axis=-1, keepdims=True) 47 | 48 | 49 | def _norm_max(x): return x / (abs(x).max(axis=-1, keepdims=True) + 1e-8) 50 | 51 | 52 | def _norm_std(x): return x / (x.std(axis=-1, keepdims=True) + 1e-8) 53 | 54 | 55 | def _norm_z(x): return ((x - x.mean(axis=-1, keepdims=True)) / 56 | (x.std(axis=-1, keepdims=True) + 1e-8)) 57 | 58 | 59 | def _softmax(x): 60 | z = TT.exp(x - x.max(axis=-1, keepdims=True)) 61 | return z / z.sum(axis=-1, keepdims=True) 62 | 63 | 64 | COMMON = { 65 | # s-shaped 66 | 'tanh': TT.tanh, 67 | 'logistic': TT.nnet.sigmoid, 68 | 'sigmoid': TT.nnet.sigmoid, 69 | 70 | # softmax (typically for classification) 71 | 'softmax': _softmax, 72 | 73 | # linear variants 74 | 'linear': _identity, 75 | 'softplus': TT.nnet.softplus, 76 | 'relu': _relu, 77 | 'rect:max': _relu, 78 | 'rect:min': _trel, 79 | 'rect:minmax': _rect, 80 | 81 | # batch normalization 82 | 'norm:mean': _norm_mean, 83 | 'norm:max': _norm_max, 84 | 'norm:std': _norm_std, 85 | 'norm:z': _norm_z, 86 | } 87 | 88 | 89 | def build(name, layer, **kwargs): 90 | '''Construct an activation function by name. 91 | 92 | Parameters 93 | ---------- 94 | name : str or :class:`Activation` 95 | The name of the type of activation function to build, or an 96 | already-created instance of an activation function. 97 | layer : :class:`theanets.layers.Layer` 98 | The layer to which this activation will be applied. 99 | kwargs : dict 100 | Additional named arguments to pass to the activation constructor. 101 | 102 | Returns 103 | ------- 104 | activation : :class:`Activation` 105 | A neural network activation function instance. 
106 | ''' 107 | if isinstance(name, Activation): 108 | return name 109 | 110 | if '+' in name: 111 | return functools.reduce( 112 | Compose, (build(n, layer, **kwargs) for n in name.split('+'))) 113 | 114 | act = COMMON.get(name) 115 | if act is not None: 116 | act.name = name 117 | act.params = [] 118 | return act 119 | 120 | if name.lower().startswith('maxout') and ':' in name: 121 | name, pieces = name.split(':', 1) 122 | kwargs['pieces'] = int(pieces) 123 | kwargs['name'] = name 124 | kwargs['layer'] = layer 125 | return Activation.build(name, **kwargs) 126 | 127 | 128 | class Activation(util.Registrar(str('Base'), (), {})): 129 | '''An activation function for a neural network layer. 130 | 131 | Parameters 132 | ---------- 133 | name : str 134 | Name of this activation function. 135 | layer : :class:`Layer` 136 | The layer to which this function is applied. 137 | 138 | Attributes 139 | ---------- 140 | name : str 141 | Name of this activation function. 142 | layer : :class:`Layer` 143 | The layer to which this function is applied. 144 | ''' 145 | 146 | def __init__(self, name, layer, **kwargs): 147 | self.name = name 148 | self.layer = layer 149 | self.kwargs = kwargs 150 | self.params = [] 151 | 152 | def __call__(self, x): 153 | '''Compute a symbolic expression for this activation function. 154 | 155 | Parameters 156 | ---------- 157 | x : Theano expression 158 | A Theano expression representing the input to this activation 159 | function. 160 | 161 | Returns 162 | ------- 163 | y : Theano expression 164 | A Theano expression representing the output from this activation 165 | function. 166 | ''' 167 | raise NotImplementedError 168 | 169 | 170 | class Compose(Activation): 171 | r'''Compose two activation functions.''' 172 | 173 | def __init__(self, f, g): 174 | self.f = f 175 | self.g = g 176 | self.name = '{}({})'.format(g.name, f.name) 177 | self.layer = None 178 | self.kwargs = {} 179 | self.params = getattr(g, 'params', []) + getattr(f, 'params', []) 180 | 181 | def __call__(self, x): 182 | return self.g(self.f(x)) 183 | 184 | 185 | class Prelu(Activation): 186 | r'''Parametric rectified linear activation with learnable leak rate. 187 | 188 | This activation is characterized by two linear pieces joined at the origin. 189 | For negative inputs, the unit response is a linear function of the input 190 | with slope :math:`r` (the "leak rate"). For positive inputs, the unit 191 | response is the identity function: 192 | 193 | .. math:: 194 | f(x) = \left\{ \begin{eqnarray*} rx &\qquad& \mbox{if } x < 0 \\ 195 | x &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 196 | 197 | This activation allocates a separate leak rate for each unit in its layer. 198 | 199 | References 200 | ---------- 201 | K He, X Zhang, S Ren, J Sun (2015), "Delving Deep into Rectifiers: 202 | Surpassing Human-Level Performance on ImageNet Classification" 203 | http://arxiv.org/abs/1502.01852 204 | ''' 205 | 206 | __extra_registration_keys__ = ['leaky-relu'] 207 | 208 | def __init__(self, *args, **kwargs): 209 | super(Prelu, self).__init__(*args, **kwargs) 210 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 211 | self.leak = theano.shared(0.1 * abs(arr), name=self.layer._fmt('leak')) 212 | self.params.append(self.leak) 213 | 214 | def __call__(self, x): 215 | return (x + abs(x)) / 2 + TT.exp(self.leak) * (x - abs(x)) / 2 216 | 217 | 218 | class LGrelu(Activation): 219 | r'''Rectified linear activation with learnable leak rate and gain. 
220 | 221 | This activation is characterized by two linear pieces joined at the origin. 222 | For negative inputs, the unit response is a linear function of the input 223 | with slope :math:`r` (the "leak rate"). For positive inputs, the unit 224 | response is a different linear function of the input with slope :math:`g` 225 | (the "gain"): 226 | 227 | .. math:: 228 | f(x) = \left\{ \begin{eqnarray*} rx &\qquad& \mbox{if } x < 0 \\ 229 | gx &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 230 | 231 | This activation allocates a separate leak and gain rate for each unit in its 232 | layer. 233 | ''' 234 | 235 | __extra_registration_keys__ = ['leaky-gain-relu'] 236 | 237 | def __init__(self, *args, **kwargs): 238 | super(LGrelu, self).__init__(*args, **kwargs) 239 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 240 | self.gain = theano.shared(0.1 * abs(arr), name=self.layer._fmt('gain')) 241 | self.params.append(self.gain) 242 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 243 | self.leak = theano.shared(0.1 * abs(arr), name=self.layer._fmt('leak')) 244 | self.params.append(self.leak) 245 | 246 | def __call__(self, x): 247 | return TT.exp(self.gain) * (x + abs(x)) / 2 + TT.exp(self.leak) * (x - abs(x)) / 2 248 | 249 | 250 | class Elu(Activation): 251 | r'''Exponential linear activation with learnable gain. 252 | 253 | This activation is characterized by two pieces joined at the origin. For 254 | negative inputs, the unit response is a decaying exponential function of the 255 | input with saturation :math:`\alpha`. For positive inputs, the unit response 256 | is the identity linear function of the input: 257 | 258 | .. math:: 259 | f(x) = \left\{ \begin{eqnarray*} \alpha (exp(x) - 1) &\qquad& \mbox{if } x < 0 \\ 260 | x &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 261 | 262 | This activation allocates a separate gain for each unit in its layer. 263 | ''' 264 | 265 | __extra_registration_keys__ = [] 266 | 267 | def __init__(self, *args, **kwargs): 268 | super(Elu, self).__init__(*args, **kwargs) 269 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 270 | self.gain = theano.shared(0.1 * abs(arr), name=self.layer._fmt('gain')) 271 | self.params.append(self.gain) 272 | 273 | def __call__(self, x): 274 | return x * (x >= 0) + TT.exp(self.gain) * (TT.exp(x) - 1) * (x < 0) 275 | 276 | 277 | class Maxout(Activation): 278 | r'''Arbitrary piecewise linear activation. 279 | 280 | This activation is unusual in that it requires a parameter at initialization 281 | time: the number of linear pieces to use. Consider a layer for the moment 282 | with just one unit. A maxout activation with :math:`k` pieces uses a slope 283 | :math:`m_k` and an intercept :math:`b_k` for each linear piece. It then 284 | transforms the input to the maximum of all of the pieces: 285 | 286 | .. math:: 287 | f(x) = \max_k m_k x + b_k 288 | 289 | The parameters :math:`m_k` and :math:`b_k` are learnable. 290 | 291 | For layers with more than one unit, the maxout activation allocates a slope 292 | :math:`m_{ki}` and intercept :math:`b_{ki}` for each unit :math:`i` and each 293 | piece :math:`k`. The activation for unit :math:`x_i` is: 294 | 295 | .. math:: 296 | f(x_i) = \max_k m_{ki} x_i + b_{ki} 297 | 298 | Again, the slope and intercept parameters are learnable. 299 | 300 | This activation is actually a generalization of the rectified linear 301 | activations; to see how, just allocate 2 pieces and set the intercepts to 0. 
302 | The slopes of the ``relu`` activation are given by :math:`m = (0, 1)`, those 303 | of the :class:`Prelu` function are given by :math:`m = (r, 1)`, and those of 304 | the :class:`LGrelu` are given by :math:`m = (r, g)` where :math:`r` is the 305 | leak rate parameter and :math:`g` is a gain parameter. 306 | 307 | .. note:: 308 | 309 | To use this activation in a network layer specification, provide an 310 | activation string of the form ``'maxout:k'``, where ``k`` is an integer 311 | giving the number of piecewise functions. 312 | 313 | For example, the layer tuple ``(100, 'rnn', 'maxout:10')`` specifies a 314 | vanilla :class:`RNN ` layer with 100 units 315 | and a maxout activation with 10 pieces. 316 | 317 | Parameters 318 | ---------- 319 | pieces : int 320 | Number of linear pieces to use in the activation. 321 | ''' 322 | 323 | def __init__(self, *args, **kwargs): 324 | super(Maxout, self).__init__(*args, **kwargs) 325 | 326 | self.pieces = kwargs['pieces'] 327 | 328 | m = self.layer.rng.randn(self.layer.output_size, self.pieces).astype(util.FLOAT) 329 | self.slope = theano.shared(m, name=self.layer._fmt('slope')) 330 | self.params.append(self.slope) 331 | 332 | b = self.layer.rng.randn(self.layer.output_size, self.pieces).astype(util.FLOAT) 333 | self.intercept = theano.shared(b, name=self.layer._fmt('intercept')) 334 | self.params.append(self.intercept) 335 | 336 | def __call__(self, x): 337 | dims = list(range(x.ndim)) + ['x'] 338 | return (x.dimshuffle(*dims) * self.slope + self.intercept).max(axis=-1) 339 | -------------------------------------------------------------------------------- /theanets/convolution.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''This module contains convolution network structures.''' 4 | 5 | from . import feedforward 6 | 7 | 8 | class Regressor(feedforward.Regressor): 9 | '''A regressor attempts to produce a target output.''' 10 | 11 | INPUT_NDIM = 4 12 | '''Number of dimensions for holding input data arrays.''' 13 | 14 | 15 | class Classifier(feedforward.Classifier): 16 | '''A classifier attempts to match a 1-hot target output.''' 17 | 18 | INPUT_NDIM = 4 19 | '''Number of dimensions for holding input data arrays.''' 20 | -------------------------------------------------------------------------------- /theanets/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .feedforward import * 3 | from .convolution import * 4 | from .recurrent import * 5 | -------------------------------------------------------------------------------- /theanets/layers/convolution.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''Convolutional layers "scan" over input data.''' 4 | 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import theano 9 | import theano.tensor as TT 10 | 11 | from . import base 12 | from .. import util 13 | 14 | __all__ = [ 15 | 'Conv1', 16 | 'Conv2', 17 | 'Pool1', 18 | 'Pool2', 19 | ] 20 | 21 | 22 | class Convolution(base.Layer): 23 | '''Convolution layers convolve filters over the input arrays. 24 | 25 | Parameters 26 | ---------- 27 | filter_size : (int, int) 28 | Size of the convolution filters for this layer. 29 | stride : (int, int), optional 30 | Apply convolutions with this stride; i.e., skip this many samples 31 | between convolutions. Defaults to (1, 1)---that is, no skipping. 
32 | border_mode : str, optional 33 | Compute convolutions with this border mode. Defaults to 'valid'. 34 | ''' 35 | 36 | def __init__(self, filter_size, stride=(1, 1), border_mode='valid', **kwargs): 37 | self.filter_size = filter_size 38 | self.stride = stride 39 | self.border_mode = border_mode 40 | super(Convolution, self).__init__(**kwargs) 41 | 42 | def log(self): 43 | inputs = ', '.join('"{0}" {1}'.format(*ns) for ns in self._input_shapes.items()) 44 | util.log('layer {0.__class__.__name__} "{0.name}" ' 45 | '{0.output_shape} {1} {0.border_mode} ' 46 | 'filters {2}{3} from {4}', self, 47 | getattr(self.activate, 'name', self.activate), 48 | 'x'.join(str(i) for i in self.filter_size), 49 | ''.join('+{}'.format(i) for i in self.stride), 50 | inputs) 51 | util.log('learnable parameters: {}', self.log_params()) 52 | 53 | def add_conv_weights(self, name, mean=0, std=None, sparsity=0): 54 | '''Add a convolutional weight array to this layer's parameters. 55 | 56 | Parameters 57 | ---------- 58 | name : str 59 | Name of the parameter to add. 60 | mean : float, optional 61 | Mean value for randomly-initialized weights. Defaults to 0. 62 | std : float, optional 63 | Standard deviation of initial matrix values. Defaults to 64 | :math:`1 / sqrt(n_i + n_o)`. 65 | sparsity : float, optional 66 | Fraction of weights to set to zero. Defaults to 0. 67 | ''' 68 | nin = self.input_size 69 | nout = self.output_size 70 | mean = self.kwargs.get( 71 | 'mean_{}'.format(name), 72 | self.kwargs.get('mean', mean)) 73 | std = self.kwargs.get( 74 | 'std_{}'.format(name), 75 | self.kwargs.get('std', std or 1 / np.sqrt(nin + nout))) 76 | sparsity = self.kwargs.get( 77 | 'sparsity_{}'.format(name), 78 | self.kwargs.get('sparsity', sparsity)) 79 | arr = np.zeros((nout, nin) + self.filter_size, util.FLOAT) 80 | for r in range(self.filter_size[0]): 81 | for c in range(self.filter_size[1]): 82 | arr[:, :, r, c] = util.random_matrix( 83 | nout, nin, mean, std, sparsity=sparsity, rng=self.rng) 84 | self._params.append(theano.shared(arr, name=self._fmt(name))) 85 | 86 | 87 | class Conv1(Convolution): 88 | '''1-dimensional convolutions run over one data axis. 89 | 90 | Notes 91 | ----- 92 | 93 | One-dimensional convolution layers are typically used in ``theanets`` models 94 | that use recurrent inputs and outputs, i.e., 95 | :class:`theanets.recurrent.Autoencoder`, 96 | :class:`theanets.recurrent.Predictor`, 97 | :class:`theanets.recurrent.Classifier`, or 98 | :class:`theanets.recurrent.Regressor`. 99 | 100 | The convolution will be applied over the "time" dimension (axis 1). 101 | 102 | Parameters 103 | ---------- 104 | filter_size : int 105 | Length of the convolution filters for this layer. 106 | stride : int, optional 107 | Apply convolutions with this stride; i.e., skip this many samples 108 | between convolutions. Defaults to 1, i.e., no skipping. 109 | border_mode : str, optional 110 | Compute convolutions with this border mode. Defaults to 'valid'. 
111 | ''' 112 | 113 | def __init__(self, filter_size, stride=1, border_mode='valid', **kwargs): 114 | super(Conv1, self).__init__( 115 | filter_size=(1, filter_size), 116 | stride=(1, stride), 117 | border_mode=border_mode, 118 | **kwargs) 119 | 120 | def setup(self): 121 | self.add_conv_weights('w') 122 | self.add_bias('b', self.output_size) 123 | 124 | def resolve_outputs(self): 125 | if self.input_shape is None or self.input_shape[0] is None: 126 | return super(Conv1, self).resolve_outputs() 127 | image = np.array(self.input_shape[:-1]) 128 | kernel = np.array(self.filter_size) 129 | result = image 130 | if self.border_mode == 'full': 131 | result = image + kernel - 1 132 | if self.border_mode == 'valid': 133 | result = image - kernel + 1 134 | self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], ) 135 | 136 | def transform(self, inputs): 137 | # input is: (batch, time, input) 138 | # conv2d wants: (batch, input, 1, time) 139 | x = inputs[self.input_name].dimshuffle(0, 2, 'x', 1) 140 | 141 | pre = TT.nnet.conv2d( 142 | x, 143 | self.find('w'), 144 | image_shape=(None, self.input_size, 1, None), 145 | filter_shape=(self.output_size, self.input_size) + self.filter_size, 146 | border_mode=self.border_mode, 147 | subsample=self.stride, 148 | ).dimshuffle(0, 3, 1, 2)[:, :, :, 0] + self.find('b') 149 | # conv2d output is: (batch, output, 1, time) 150 | # we want: (batch, time, output) 151 | # (have to do [:, :, :, 0] to remove unused trailing dimension) 152 | 153 | return dict(pre=pre, out=self.activate(pre)), [] 154 | 155 | 156 | class Conv2(Convolution): 157 | '''2-dimensional convolutions run over two data axes. 158 | 159 | Two-dimensional convolution layers are standard image processing techniques. 160 | In theanets, these layers expect an input consisting of (num-examples, 161 | width, height, num-channels). 162 | 163 | Parameters 164 | ---------- 165 | filter_size : (int, int) 166 | Size of the convolution filters for this layer. 167 | stride : (int, int), optional 168 | Apply convolutions with this stride; i.e., skip this many samples 169 | between convolutions. Defaults to (1, 1), i.e., no skipping. 170 | border_mode : str, optional 171 | Compute convolutions with this border mode. Defaults to 'valid'. 
172 | ''' 173 | 174 | def setup(self): 175 | self.add_conv_weights('w') 176 | self.add_bias('b', self.output_size) 177 | 178 | def resolve_outputs(self): 179 | shape = self.input_shape 180 | if shape is None or shape[0] is None or shape[1] is None: 181 | return super(Conv2, self).resolve_outputs() 182 | image = np.array(shape[:-1]) 183 | kernel = np.array(self.filter_size) 184 | result = image 185 | if self.border_mode == 'full': 186 | result = image + kernel - 1 187 | if self.border_mode == 'valid': 188 | result = image - kernel + 1 189 | self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], ) 190 | 191 | def transform(self, inputs): 192 | # input is: (batch, width, height, input) 193 | # conv2d wants: (batch, input, width, height) 194 | x = inputs[self.input_name].dimshuffle(0, 3, 1, 2) 195 | 196 | pre = TT.nnet.conv2d( 197 | x, 198 | self.find('w'), 199 | image_shape=(None, self.input_size, None, None), 200 | filter_shape=(self.output_size, self.input_size) + self.filter_size, 201 | border_mode=self.border_mode, 202 | subsample=self.stride, 203 | ).dimshuffle(0, 2, 3, 1) + self.find('b') 204 | # conv2d output is: (batch, output, width, height) 205 | # we want: (batch, width, height, output) 206 | 207 | return dict(pre=pre, out=self.activate(pre)), [] 208 | 209 | 210 | class Pooling(base.Layer): 211 | ''' 212 | ''' 213 | 214 | 215 | class Pool1(Pooling): 216 | ''' 217 | ''' 218 | 219 | def transform(self, inputs): 220 | # input is: (batch, time, input) 221 | # conv2d wants: (batch, input, time, 1) 222 | x = inputs[self.input_name].dimshuffle(0, 2, 1, 'x') 223 | 224 | pre = TT.signal.downsample.max_pool_2d( 225 | x, self.pool_size, st=self.stride, mode=self.mode, 226 | ).dimshuffle(0, 2, 1, 3)[:, :, :, 0] 227 | # conv2d output is: (batch, output, time, 1) 228 | # we want: (batch, time, output) 229 | 230 | return dict(pre=pre, out=self.activate(pre)), [] 231 | 232 | 233 | class Pool2(Pooling): 234 | ''' 235 | ''' 236 | 237 | def transform(self, inputs): 238 | # input is: (batch, width, height, input) 239 | # conv2d wants: (batch, input, width, height) 240 | x = inputs[self.input_name].dimshuffle(0, 3, 1, 2) 241 | 242 | pre = TT.signal.downsample.max_pool_2d( 243 | x, self.pool_size, st=self.stride, mode=self.mode, 244 | ).dimshuffle(0, 2, 3, 1) 245 | # conv2d output is: (batch, output, width, height) 246 | # we want: (batch, width, height, output) 247 | 248 | return dict(pre=pre, out=self.activate(pre)), [] 249 | -------------------------------------------------------------------------------- /theanets/layers/feedforward.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | r'''Feedforward layers for neural network computation graphs.''' 4 | 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import theano.sparse as SS 9 | import theano.tensor as TT 10 | 11 | from . import base 12 | from .. import util 13 | 14 | __all__ = [ 15 | 'Classifier', 16 | 'Feedforward', 17 | 'Tied', 18 | ] 19 | 20 | 21 | class Feedforward(base.Layer): 22 | '''A feedforward neural network layer performs a transform of its input. 23 | 24 | More precisely, feedforward layers as implemented here perform an affine 25 | transformation of their input, followed by a potentially nonlinear 26 | :ref:`activation function ` performed elementwise on the 27 | transformed input. 28 | 29 | Feedforward layers are the fundamental building block on which most neural 30 | network models are built. 
31 | 32 | Notes 33 | ----- 34 | 35 | This layer can be constructed using the forms ``'feedforward'`` or ``'ff'``. 36 | 37 | *Parameters* 38 | 39 | - With one input: 40 | 41 | - ``b`` --- bias 42 | - ``w`` --- weights 43 | 44 | - With :math:`N>1` inputs: 45 | 46 | - ``b`` --- bias 47 | - ``w_1`` --- weight for input 1 48 | - ``w_2`` ... 49 | - ``w_N`` --- weight for input :math:`N` 50 | 51 | *Outputs* 52 | 53 | - ``out`` --- the post-activation state of the layer 54 | - ``pre`` --- the pre-activation state of the layer 55 | ''' 56 | 57 | __extra_registration_keys__ = ['ff'] 58 | 59 | def _weight_for_input(self, name): 60 | return 'w' if len(self._input_shapes) == 1 else 'w_{}'.format(name) 61 | 62 | def transform(self, inputs): 63 | def _dot(x, y): 64 | if isinstance(x, SS.SparseVariable): 65 | return SS.structured_dot(x, y) 66 | else: 67 | return TT.dot(x, y) 68 | 69 | xws = ((inputs[name], self.find(self._weight_for_input(name))) 70 | for name in self._input_shapes) 71 | pre = sum(_dot(x, w) for x, w in xws) + self.find('b') 72 | return dict(pre=pre, out=self.activate(pre)), [] 73 | 74 | def setup(self): 75 | for name, shape in self._input_shapes.items(): 76 | label = self._weight_for_input(name) 77 | self.add_weights(label, shape[-1], self.output_size) 78 | self.add_bias('b', self.output_size) 79 | 80 | 81 | class Classifier(Feedforward): 82 | '''A classifier layer performs a softmax over a linear input transform. 83 | 84 | Classifier layers are typically the "output" layer of a classifier network. 85 | 86 | This layer type really only wraps the output activation of a standard 87 | :class:`Feedforward` layer. 88 | 89 | Notes 90 | ----- 91 | 92 | The classifier layer is just a vanilla :class:`Feedforward` layer that uses 93 | a ``'softmax'`` output :ref:`activation `. 94 | ''' 95 | 96 | __extra_registration_keys__ = ['softmax'] 97 | 98 | def __init__(self, **kwargs): 99 | kwargs['activation'] = 'softmax' 100 | super(Classifier, self).__init__(**kwargs) 101 | 102 | 103 | class Tied(base.Layer): 104 | '''A tied-weights feedforward layer shadows weights from another layer. 105 | 106 | Notes 107 | ----- 108 | 109 | Tied weights are typically featured in some types of autoencoder models 110 | (e.g., PCA). A layer with tied weights requires a "partner" layer -- the 111 | tied layer borrows the weights from its partner and uses the transpose of 112 | them to perform its feedforward mapping. Thus, tied layers do not have their 113 | own weights. On the other hand, tied layers do have their own bias values, 114 | but these can be fixed to zero during learning to simulate networks with no 115 | bias (e.g., PCA on mean-centered data). 116 | 117 | *Parameters* 118 | 119 | - ``b`` --- bias 120 | 121 | *Outputs* 122 | 123 | - ``out`` --- the post-activation state of the layer 124 | - ``pre`` --- the pre-activation state of the layer 125 | 126 | Parameters 127 | ---------- 128 | partner : str or :class:`theanets.layers.base.Layer` 129 | The "partner" layer to which this layer is tied. 130 | 131 | Attributes 132 | ---------- 133 | partner : :class:`theanets.layers.base.Layer` 134 | The "partner" layer to which this layer is tied. 
135 | ''' 136 | 137 | def __init__(self, partner, **kwargs): 138 | self.partner = partner 139 | kwargs['size'] = kwargs['shape'] = None 140 | if isinstance(partner, base.Layer): 141 | kwargs['shape'] = partner.input_shape 142 | super(Tied, self).__init__(**kwargs) 143 | 144 | def transform(self, inputs): 145 | x = inputs[self.input_name] 146 | pre = TT.dot(x, self.partner.find('w').T) + self.find('b') 147 | return dict(pre=pre, out=self.activate(pre)), [] 148 | 149 | def resolve_inputs(self, layers): 150 | super(Tied, self).resolve_inputs(layers) 151 | if isinstance(self.partner, util.basestring): 152 | # if the partner is named, just get that layer. 153 | matches = [l for l in layers if l.name == self.partner] 154 | if len(matches) != 1: 155 | raise util.ConfigurationError( 156 | 'tied layer "{}": cannot find partner "{}"' 157 | .format(self.name, self.partner)) 158 | self.partner = matches[0] 159 | 160 | def resolve_outputs(self): 161 | self._output_shapes['out'] = self.partner.input_shape 162 | 163 | def setup(self): 164 | # this layer does not create a weight matrix! 165 | self.add_bias('b', self.output_size) 166 | 167 | def log(self): 168 | inputs = ', '.join('"{0}" {1}'.format(*ns) for ns in self._input_shapes.items()) 169 | util.log('layer {0.__class__.__name__} "{0.name}" ' 170 | '(tied to "{0.partner.name}") {0.output_shape} {1} from {2}', 171 | self, getattr(self.activate, 'name', self.activate), inputs) 172 | util.log('learnable parameters: {}', self.log_params()) 173 | 174 | def to_spec(self): 175 | spec = super(Tied, self).to_spec() 176 | spec['partner'] = self.partner.name 177 | return spec 178 | -------------------------------------------------------------------------------- /theanets/main.py: -------------------------------------------------------------------------------- 1 | '''This module contains some glue code encapsulating a "main" process. 2 | 3 | The code here wraps the most common tasks involved in creating and, especially, 4 | training a neural network model. 5 | ''' 6 | 7 | import os 8 | 9 | from . import graph 10 | from . import util 11 | 12 | 13 | class Experiment: 14 | '''This class encapsulates tasks for training and evaluating a network. 15 | 16 | Parameters 17 | ---------- 18 | model : :class:`Network ` or str 19 | A specification for obtaining a model. If a string is given, it is 20 | assumed to name a file containing a pickled model; this file will be 21 | loaded and used. If a network instance is provided, it will be used 22 | as the model. If a callable (such as a subclass) is provided, it 23 | will be invoked using the provided keyword arguments to create a 24 | network instance. 25 | ''' 26 | 27 | def __init__(self, network, *args, **kwargs): 28 | if isinstance(network, util.basestring) and os.path.isfile(network): 29 | self.load(network) 30 | elif isinstance(network, graph.Network): 31 | self.network = network 32 | else: 33 | assert network is not graph.Network, \ 34 | 'use a concrete theanets.Network subclass ' \ 35 | 'like theanets.{Autoencoder,Regressor,...}' 36 | self.network = network(*args, **kwargs) 37 | 38 | def train(self, *args, **kwargs): 39 | '''Train the network until the trainer converges. 40 | 41 | All arguments are passed to :func:`train 42 | `. 43 | 44 | Returns 45 | ------- 46 | training : dict 47 | A dictionary of monitor values computed using the training dataset, 48 | at the conclusion of training. This dictionary will at least contain 49 | a 'loss' key that indicates the value of the loss function. 
Other 50 | keys may be available depending on the trainer being used. 51 | validation : dict 52 | A dictionary of monitor values computed using the validation 53 | dataset, at the conclusion of training. 54 | ''' 55 | return self.network.train(*args, **kwargs) 56 | 57 | def itertrain(self, *args, **kwargs): 58 | '''Train the network iteratively. 59 | 60 | All arguments are passed to :func:`itertrain 61 | `. 62 | 63 | Yields 64 | ------ 65 | training : dict 66 | A dictionary of monitor values computed using the training dataset, 67 | at the conclusion of training. This dictionary will at least contain 68 | a 'loss' key that indicates the value of the loss function. Other 69 | keys may be available depending on the trainer being used. 70 | validation : dict 71 | A dictionary of monitor values computed using the validation 72 | dataset, at the conclusion of training. 73 | ''' 74 | return self.network.itertrain(*args, **kwargs) 75 | 76 | def save(self, path): 77 | '''Save the current network to a pickle file on disk. 78 | 79 | Parameters 80 | ---------- 81 | path : str 82 | Location of the file to save the network. 83 | ''' 84 | self.network.save(path) 85 | 86 | def load(self, path): 87 | '''Load a saved network from a pickle file on disk. 88 | 89 | This method sets the ``network`` attribute of the experiment to the 90 | loaded network model. 91 | 92 | Parameters 93 | ---------- 94 | filename : str 95 | Load the keyword arguments and parameters of a network from a pickle 96 | file at the named path. If this name ends in ".gz" then the input 97 | will automatically be gunzipped; otherwise the input will be treated 98 | as a "raw" pickle. 99 | 100 | Returns 101 | ------- 102 | network : :class:`Network ` 103 | A newly-constructed network, with topology and parameters loaded 104 | from the given pickle file. 
105 | ''' 106 | self.network = graph.Network.load(path) 107 | return self.network 108 | -------------------------------------------------------------------------------- /theanets/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''Utility functions and classes.''' 4 | 5 | import click 6 | import datetime 7 | import fnmatch 8 | import inspect 9 | import numpy as np 10 | import theano 11 | import theano.tensor as TT 12 | 13 | try: 14 | basestring = basestring 15 | except NameError: 16 | basestring = str 17 | 18 | FLOAT = theano.config.floatX 19 | 20 | FLOAT_CONTAINERS = (TT.scalar, TT.vector, TT.matrix, TT.tensor3, TT.tensor4) 21 | 22 | INT_CONTAINERS = (TT.iscalar, TT.ivector, TT.imatrix, TT.itensor3, TT.itensor4) 23 | 24 | 25 | class Error(Exception): 26 | pass 27 | 28 | 29 | class ConfigurationError(Error): 30 | pass 31 | 32 | 33 | class Registrar(type): 34 | '''A metaclass that builds a registry of its subclasses.''' 35 | 36 | def __init__(cls, name, bases, dct): 37 | if not hasattr(cls, '_registry'): 38 | cls._registry = {} 39 | else: 40 | cls._registry[name.lower()] = cls 41 | for name in getattr(cls, '__extra_registration_keys__', ()): 42 | cls._registry[name.lower()] = cls 43 | super(Registrar, cls).__init__(name, bases, dct) 44 | 45 | def build(cls, key, *args, **kwargs): 46 | return cls._registry[key.lower()](*args, **kwargs) 47 | 48 | def get_class(cls, key): 49 | return cls._registry[key.lower()] 50 | 51 | def is_registered(cls, key): 52 | return key.lower() in cls._registry 53 | 54 | 55 | def random_matrix(rows, cols, mean=0, std=1, sparsity=0, radius=0, diagonal=0, rng=None): 56 | '''Create a matrix of randomly-initialized weights. 57 | 58 | Parameters 59 | ---------- 60 | rows : int 61 | Number of rows of the weight matrix -- equivalently, the number of 62 | "input" units that the weight matrix connects. 63 | cols : int 64 | Number of columns of the weight matrix -- equivalently, the number 65 | of "output" units that the weight matrix connects. 66 | mean : float, optional 67 | Draw initial weight values from a normal with this mean. Defaults to 0. 68 | std : float, optional 69 | Draw initial weight values from a normal with this standard deviation. 70 | Defaults to 1. 71 | sparsity : float in (0, 1), optional 72 | If given, ensure that the given fraction of the weight matrix is 73 | set to zero. Defaults to 0, meaning all weights are nonzero. 74 | radius : float, optional 75 | If given, rescale the initial weights to have this spectral radius. 76 | No scaling is performed by default. 77 | diagonal : float, optional 78 | If nonzero, create a matrix containing all zeros except for this value 79 | along the diagonal. If nonzero, other arguments (except for rows and 80 | cols) will be ignored. 81 | rng : :class:`numpy.random.RandomState` or int, optional 82 | A random number generator, or an integer seed for a random number 83 | generator. If not provided, the random number generator will be created 84 | with an automatically chosen seed. 85 | 86 | Returns 87 | ------- 88 | matrix : numpy array 89 | An array containing random values. These often represent the weights 90 | connecting each "input" unit to each "output" unit in a layer. 
91 | ''' 92 | if rng is None or isinstance(rng, int): 93 | rng = np.random.RandomState(rng) 94 | arr = mean + std * rng.randn(rows, cols) 95 | if 1 > sparsity > 0: 96 | k = min(rows, cols) 97 | mask = rng.binomial(n=1, p=1 - sparsity, size=(rows, cols)).astype(bool) 98 | mask[:k, :k] |= np.eye(k).astype(bool) 99 | arr *= mask 100 | if radius > 0: 101 | # rescale weights to have the appropriate spectral radius. 102 | u, s, vT = np.linalg.svd(arr, full_matrices=False) 103 | arr = np.dot(np.dot(u, np.diag(radius * s / abs(s[0]))), vT) 104 | if diagonal != 0: 105 | # generate a diagonal weight matrix. ignore other options. 106 | arr = diagonal * np.eye(max(rows, cols))[:rows, :cols] 107 | return arr.astype(FLOAT) 108 | 109 | 110 | def random_vector(size, mean=0, std=1, rng=None): 111 | '''Create a vector of randomly-initialized values. 112 | 113 | Parameters 114 | ---------- 115 | size : int 116 | Length of vector to create. 117 | mean : float, optional 118 | Mean value for initial vector values. Defaults to 0. 119 | std : float, optional 120 | Standard deviation for initial vector values. Defaults to 1. 121 | rng : :class:`numpy.random.RandomState` or int, optional 122 | A random number generator, or an integer seed for a random number 123 | generator. If not provided, the random number generator will be created 124 | with an automatically chosen seed. 125 | 126 | Returns 127 | ------- 128 | vector : numpy array 129 | An array containing random values. This often represents the bias for a 130 | layer of computation units. 131 | ''' 132 | if rng is None or isinstance(rng, int): 133 | rng = np.random.RandomState(rng) 134 | return (mean + std * rng.randn(size)).astype(FLOAT) 135 | 136 | 137 | def outputs_matching(outputs, patterns): 138 | '''Get the outputs from a network that match a pattern. 139 | 140 | Parameters 141 | ---------- 142 | outputs : dict or sequence of (str, theano expression) 143 | Output expressions to filter for matches. If this is a dictionary, its 144 | ``items()`` will be processed for matches. 145 | patterns : sequence of str 146 | A sequence of glob-style patterns to match against. Any parameter 147 | matching any pattern in this sequence will be included in the match. 148 | 149 | Yields 150 | ------ 151 | matches : pair of str, theano expression 152 | Generates a sequence of (name, expression) pairs. The name is the name 153 | of the output that matched, and the expression is the symbolic output in 154 | the network graph. 155 | ''' 156 | if isinstance(patterns, basestring): 157 | patterns = (patterns, ) 158 | if isinstance(outputs, dict): 159 | outputs = outputs.items() 160 | for name, expr in outputs: 161 | for pattern in patterns: 162 | if fnmatch.fnmatch(name, pattern): 163 | yield name, expr 164 | break 165 | 166 | 167 | def params_matching(layers, patterns): 168 | '''Get the parameters from a network that match a pattern. 169 | 170 | Parameters 171 | ---------- 172 | layers : list of :class:`theanets.layers.Layer` 173 | A list of network layers to retrieve parameters from. 174 | patterns : sequence of str 175 | A sequence of glob-style patterns to match against. Any parameter 176 | matching any pattern in this sequence will be included in the match. 177 | 178 | Yields 179 | ------ 180 | matches : pair of str, theano expression 181 | Generates a sequence of (name, expression) pairs. The name is the name 182 | of the parameter that matched, and the expression represents the 183 | parameter symbolically.
184 | ''' 185 | if isinstance(patterns, basestring): 186 | patterns = (patterns, ) 187 | for layer in layers: 188 | for param in layer.params: 189 | name = param.name 190 | for pattern in patterns: 191 | if fnmatch.fnmatch(name, pattern): 192 | yield name, param 193 | break 194 | 195 | 196 | _detailed_callsite = False 197 | 198 | 199 | def enable_detailed_callsite_logging(): 200 | '''Enable detailed callsite logging.''' 201 | global _detailed_callsite 202 | _detailed_callsite = True 203 | 204 | 205 | def log(msg, *args, **kwargs): 206 | '''Log a message to the console. 207 | 208 | Parameters 209 | ---------- 210 | msg : str 211 | A string to display on the console. This can contain {}-style 212 | formatting commands; the remaining positional and keyword arguments 213 | will be used to fill them in. 214 | ''' 215 | now = datetime.datetime.now() 216 | module = 'theanets' 217 | if _detailed_callsite: 218 | caller = inspect.stack()[1] 219 | parts = caller.filename.replace('.py', '').split('/') 220 | module = '{}:{}'.format( 221 | '.'.join(parts[parts.index('theanets')+1:]), caller.lineno) 222 | click.echo(' '.join(( 223 | click.style(now.strftime('%Y%m%d'), fg='blue'), 224 | click.style(now.strftime('%H%M%S'), fg='cyan'), 225 | click.style(module, fg='green'), 226 | msg.format(*args, **kwargs), 227 | ))) 228 | --------------------------------------------------------------------------------
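The matrix-initialization and glob-matching helpers at the end of theanets/util.py are easiest to see in action. The short sketch below reuses only calls that already appear verbatim in test/util_test.py earlier in this tree, so it illustrates the documented API rather than adding to it:

import theanets
import theanets.util

# Weight initialization: a 1000x200 matrix rescaled to have spectral radius 2.
w = theanets.util.random_matrix(1000, 200, radius=2, rng=4)

# Glob-style matching over parameter and output names of a small autoencoder.
net = theanets.Autoencoder([10, 20, 30, 10])
weights = [p for _, p in theanets.util.params_matching(net.layers, '*.w')]  # hid1.w, hid2.w, out.w
outputs, _ = net.build_graph()
pre_acts = dict(theanets.util.outputs_matching(outputs, '*:pre'))  # hid1:pre, hid2:pre, out:pre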