├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── _bin │ └── tikz2svg ├── _static │ ├── feedforward_layers.svg │ ├── feedforward_layers.tikz │ ├── feedforward_neuron.svg │ ├── feedforward_neuron.tikz │ ├── mnist-digits-small.png │ ├── mnist-digits.png │ └── style-tweaks.css ├── _templates │ └── gitwidgets.html ├── api │ ├── activations.rst │ ├── layers.rst │ ├── losses.rst │ ├── models.rst │ ├── reference.rst │ ├── regularizers.rst │ ├── trainers.rst │ └── utils.rst ├── conf.py ├── examples │ ├── mnist-classifier.rst │ └── recurrent-memory.rst ├── guide.rst ├── index.rst ├── make.bat └── requirements.txt ├── examples ├── cifar-autoencoder.py ├── lstm-chime.py ├── mnist-autoencoder.py ├── mnist-classifier.py ├── mnist-convolution.py ├── mnist-deep-autoencoder.py ├── mnist-deep-classifier.py ├── mnist-rica.py ├── recurrent-addition.py ├── recurrent-autoencoder.py ├── recurrent-memory.py ├── recurrent-sinusoid.py ├── recurrent-text.py ├── utils.py ├── weighted-classification.py └── xor-classifier.py ├── scripts └── theanets-char-rnn ├── setup.cfg ├── setup.py ├── test ├── activations_test.py ├── convolution_test.py ├── feedforward_test.py ├── graph_test.py ├── layers_test.py ├── losses_test.py ├── main_test.py ├── recurrent_test.py ├── regularizers_test.py ├── sparse_test.py ├── trainer_test.py ├── util.py └── util_test.py └── theanets ├── __init__.py ├── activations.py ├── convolution.py ├── feedforward.py ├── graph.py ├── layers ├── __init__.py ├── base.py ├── convolution.py ├── feedforward.py └── recurrent.py ├── losses.py ├── main.py ├── recurrent.py ├── regularizers.py ├── trainer.py └── util.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/.coveragerc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Downloaded files for examples 2 | mnist.pkl.gz 3 | hf.py 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Packages 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | bin 18 | var 19 | sdist 20 | develop-eggs 21 | .installed.cfg 22 | lib 23 | lib64 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 28 | # Unit test / coverage reports 29 | .coverage 30 | .tox 31 | nosetests.xml 32 | 33 | # Translations 34 | *.mo 35 | 36 | # Mr Developer 37 | .mr.developer.cfg 38 | .project 39 | .pydevproject 40 | 41 | # Sphinx 42 | docs/_build 43 | docs/api/generated 44 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | cache: apt 2 | sudo: false 3 | language: python 4 | python: 5 | - "2.7" 6 | - "3.4" 7 | addons: 8 | apt: 9 | packages: 10 | - libatlas-dev 11 | - libatlas-base-dev 12 | - liblapack-dev 13 | - gfortran 14 | before_install: 15 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 16 | - bash miniconda.sh -b -p $HOME/miniconda 17 | - export PATH="$HOME/miniconda/bin:$PATH" 18 | - conda update --yes conda 19 | install: 20 | - conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy 21 | - pip install pytest-pep8 pytest-cov python-coveralls 22 | - python setup.py develop 23 | script: 24 | - THEANO_FLAGS=floatX=float32 py.test -v --pep8 --cov=theanets 
--cov-report=term-missing 25 | after_success: 26 | - coveralls 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2015 lmjohns3 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " zip to make standalone HTML files and zip them up" 23 | @echo " dirhtml to make HTML files named index.html in directories" 24 | @echo " singlehtml to make a single large HTML file" 25 | @echo " pickle to make pickle files" 26 | @echo " json to make JSON files" 27 | @echo " htmlhelp to make HTML files and a HTML help project" 28 | @echo " qthelp to make HTML files and a qthelp project" 29 | @echo " devhelp to make HTML files and a Devhelp project" 30 | @echo " epub to make an epub" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " text to make text files" 34 | @echo " man to make manual pages" 35 | @echo " texinfo to make Texinfo files" 36 | @echo " info to make Texinfo files and run them through makeinfo" 37 | @echo " gettext to make PO message catalogs" 38 | @echo " changes to make an overview of all changed/added/deprecated items" 39 | @echo " linkcheck to check all external links for integrity" 40 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 41 | 42 | clean: 43 | -rm -rf $(BUILDDIR)/* 44 | -rm docs.zip 45 | 46 | zip: html 47 | cd $(BUILDDIR)/html && zip -r docs.zip . && mv docs.zip ../.. 48 | 49 | html: 50 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 53 | 54 | dirhtml: 55 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 58 | 59 | singlehtml: 60 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 61 | @echo 62 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 63 | 64 | pickle: 65 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 66 | @echo 67 | @echo "Build finished; now you can process the pickle files." 68 | 69 | json: 70 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 71 | @echo 72 | @echo "Build finished; now you can process the JSON files." 73 | 74 | htmlhelp: 75 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 76 | @echo 77 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 78 | ".hhp project file in $(BUILDDIR)/htmlhelp." 79 | 80 | qthelp: 81 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 82 | @echo 83 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 84 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 85 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/theanets.qhcp" 86 | @echo "To view the help file:" 87 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/theanets.qhc" 88 | 89 | devhelp: 90 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 91 | @echo 92 | @echo "Build finished." 93 | @echo "To view the help file:" 94 | @echo "# mkdir -p $$HOME/.local/share/devhelp/theanets" 95 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/theanets" 96 | @echo "# devhelp" 97 | 98 | epub: 99 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 100 | @echo 101 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
102 | 103 | latex: 104 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 105 | @echo 106 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 107 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 108 | "(use \`make latexpdf' here to do that automatically)." 109 | 110 | latexpdf: 111 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 112 | @echo "Running LaTeX files through pdflatex..." 113 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 114 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 115 | 116 | text: 117 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 118 | @echo 119 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 120 | 121 | man: 122 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 123 | @echo 124 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 125 | 126 | texinfo: 127 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 128 | @echo 129 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 130 | @echo "Run \`make' in that directory to run these through makeinfo" \ 131 | "(use \`make info' here to do that automatically)." 132 | 133 | info: 134 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 135 | @echo "Running Texinfo files through makeinfo..." 136 | make -C $(BUILDDIR)/texinfo info 137 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 138 | 139 | gettext: 140 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 141 | @echo 142 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 143 | 144 | changes: 145 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 146 | @echo 147 | @echo "The overview file is in $(BUILDDIR)/changes." 148 | 149 | linkcheck: 150 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 151 | @echo 152 | @echo "Link check complete; look for any errors in the above output " \ 153 | "or in $(BUILDDIR)/linkcheck/output.txt." 154 | 155 | doctest: 156 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 157 | @echo "Testing of doctests in the sources finished, look at the " \ 158 | "results in $(BUILDDIR)/doctest/output.txt." 
159 | -------------------------------------------------------------------------------- /docs/_bin/tikz2svg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # author: github.com/jbenet 4 | # license: MIT 5 | # 6 | # tikz2svg: convert tikz input into svg 7 | # depends on: 8 | # - pdflatex: comes with your tex dist 9 | # - pdf2svg: brew install pdf2svg 10 | 11 | import fileinput 12 | import os 13 | import shutil 14 | import subprocess 15 | import sys 16 | import tempfile 17 | 18 | PDFLATEX = 'pdflatex --shell-escape -file-line-error -interaction=nonstopmode --' 19 | PDF2SVG = 'pdf2svg texput.pdf out.svg' 20 | 21 | LATEX = r''' 22 | \documentclass[border=2bp]{standalone} 23 | \usepackage{tikz} 24 | \usepackage{pgfplots} 25 | \usetikzlibrary{arrows} 26 | \begin{document} 27 | \begingroup 28 | \tikzset{every picture/.style={scale=1}} 29 | \begin{tikzpicture}%(content)s\end{tikzpicture} 30 | \endgroup 31 | \end{document} 32 | ''' 33 | 34 | def run(cmd, stdin=None, exit_on_error=True): 35 | CECI = subprocess.PIPE 36 | p = subprocess.Popen(cmd, shell=True, stdin=CECI, stdout=CECI, stderr=CECI) 37 | if stdin: 38 | p.stdin.write(stdin) 39 | p.stdin.close() 40 | p.wait() 41 | if p.returncode != 0 and exit_on_error: 42 | print('>', cmd) 43 | print('Error.') 44 | print('-' * 20, 'STDIN') 45 | print(stdin) 46 | print('-' * 20, 'STDOUT') 47 | print(p.stdout.read()) 48 | print('-' * 20, 'STDERR') 49 | print(p.stderr.read()) 50 | sys.exit(p.returncode) 51 | return p.stdout.read() 52 | 53 | 54 | def tikz2tex(tikz): 55 | return LATEX % dict(content=tikz) 56 | 57 | def tex2pdf(tex): 58 | with open('figure.tex', 'w') as h: 59 | h.write(tex) 60 | return run(PDFLATEX.split(' '), stdin=tex.encode('utf8')) 61 | 62 | def pdf2svg(pdf): 63 | run(PDF2SVG) 64 | with open('out.svg') as f: 65 | return f.read() 66 | 67 | def tikz2svg(tikz): 68 | return pdf2svg(tex2pdf(tikz2tex(tikz))) 69 | 70 | 71 | if __name__ == '__main__': 72 | # move to tmp because latex litters :( 73 | tmp = tempfile.mkdtemp() 74 | cwd = os.getcwd() 75 | os.chdir(tmp) 76 | print(tikz2svg(''.join(fileinput.input()))) 77 | os.chdir(cwd) 78 | shutil.rmtree(tmp) 79 | -------------------------------------------------------------------------------- /docs/_static/feedforward_layers.tikz: -------------------------------------------------------------------------------- 1 | [thick,->,>=stealth',rectangle,minimum size=10mm,node distance=25mm,rounded corners=3mm] 2 | \node (dots) at (0, 0) {$\dots$}; 3 | \node[draw] (h1) [left of=dots] {Layer 1} edge (dots); 4 | \node[draw] (input) [left of=h1] {Input} edge (h1); 5 | \node[draw] (hkm1) [right of=dots] {Layer $k-1$} edge[<-] (dots); 6 | \node[draw] (output) [right of=hkm1] {Output} edge[<-] (hkm1); 7 | -------------------------------------------------------------------------------- /docs/_static/feedforward_neuron.tikz: -------------------------------------------------------------------------------- 1 | [thick,->,>=stealth',circle,minimum size=10mm,node distance=10mm,below,near start] 2 | \node[draw] (z) at (0, 0) {$\sum$}; 3 | \node[draw] (x) at (20mm, 1.5mm) {$z_i^k$} edge[<-] (z); 4 | \node[draw] (b) at (-30mm, 0) {$z_j^{k-1}$} edge node {$w^k_{ji}$} (z); 5 | \node (adots) [above of=b] {$\vdots$}; 6 | \node[draw] (a) [above of=adots] {$z_1^{k-1}$} edge node {$w^k_{1i}$} (z); 7 | \node (cdots) [below of=b] {$\vdots$}; 8 | \node[draw] (c) [below of=cdots] {$z_{n_{k-1}}^{k-1}$} edge node [midway] {$w^k_{n_{k-1}i}$} (z); 9 | \node[draw] (bias) at 
(0, -20mm) {$b^k_i$} edge (z); 10 | -------------------------------------------------------------------------------- /docs/_static/mnist-digits-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/docs/_static/mnist-digits-small.png -------------------------------------------------------------------------------- /docs/_static/mnist-digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmjohns3/theanets/79db9f878ef2071f2f576a1cf5d43a752a55894a/docs/_static/mnist-digits.png -------------------------------------------------------------------------------- /docs/_static/style-tweaks.css: -------------------------------------------------------------------------------- 1 | a, a:visited { color: #258; } 2 | a tt, a:visited tt, a:active tt { color: #258; } 3 | 4 | .banana { float: right; max-width: 45%; } 5 | .banana img { width: 100%; } 6 | 7 | pre { font-size: 0.9rem; line-height: 1.25; } 8 | span.pre { background: #eee; font-size: 0.95rem; padding: 0.1rem 0.2rem; } 9 | 10 | a.internal span.pre { 11 | background: inherit; 12 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 13 | font-size: inherit; 14 | padding: inherit; 15 | } 16 | 17 | th.field-name { background: #ffd; } 18 | 19 | dl.method dt { background: #def; } 20 | dl.attribute dt { background: #efd; } 21 | dl.classmethod dt { background: #fed; } 22 | 23 | .rubric { font-size: 2rem; font-weight: bold; } 24 | 25 | .sphinxsidebar ul + ul:before { content: 'Examples'; font-weight: bold; } 26 | .sphinxsidebar ul + ul + ul:before { content: 'API'; } 27 | -------------------------------------------------------------------------------- /docs/_templates/gitwidgets.html: -------------------------------------------------------------------------------- 1 |
9 | -------------------------------------------------------------------------------- /docs/api/activations.rst: -------------------------------------------------------------------------------- 1 | .. _activations: 2 | 3 | ==================== 4 | Activation Functions 5 | ==================== 6 | 7 | An activation function (sometimes also called a transfer function) specifies how 8 | the final output of a layer is computed from the weighted sums of the inputs. 9 | 10 | By default, hidden layers in ``theanets`` use a rectified linear activation 11 | function: :math:`g(z) = \max(0, z)`. 12 | 13 | Output layers in :class:`theanets.Regressor ` 14 | and :class:`theanets.Autoencoder ` models use 15 | linear activations (i.e., the output is just the weighted sum of the inputs from 16 | the previous layer: :math:`g(z) = z`), and the output layer in 17 | :class:`theanets.Classifier ` models uses a 18 | softmax activation: :math:`g(z) = \exp(z) / \sum\exp(z)`. 19 | 20 | To specify a different activation function for a layer, include an activation 21 | key chosen from the table below, or :ref:`create a custom activation 22 | `. As described in :ref:`guide-creating-specifying-layers`, 23 | the activation key can be included in your model specification either using the 24 | ``activation`` keyword argument in a layer dictionary, or by including the key 25 | in a tuple with the layer size: 26 | 27 | .. code:: python 28 | 29 | net = theanets.Regressor([10, (10, 'tanh'), 10]) 30 | 31 | The activations that ``theanets`` provides are: 32 | 33 | ========= ============================ =============================================== 34 | Key Description :math:`g(z) =` 35 | ========= ============================ =============================================== 36 | linear linear :math:`z` 37 | sigmoid logistic sigmoid :math:`(1 + \exp(-z))^{-1}` 38 | logistic logistic sigmoid :math:`(1 + \exp(-z))^{-1}` 39 | tanh hyperbolic tangent :math:`\tanh(z)` 40 | softplus smooth relu approximation :math:`\log(1 + \exp(z))` 41 | softmax categorical distribution :math:`\exp(z) / \sum\exp(z)` 42 | relu rectified linear :math:`\max(0, z)` 43 | rect:min truncation :math:`\min(1, z)` 44 | rect:max rectification :math:`\max(0, z)` 45 | rect:minmax truncation and rectification :math:`\max(0, \min(1, z))` 46 | norm:mean mean-normalization :math:`z - \bar{z}` 47 | norm:max max-normalization :math:`z / \max |z|` 48 | norm:std variance-normalization :math:`z / \mathbb{E}[(z-\bar{z})^2]` 49 | norm:z z-score normalization :math:`(z-\bar{z}) / \mathbb{E}[(z-\bar{z})^2]` 50 | prelu_ relu with parametric leak :math:`\max(0, z) - \max(0, -rz)` 51 | lgrelu_ relu with leak and gain :math:`\max(0, gz) - \max(0, -rz)` 52 | maxout_ piecewise linear :math:`\max_i m_i z` 53 | ========= ============================ =============================================== 54 | 55 | .. _prelu: generated/theanets.activations.Prelu.html 56 | .. _lgrelu: generated/theanets.activations.LGrelu.html 57 | .. _maxout: generated/theanets.activations.Maxout.html 58 | 59 | Composition 60 | =========== 61 | 62 | Activation functions can also be composed by concatenating multiple function 63 | names togather using a ``+``. For example, to create a layer that uses a 64 | batch-normalized hyperbolic tangent activation: 65 | 66 | .. code:: python 67 | 68 | net = theanets.Regressor([10, (10, 'tanh+norm:z'), 10]) 69 | 70 | Just like function composition, the order of the components matters! 
Unlike the 71 | notation for mathematical function composition, the functions will be applied 72 | from left-to-right. 73 | 74 | .. _activations-custom: 75 | 76 | Custom Activations 77 | ================== 78 | 79 | To define a custom activation, create a subclass of :class:`theanets.Activation 80 | `, and implement the ``__call__`` method to 81 | make the class instance callable. The callable will be given one argument, the 82 | array of layer outputs to activate. 83 | 84 | .. code:: python 85 | 86 | class ThresholdedLinear(theanets.Activation): 87 | def __call__(self, x): 88 | return x * (x > 1) 89 | 90 | This example activation returns 0 if a layer output is less than 1, or the 91 | output value itself otherwise. In effect it is a linear activation for "large" 92 | outputs (i.e., greater than 1) and zero otherwise. To use it in a model, give 93 | the name of the activation: 94 | 95 | .. code:: python 96 | 97 | net = theanets.Regressor([10, (10, 'thresholdedlinear'), 10]) 98 | -------------------------------------------------------------------------------- /docs/api/layers.rst: -------------------------------------------------------------------------------- 1 | .. _layers: 2 | 3 | ====== 4 | Layers 5 | ====== 6 | 7 | .. image:: ../_static/feedforward_neuron.svg 8 | 9 | In a standard feedforward neural network layer, each node :math:`i` in layer 10 | :math:`k` receives inputs from all nodes in layer :math:`k-1`, then transforms 11 | the weighted sum of these inputs: 12 | 13 | .. math:: 14 | z_i^k = \sigma\left( b_i^k + \sum_{j=1}^{n_{k-1}} w^k_{ji} z_j^{k-1} \right) 15 | 16 | where :math:`\sigma: \mathbb{R} \to \mathbb{R}` is an :ref:`activation function 17 | `. 18 | 19 | In addition to standard feedforward layers, other types of layers are also 20 | commonly used: 21 | 22 | - For recurrent models, :mod:`recurrent layers ` 23 | permit a cycle in the computation graph that depends on a previous time step. 24 | 25 | - For models that process images, :mod:`convolution layers 26 | ` are common. 27 | 28 | - For some types of autoencoder models, it is common to :class:`tie layer weights to 29 | another layer `. 30 | 31 | .. _layers-available: 32 | 33 | Available Layers 34 | ================ 35 | 36 | .. automodule:: theanets.layers.base 37 | :no-members: 38 | :no-inherited-members: 39 | 40 | .. autosummary:: 41 | :toctree: generated/ 42 | 43 | Layer 44 | Input 45 | Concatenate 46 | Flatten 47 | Product 48 | Reshape 49 | 50 | Feedforward 51 | ----------- 52 | 53 | .. automodule:: theanets.layers.feedforward 54 | :no-members: 55 | :no-inherited-members: 56 | 57 | .. autosummary:: 58 | :toctree: generated/ 59 | 60 | Classifier 61 | Feedforward 62 | Tied 63 | 64 | Convolution 65 | ----------- 66 | 67 | .. automodule:: theanets.layers.convolution 68 | :no-members: 69 | :no-inherited-members: 70 | 71 | .. autosummary:: 72 | :toctree: generated/ 73 | 74 | Conv1 75 | Conv2 76 | Pool1 77 | Pool2 78 | 79 | Recurrent 80 | --------- 81 | 82 | .. automodule:: theanets.layers.recurrent 83 | :no-members: 84 | :no-inherited-members: 85 | 86 | .. autosummary:: 87 | :toctree: generated/ 88 | 89 | RNN 90 | RRNN 91 | MUT1 92 | GRU 93 | LSTM 94 | MRNN 95 | SCRN 96 | Clockwork 97 | Bidirectional 98 | 99 | .. _layers-attributes: 100 | 101 | Layer Attributes 102 | ================ 103 | 104 | Now that we've seen how to specify values for the attributes of each layer in 105 | your model, we'll look at the available attributes that can be customized. 
For 106 | many of these settings, you'll want to use a dictionary (or create a 107 | :class:`theanets.Layer ` instance yourself) to 108 | specify non-default values. 109 | 110 | - ``size``: The number of "neurons" in the layer. This value must be specified 111 | by the modeler when creating the layer. It can be specified by providing an 112 | integer, or as a tuple that contains an integer. 113 | 114 | - ``form``: A string specifying the type of layer to use (see above). This 115 | defaults to "feedforward" but can be the name of any existing 116 | :class:`theanets.Layer ` subclass (including 117 | :ref:`layers-custom` that you have defined). 118 | 119 | - ``name``: A string name for the layer. If this isn't provided when creating a 120 | layer, the layer will be assigned a default name. The default names for the 121 | first and last layers in a network are ``'in'`` and ``'out'`` respectively, 122 | and the layers in between are assigned the name "hidN" where N is the number 123 | of existing layers. 124 | 125 | If you create a layer instance manually, the default name is ``'layerN'`` 126 | where N is the number of existing layers. 127 | 128 | - ``activation``: A string describing the :ref:`activation function 129 | ` to use for the layer. This defaults to ``'relu'``. 130 | 131 | - ``inputs``: An integer or dictionary describing the sizes of the inputs that 132 | this layer expects. This is normally optional and defaults to the size of the 133 | preceding layer in a chain-like model. However, providing a dictionary here 134 | permits arbitrary layer interconnections. See :ref:`guide-advanced-graphs` for 135 | more details. 136 | 137 | - ``mean``: A float specifying the mean of the initial parameter values to use 138 | in the layer. Defaults to 0. This value applies to all parameters in the model 139 | that don't have mean values specified for them directly. 140 | 141 | - ``mean_ABC``: A float specifying the mean of the initial parameter values to 142 | use in the layer's ``'ABC'`` parameter. Defaults to 0. This can be used to 143 | specify the mean of the initial values used for a specific parameter in the 144 | model. 145 | 146 | - ``std``: A float specifying the standard deviation of the initial parameter 147 | values to use in the layer. Defaults to 1. This value applies to all 148 | parameters in the model that don't have standard deviations specified 149 | directly. 150 | 151 | - ``std_ABC``: A float specifying the standard deviation of the initial 152 | parameter values to use in the layer's ``'ABC'`` parameter. Defaults to 1. 153 | This can be used to specify the standard deviation of the initial values used 154 | for a specific parameter in the model. 155 | 156 | - ``sparsity``: A float giving the proportion of parameter values in the layer 157 | that should be initialized to zero. Nonzero values in the parameters will be 158 | drawn from a Gaussian with the specified mean and standard deviation as above, 159 | and then an appropriate number of these parameter values will randomly be 160 | reset to zero to make the parameter "sparse." 161 | 162 | - ``sparsity_ABC``: A float or vector of floats used to initialize the 163 | parameters in the layer's ``'ABC'`` parameter. This can be used to set the 164 | initial sparsity level for a particular parameter in the layer. 165 | 166 | - ``diagonal``: A float or vector of floats used to initialize the parameters in 167 | the layer. 
If this is provided, weight matrices in the layer will be 168 | initialized to all zeros, with this value or values placed along the diagonal. 169 | 170 | - ``diagonal_ABC``: A float or vector of floats used to initialize the 171 | parameters in the layer's ``'ABC'`` parameter. If this is provided, the 172 | relevant weight matrix in the layer will be initialized to all zeros, with 173 | this value or values placed along the diagonal. 174 | 175 | - ``rng``: An integer or ``numpy`` random number generator. If specified the 176 | given random number generator will be used to create the initial values for 177 | the parameters in the layer. This can be useful for repeatable runs of a 178 | model. 179 | 180 | In addition to these configuration values, each layer can also be provided with 181 | keyword arguments specific to that layer. For example, the :class:`MRNN 182 | ` recurrent layer type requires a ``factors`` 183 | argument, and the :class:`Conv1 ` 1D 184 | convolutional layer requires a ``filter_size`` argument. 185 | 186 | .. _layers-custom: 187 | 188 | Custom Layers 189 | ============= 190 | 191 | Layers are the real workhorse in ``theanets``; custom layers can be created to 192 | do all sorts of fun stuff. To create a custom layer, just create a subclass of 193 | :class:`theanets.Layer ` and give it the 194 | functionality you want. 195 | 196 | As a very simple example, let's suppose you wanted to create a normal 197 | feedforward layer but did not want to include a bias term: 198 | 199 | .. code:: python 200 | 201 | import theanets 202 | import theano.tensor as TT 203 | 204 | class NoBias(theanets.Layer): 205 | def transform(self, inputs): 206 | return TT.dot(inputs, self.find('w')) 207 | 208 | def setup(self): 209 | self.add_weights('w', nin=self.input_size, nout=self.output_size) 210 | 211 | Once you've set up your new layer class, it will automatically be registered and 212 | available in :func:`theanets.Layer.build ` 213 | using the name of your class: 214 | 215 | .. code:: python 216 | 217 | layer = theanets.Layer.build('nobias', size=4) 218 | 219 | or, while creating a model: 220 | 221 | .. code:: python 222 | 223 | net = theanets.Autoencoder( 224 | layers=(4, (3, 'nobias', 'linear'), (4, 'tied', 'linear')), 225 | ) 226 | 227 | This example shows how fast it is to create a PCA-like model that will learn the 228 | subspace of your dataset that spans the most variance---the same subspace 229 | spanned by the principal components. 230 | -------------------------------------------------------------------------------- /docs/api/losses.rst: -------------------------------------------------------------------------------- 1 | .. _losses: 2 | 3 | ============== 4 | Loss Functions 5 | ============== 6 | 7 | A loss function is used to optimize the parameter values in a neural network 8 | model. Loss functions map a set of parameter values for the network onto a 9 | scalar value that indicates how well those parameter accomplish the task the 10 | network is intended to do. 11 | 12 | There are several common loss functions provided by ``theanets``. These losses 13 | often measure the :class:`squared ` or 14 | :class:`absolute ` error between a network's 15 | output and some target or desired output. Other loss functions are designed 16 | specifically for classification models; the :class:`cross-entropy 17 | ` is a common loss designed to minimize the 18 | distance between the network's distribution over class labels and the 19 | distribution that the dataset defines. 
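As a concrete (if simplified) illustration of what these quantities measure, the
sketch below computes a squared error, an absolute error, and a cross-entropy
directly with NumPy on made-up arrays. This is only a sketch of the arithmetic;
the actual ``theanets`` losses are built as Theano expressions by the classes
listed under :ref:`losses-predefined` below.

.. code:: python

   import numpy as np

   # Made-up regression outputs and targets for a batch of two examples.
   outputs = np.array([[0.2, 0.9], [1.5, -0.3]])
   targets = np.array([[0.0, 1.0], [1.0, 0.0]])

   mse = np.mean((outputs - targets) ** 2)   # mean squared error
   mae = np.mean(np.abs(outputs - targets))  # mean absolute error

   # Made-up classifier outputs: a distribution over three classes for each
   # example, plus the true integer labels. The cross-entropy is the mean
   # negative log-probability assigned to the correct labels.
   probs = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
   labels = np.array([0, 1])
   xent = -np.mean(np.log(probs[np.arange(len(labels)), labels]))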
20 | 21 | :ref:`models` in ``theanets`` have at least one loss to optimize during 22 | training. There are default losses for each of the built-in model types, but you 23 | can often override these defaults just by providing a non-default value for the 24 | ``loss`` keyword argument when creating your model. For example, to create a 25 | regression model with a mean absolute error loss: 26 | 27 | .. code:: python 28 | 29 | net = theanets.Regressor([10, 20, 3], loss='mae') 30 | 31 | This will create the regression model with the specified loss. 32 | 33 | .. _losses-predefined: 34 | 35 | Predefined Losses 36 | ================= 37 | 38 | .. automodule:: theanets.losses 39 | :no-members: 40 | :no-inherited-members: 41 | 42 | .. autosummary:: 43 | :toctree: generated/ 44 | 45 | Loss 46 | CrossEntropy 47 | GaussianLogLikelihood 48 | Hinge 49 | KullbackLeiblerDivergence 50 | MaximumMeanDiscrepancy 51 | MeanAbsoluteError 52 | MeanSquaredError 53 | 54 | .. _losses-multiple: 55 | 56 | Multiple Losses 57 | =============== 58 | 59 | A ``theanets`` model can actually have more than one loss that it attempts to 60 | optimize simultaneously, and these losses can change between successive calls to 61 | :func:`train() `. In fact, a model has a 62 | ``losses`` attribute that's just a list of :class:`theanets.Loss 63 | ` instances; these losses are weighted by a ``weight`` 64 | attribute, then summed and combined with any applicable :ref:`regularizers 65 | ` during each call to ``train()``. 66 | 67 | Let's say that you want to optimize a model using both the mean absolute and the 68 | mean squared error. You could first create a regular regression model: 69 | 70 | .. code:: python 71 | 72 | net = theanets.Regressor([10, 20, 3]) 73 | 74 | and then add a new loss to the model: 75 | 76 | .. code:: python 77 | 78 | net.add_loss('mse') 79 | 80 | Then, when you call: 81 | 82 | .. code:: python 83 | 84 | net.train(...) 85 | 86 | the model will attempt to minimize the sum of the two losses. 87 | 88 | You can specify the relative weight of the two losses by manipulating the 89 | ``weight`` attribute of each loss instance. For instance, if you want the MAE 90 | loss to be twice as strong as the MSE loss: 91 | 92 | .. code:: python 93 | 94 | net.losses[1].weight = 2 95 | net.train(...) 96 | 97 | Finally, if you want to reset the loss to the standard MSE: 98 | 99 | .. code:: python 100 | 101 | net.set_loss('mse', weight=1) 102 | 103 | (Here we've also shown how to specify the weight of the loss when adding or 104 | setting it to the model.) 105 | 106 | .. _losses-weighted: 107 | 108 | Using Weighted Targets 109 | ====================== 110 | 111 | By default, the network models available in ``theanets`` treat all inputs as 112 | equal when computing the loss for the model. For example, a regression model 113 | treats an error of 0.1 in component 2 of the output just the same as an error of 114 | 0.1 in component 3, and each example of a minibatch is treated with equal 115 | importance when training a classifier. 116 | 117 | However, there are times when all inputs to a neural network model are not to be 118 | treated equally. This is especially evident in recurrent models: sometimes, the 119 | inputs to a recurrent network might not contain the same number of time steps, 120 | but because the inputs are presented to the model using a rectangular minibatch 121 | array, all inputs must somehow be made to have the same size. 
One way to address 122 | this would be to cut off all inputs at the length of the shortest input, but 123 | then the network is not exposed to all input/output pairs during training. 124 | 125 | Weighted targets can be used for any model in ``theanets``. For example, an 126 | :class:`autoencoder ` could use an array of 127 | weights containing zeros and ones to solve a matrix completion task, where the 128 | input array contains some "unknown" values. In such a case, the network is 129 | required to reproduce the known values exactly (so these could be presented to 130 | the model with weight 1), while filling in the unknowns with statistically 131 | reasonable values (which could be presented to the model during training with 132 | weight 0). 133 | 134 | As another example, suppose a :class:`classifier 135 | ` model is being trained in a binary 136 | classification task where one of the classes---say, class A---is only present 137 | 0.1% of the time. In such a case, the network can achieve 99.9% accuracy by 138 | always predicting class B, so during training it might be important to ensure 139 | that errors in predicting A are "amplified" when computing the loss. You could 140 | provide a large weight for training examples in class A to encourage the model 141 | not to miss these examples. 142 | 143 | All of these cases are possible to model in ``theanets``; just include 144 | ``weighted=True`` when you create your model: 145 | 146 | .. code:: python 147 | 148 | net = theanets.recurrent.Autoencoder([3, (10, 'rnn'), 3], weighted=True) 149 | 150 | When training a weighted model, the training and validation datasets require an 151 | additional component: an array of floating-point values with the same shape as 152 | the expected output of the model. For example, a non-recurrent Classifier model 153 | would require a weight vector with each minibatch, of the same shape as the 154 | labels array, so that the training and validation datasets would each have three 155 | pieces: ``sample``, ``label``, and ``weight``. Each value in the weight array is 156 | used as the weight for the corresponding error when computing the loss. 157 | 158 | .. _losses-custom: 159 | 160 | Custom Losses 161 | ============= 162 | 163 | It's pretty straightforward to create models in ``theanets`` that use different 164 | losses from the predefined :class:`theanets.Classifier 165 | ` and :class:`theanets.Autoencoder 166 | ` and :class:`theanets.Regressor 167 | ` models. (The classifier uses categorical 168 | cross-entropy (XE) as its default loss, and the other two both use mean squared 169 | error, MSE.) 170 | 171 | To define a model with a new loss, just create a new :class:`theanets.Loss 172 | ` subclass and specify its name when you create your 173 | model. For example, to create a regression model that uses a step function 174 | averaged over all of the model inputs: 175 | 176 | .. code:: python 177 | 178 | class Step(theanets.Loss): 179 | def __call__(self, outputs): 180 | return (outputs[self.output_name] > 0).mean() 181 | 182 | net = theanets.Regressor([5, 6, 7], loss='step') 183 | 184 | Your loss function implementation must return a Theano expression that reflects 185 | the loss for your model. If you wish to make your loss work with weighted 186 | outputs, you will also need to include a case for having weights: 187 | 188 | .. 
code:: python 189 | 190 | class Step(theanets.Loss): 191 | def __call__(self, outputs): 192 | step = outputs[self.output_name] > 0 193 | if self._weights: 194 | return (self._weights * step).sum() / self._weights.sum() 195 | else: 196 | return step.mean() 197 | -------------------------------------------------------------------------------- /docs/api/models.rst: -------------------------------------------------------------------------------- 1 | .. _models: 2 | 3 | ====== 4 | Models 5 | ====== 6 | 7 | There are three major types of neural network models, each defined primarily by 8 | the :ref:`loss function ` that the model attempts to optimize. While 9 | other types of models are certainly possible, ``theanets`` only tries to handle 10 | the common cases with built-in model classes. If you want to define a new type 11 | of model, see :ref:`models-custom`. 12 | 13 | To describe the predefined models, we assume that a neural network has some set 14 | of parameters :math:`\theta`. In the feedforward pass, the network computes some 15 | function of an input vector :math:`x \in \mathbb{R}^n` using these parameters; 16 | we represent this feedforward function using the notation :math:`y = 17 | F_\theta(x)`. 18 | 19 | Autoencoder 20 | =========== 21 | 22 | An :class:`autoencoder ` takes an array of 23 | :math:`m` arbitrary data vectors :math:`X \in \mathbb{R}^{m \times n}` as input, 24 | transforms it in some way, and then attempts to recreate the original input as 25 | the output of the network. 26 | 27 | To evaluate the loss for an autoencoder, only the input data is required. The 28 | default autoencoder model computes the loss using the mean squared error between 29 | the network's output and the input: 30 | 31 | .. math:: 32 | \mathcal{L}(X, \theta) = \frac{1}{mn} \sum_{i=1}^m \left\| 33 | F_\theta(x_i) - x_i \right\|_2^2 + R(X, \theta) 34 | 35 | Autoencoders simply try to adjust their model parameters :math:`\theta` to 36 | minimize this squared error between the true inputs and the values that the 37 | network produces. 38 | 39 | In theory this could be trivial---if, for example, :math:`F_\theta(x) = x`---but 40 | in practice this doesn't actually happen very often. In addition, a 41 | :ref:`regularizer ` :math:`R(X, \theta)` 42 | can be added to the overall loss for the model to prevent this sort of trivial 43 | solution. 44 | 45 | To create an autoencoder in ``theanets``, just create an instance of the 46 | appropriate network subclass: 47 | 48 | .. code:: python 49 | 50 | net = theanets.Autoencoder() 51 | 52 | Of course you'll also need to specify which types of layers you'd like in your 53 | model; this is discussed in :ref:`guide-creating-specifying-layers`. 54 | 55 | Regression 56 | ========== 57 | 58 | A :class:`regression ` model is much like an 59 | autoencoder. Like an autoencoder, a regression model takes as input an array of 60 | arbitrary data :math:`X \in \mathbb{R}^{m \times n}`. However, at training time, 61 | a regression model also requires an array of expected target outputs :math:`Y 62 | \in \mathbb{R}^{m \times o}`. Like an autoencoder, the error between the 63 | network's output and the target is computed using the mean squared error: 64 | 65 | .. math:: 66 | \mathcal{L}(X, Y, \theta) = \frac{1}{mn} \sum_{i=1}^m \left\| 67 | F_\theta(x_i) - y_i \right\|_2^2 + R(X, \theta) 68 | 69 | The difference here is that instead of trying to produce the input, the 70 | regression model is trying to match the target output. 
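To make the array shapes in this loss concrete, here is a minimal sketch of
training a small regression model on random placeholder data. The layer sizes
and array values are arbitrary, and the call to ``train()`` relies on the
library's default training algorithm and settings:

.. code:: python

   import numpy as np
   import theanets

   # Random placeholder data: m=100 examples, n=10 inputs, o=3 targets.
   X = np.random.randn(100, 10).astype('f')
   Y = np.random.randn(100, 3).astype('f')

   net = theanets.Regressor([10, 20, 3])
   net.train([X, Y])          # minimizes the squared error described above
   print(net.predict(X[:5]))  # network outputs for a few examples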
71 | 72 | To create a regression model in theanets, just invoke the constructor: 73 | 74 | .. code:: python 75 | 76 | net = theanets.Regressor() 77 | 78 | Again, you'll need to specify which types of layers you'd like in your model; 79 | this is discussed in :ref:`guide-creating-specifying-layers`. 80 | 81 | Classification 82 | ============== 83 | 84 | A :class:`classification ` model takes as input 85 | some piece of data that you want to classify (e.g., the pixels of an image, word 86 | counts from a document, etc.) and outputs a probability distribution over 87 | available labels. 88 | 89 | At training time, this type of model requires an array of input data :math:`X 90 | \in \mathbb{R}^{m \times n}` and a corresponding set of integer labels :math:`Y 91 | \in \{1,\dots,k\}^m`. The error is then computed as the cross-entropy between 92 | the network output and the true target labels: 93 | 94 | .. math:: 95 | \mathcal{L}(X, Y, \theta) = -\frac{1}{m} \sum_{i=1}^m \sum_{j=1}^k 96 | \delta_{j,y_i} \log F_\theta(x_i)_j + R(X, \theta) 97 | 98 | where :math:`\delta{a,b}` is the Kronecker delta, which is 1 if :math:`a=b` and 99 | 0 otherwise. 100 | 101 | To create a classifier model in ``theanets``, invoke its constructor: 102 | 103 | .. code:: python 104 | 105 | net = theanets.Classifier() 106 | 107 | As with the other models, you'll need to specify which types of layers you'd 108 | like in your model; this is discussed in 109 | :ref:`guide-creating-specifying-layers`. 110 | 111 | Recurrent Models 112 | ================ 113 | 114 | The three predefined models described above also exist in recurrent 115 | formulations. In recurrent networks, time is an explicit part of the model. In 116 | ``theanets``, if you wish to include recurrent layers in your model, you must 117 | use a model class from the :mod:`theanets.recurrent` module; this is because 118 | recurrent models require input and output data matrices with an additional 119 | dimension to represent time. In general, 120 | 121 | - the data shapes required for a recurrent layer are all one 122 | dimension larger than the corresponding shapes for a feedforward network, 123 | 124 | - the extra dimension represents time, and 125 | 126 | - the extra dimension is located on: 127 | 128 | - the first (0) axis in ``theanets`` versions through 0.6, or 129 | - the second (1) axis in ``theanets`` versions 0.7 and up. 130 | 131 | .. warning:: 132 | 133 | Starting with release 0.7.0 of ``theanets``, recurrent models have changed 134 | the expected axis ordering for data arrays! The axis ordering before version 135 | 0.7.0 was ``(time, batch, variables)``, and the axis ordering starting in the 136 | 0.7.0 release is ``(batch, time, variables)``. 137 | 138 | The new ordering is more consistent with other models in ``theanets``. 139 | Starting in the 0.7 release, the first axis (index 0) of data arrays for all 140 | model types represents the examples in a batch, and the last axis (index -1) 141 | represents the input variables. For recurrent models, the axis in the middle 142 | of a batch (index 1) represents time. 143 | 144 | .. note:: 145 | 146 | In recurrent models, the batch size is currently required to be greater than 147 | one. If you wish to run a recurrent model on a single sample, just create a 148 | batch with two copies of the same sample. 
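As a sketch of the data layout described above (using the ordering from version
0.7 and later), the following builds random placeholder arrays with a batch
axis, a time axis, and a variable axis. The layer specification and training
call follow the same patterns as the feedforward examples and assume the
default trainer settings:

.. code:: python

   import numpy as np
   import theanets

   # (batch, time, variables) ordering, as in theanets 0.7 and later.
   m, t, n, o = 8, 20, 3, 2   # batch size must be greater than one
   X = np.random.randn(m, t, n).astype('f')
   Y = np.random.randn(m, t, o).astype('f')

   net = theanets.recurrent.Regressor([n, (10, 'rnn'), o])
   net.train([X, Y])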
149 | 150 | Autoencoding 151 | ------------ 152 | 153 | A :class:`recurrent autoencoder `, just like its 154 | feedforward counterpart, takes as input a single array of data :math:`X \in 155 | \mathbb{R}^{m \times t \times n}` and attempts to recreate the same data at the 156 | output, under a squared-error loss. 157 | 158 | To create a model of this type, just invoke its constructor: 159 | 160 | .. code:: python 161 | 162 | net = theanets.recurrent.Autoencoder() 163 | 164 | Regression 165 | ---------- 166 | 167 | A :class:`recurrent regression ` model is also 168 | just like its feedforward counterpart. It requires two inputs at training time: 169 | an array of input data :math:`X \in \mathbb{R}^{m \times t \times n}` and a 170 | corresponding array of output data :math:`Y \in \mathbb{R}^{m \times t \times 171 | o}`. Like the feedforward regression models, the recurrent version attempts to 172 | produce the target outputs under a squared-error loss. 173 | 174 | To create a model of this type, just invoke its constructor: 175 | 176 | .. code:: python 177 | 178 | net = theanets.recurrent.Regressor() 179 | 180 | Classification 181 | -------------- 182 | 183 | A :class:`recurrent classification ` model is 184 | like a feedforward classifier in that it takes as input some piece of data that 185 | you want to classify (e.g., the pixels of an image, word counts from a document, 186 | etc.) and outputs a probability distribution over available labels. Computing 187 | the error for this type of model requires an input dataset :math:`X \in 188 | \mathbb{R}^{m \times t \times n}` and a corresponding set of integer labels 189 | :math:`Y \in \mathbb{Z}^{t \times m}`; the error is then computed as the 190 | cross-entropy between the network output and the target labels. 191 | 192 | To create a model of this type, just invoke its constructor: 193 | 194 | .. code:: python 195 | 196 | net = theanets.recurrent.Classifier() 197 | 198 | .. _models-custom: 199 | 200 | Custom Models 201 | ============= 202 | 203 | To create a custom model, just define a new subclass of :class:`theanets.Network 204 | `. 205 | 206 | For instance, the :class:`feedforward autoencoder 207 | ` model is defined basically like this: 208 | 209 | .. code:: python 210 | 211 | class Autoencoder(theanets.Network): 212 | def __init__(self, layers=(), loss='mse', weighted=False): 213 | super(Autoencoder, self).__init__( 214 | layers=layers, loss=loss, weighted=weighted) 215 | 216 | Essentially this model just defines a default loss on top of the functionality 217 | in :class:`theanets.Network ` for creating and managing 218 | layers and loss functions, training the model, making predictions, and so on. 219 | 220 | By defining a custom model class, you can also implement whatever helper 221 | functionality you think will be useful for your task. With the programming power 222 | of Python, the sky's the limit! 223 | -------------------------------------------------------------------------------- /docs/api/reference.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Reference 3 | ========= 4 | 5 | .. automodule:: theanets 6 | :no-members: 7 | :no-inherited-members: 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | theanets.activations.Activation 13 | theanets.activations.LGrelu 14 | theanets.activations.Maxout 15 | theanets.activations.Prelu 16 | theanets.activations.build 17 | theanets.feedforward.Autoencoder 18 | theanets.feedforward.Classifier 19 | theanets.feedforward.Regressor 20 | theanets.graph.Network 21 | theanets.layers.base.Concatenate 22 | theanets.layers.base.Flatten 23 | theanets.layers.base.Input 24 | theanets.layers.base.Layer 25 | theanets.layers.base.Product 26 | theanets.layers.base.Reshape 27 | theanets.layers.convolution.Conv1 28 | theanets.layers.feedforward.Classifier 29 | theanets.layers.feedforward.Feedforward 30 | theanets.layers.feedforward.Tied 31 | theanets.layers.recurrent.Bidirectional 32 | theanets.layers.recurrent.Clockwork 33 | theanets.layers.recurrent.GRU 34 | theanets.layers.recurrent.LSTM 35 | theanets.layers.recurrent.MRNN 36 | theanets.layers.recurrent.MUT1 37 | theanets.layers.recurrent.RNN 38 | theanets.layers.recurrent.RRNN 39 | theanets.layers.recurrent.SCRN 40 | theanets.losses.CrossEntropy 41 | theanets.losses.GaussianLogLikelihood 42 | theanets.losses.Hinge 43 | theanets.losses.KullbackLeiblerDivergence 44 | theanets.losses.Loss 45 | theanets.losses.MaximumMeanDiscrepancy 46 | theanets.losses.MeanAbsoluteError 47 | theanets.losses.MeanSquaredError 48 | theanets.recurrent.Autoencoder 49 | theanets.recurrent.Classifier 50 | theanets.recurrent.Regressor 51 | theanets.recurrent.Text 52 | theanets.recurrent.batches 53 | theanets.regularizers.BernoulliDropout 54 | theanets.regularizers.Contractive 55 | theanets.regularizers.GaussianNoise 56 | theanets.regularizers.HiddenL1 57 | theanets.regularizers.Regularizer 58 | theanets.regularizers.RecurrentNorm 59 | theanets.regularizers.RecurrentState 60 | theanets.regularizers.WeightL1 61 | theanets.regularizers.WeightL2 62 | theanets.trainer.DownhillTrainer 63 | theanets.trainer.SampleTrainer 64 | theanets.trainer.SupervisedPretrainer 65 | theanets.trainer.UnsupervisedPretrainer 66 | -------------------------------------------------------------------------------- /docs/api/regularizers.rst: -------------------------------------------------------------------------------- 1 | .. _regularizers: 2 | 3 | ============ 4 | Regularizers 5 | ============ 6 | 7 | The goal of training a neural network model is to minimize the loss function by 8 | making adjustments to the model parameters. In most practical applications, the 9 | loss is not known a priori, but an estimate of it is computed using a set of 10 | data (the "training data") that has been gathered from the problem being 11 | modeled. 12 | 13 | If a model has many parameters compared with the size of the training dataset, 14 | then many machine learning models exhibit a phenomenon called *overfitting*: the 15 | model may learn to predict the training data with no measurable error, but then 16 | if it is applied to a new dataset, it makes lots of mistakes. In such a case, 17 | the model has essentially memorized the training data at the cost of not being 18 | able to *generalize* to new and unseen, yet similar, datasets. The risk of 19 | overfitting usually increases with the size of the model and decreases with the 20 | size of the training dataset. 21 | 22 | A heuristic that can prevent models from overfitting on small datasets is based 23 | on the observation that "good" parameter values in most models are typically 24 | small: large parameter values often indicate overfitting. 
25 | 26 | One way to encourage a model to use small parameter values is to assume that the 27 | parameter values are sampled from some prior distribution, rather than assuming 28 | that all parameter values in the model are equally likely. In this way of 29 | thinking about parameters, we can manipulate the prior distribution of the 30 | parameter values to express our knowledge as modelers of the problem at hand. 31 | 32 | In ``theanets``, regularization hyperparameters are provided when you train your 33 | model: 34 | 35 | .. code:: python 36 | 37 | net = theanets.Classifier(layers=[784, 1000, 784]) 38 | net.train(..., hidden_l1=0.1) 39 | 40 | Here we've specified that our model has a single, overcomplete hidden layer, and 41 | then when we train it, we specify that the activity of the hidden units in the 42 | network will be penalized with a 0.1 coefficient. The rest of this section 43 | details the built-in regularizers that are available in ``theanets``. 44 | 45 | Decay 46 | ===== 47 | 48 | Using "weight decay," we assume that parameters in a model are drawn from a 49 | zero-mean Gaussian distribution with an isotropic, modeler-specified standard 50 | deviation. In terms of loss functions, this equates to adding a term to the loss 51 | function that computes the :math:`L_2` norm of the parameter values in the 52 | model: 53 | 54 | .. math:: 55 | \mathcal{L}(\cdot) = \dots + \lambda \| \theta \|_2^2 56 | 57 | If the loss :math:`\mathcal{L}(\cdot)` represents some approximation to the 58 | log-posterior distribution of the model parameters given the data 59 | 60 | .. math:: 61 | \mathcal{L}(\cdot) = \log p(\theta|x) \propto \dots + \lambda \| \theta \|_2^2 62 | 63 | then the term with the :math:`L_2` norm on the parameters is like an unscaled 64 | Gaussian distribution. 65 | 66 | This type of regularization is specified using the ``weight_l2`` keyword 67 | argument during training: 68 | 69 | .. code:: python 70 | 71 | net.train(..., weight_l2=1e-4) 72 | 73 | The value of the argument is the strength of the regularizer in the loss for the 74 | model. Larger values create more pressure for small model weights. 75 | 76 | Sparsity 77 | ======== 78 | 79 | Sparse models have been shown to capture regularities seen in the mammalian 80 | visual cortex. In addition, sparse models in machine learning are often more 81 | performant than "dense" models (i.e., models without restriction on the hidden 82 | representation). Furthermore, sparse models tend to yield latent representations 83 | that are easier for humans to interpret than dense models. 84 | 85 | There are two main types of sparsity regularizers provided with ``theanets``: 86 | parameter sparsity and representation sparsity. 87 | 88 | The first type of sparse regularizer is just like weight decay, but instead of 89 | assuming that weights are drawn from a Gaussian distribution, here we assume 90 | that weights in the model are drawn from a distribution with a taller peak at 91 | zero and heavier tails, like a Laplace distribution. In terms of loss function, 92 | this regularizer adds a term with an :math:`L_1` norm to the model: 93 | 94 | .. math:: 95 | \mathcal{L}(\cdot) = \dots + \lambda \| \theta \|_1 96 | 97 | If the loss :math:`\mathcal{L}(\cdot)` represents some approximation to the 98 | log-posterior distribution of the model parameters given the data 99 | 100 | .. math:: 101 | \mathcal{L}(\cdot) = \log p(\theta|x) \propto \dots + \lambda \| \theta \|_1 102 | 103 | then this term is like an unscaled Laplace distribution. 
In practice, this 104 | regularizer encourages many of the model *parameters* to be zero. 105 | 106 | In ``theanets``, this sparse parameter regularization is specified using the 107 | ``weight_l1`` keyword argument during training: 108 | 109 | .. code:: python 110 | 111 | net.train(..., weight_l1=1e-4) 112 | 113 | The value of the argument is the strength of the regularizer in the loss for the 114 | model. The larger the regularization parameter, the more pressure for 115 | zero-valued weights. 116 | 117 | The second type of sparsity regularization puts pressure on the model to develop 118 | hidden *representations* that are mostly zero-valued. In this type of 119 | regularization, the model weights are penalized indirectly, since the hidden 120 | representation (i.e., the values of the hidden layer neurons in the network) are 121 | functions of both the model weights and the input data. In terms of loss 122 | functions, this regularizer adds a term to the loss that penalizes the 123 | :math:`L_1` norm of the hidden layer activations 124 | 125 | .. math:: 126 | \mathcal{L}(\cdot) = \dots + \lambda \sum_{i=2}^{N-1} \| f_i(x) \|_1 127 | 128 | where :math:`f_i(x)` represents the neuron activations of hidden layer 129 | :math:`i`. 130 | 131 | Sparse hidden activations have shown much promise in computational neural 132 | networks. In ``theanets`` this type of regularization is specified using the 133 | ``hidden_l1`` keyword argument during training: 134 | 135 | .. code:: python 136 | 137 | net.train(..., hidden_l1=0.1) 138 | 139 | The value of the argument is the strength of the regularizer in the loss for the 140 | model. Large values create more pressure for hidden representations that use 141 | mostly zeros. 142 | 143 | Noise 144 | ===== 145 | 146 | Another way of regularizing a model to prevent overfitting is to inject noise 147 | into the data or the representations during training. While noise could always 148 | be injected into the training batches manually, ``theanets`` provides two types 149 | of noise regularizers: additive Gaussian noise and multiplicative dropout 150 | (binary) noise. 151 | 152 | In one method, zero-mean Gaussian noise is added to the input data or hidden 153 | representations. These are specified during training using the ``input_noise`` 154 | and ``hidden_noise`` keyword arguments, respectively: 155 | 156 | .. code:: python 157 | 158 | net.train(..., input_noise=0.1) 159 | net.train(..., hidden_noise=0.1) 160 | 161 | The value of the argument specifies the standard deviation of the noise. 162 | 163 | In the other input regularization method, some of the inputs are randomly set to 164 | zero during training (this is sometimes called "dropout" or "multiplicative 165 | masking noise"). This type of noise is specified using the ``input_dropout`` and 166 | ``hidden_dropout`` keyword arguments, respectively: 167 | 168 | .. code:: python 169 | 170 | net.train(..., input_dropout=0.3) 171 | net.train(..., hidden_dropout=0.3) 172 | 173 | The value of the argument specifies the fraction of values in each input or 174 | hidden activation that are randomly set to zero. 175 | 176 | Instead of adding additional terms like the other regularizers, the noise 177 | regularizers can be seen as modifying the original loss for a model. For 178 | instance, consider an autoencoder model with two hidden layers: 179 | 180 | .. 
code:: python 181 | 182 | net = theanets.Autoencoder([ 183 | 100, 184 | dict(size=50, name='a'), 185 | dict(size=80, name='b'), 186 | dict(size=100, name='o')]) 187 | 188 | The loss for this model, without regularization, can be written as: 189 | 190 | .. math:: 191 | \mathcal{L}(X, \theta_a, \theta_b, \theta_o) = \frac{1}{mn} \sum_{i=1}^m \left\| 192 | \sigma_b(\sigma_a(x_i\theta_a)\theta_b)\theta_o - x_i \right\|_2^2 193 | 194 | where we've ignored the bias terms, and :math:`\theta_a`, :math:`\theta_b`, and 195 | :math:`\theta_o` are the parameters for layers a, b, and o, respectively. Also, 196 | :math:`\sigma_a` and :math:`\sigma_b` are the activation functions for their 197 | respective hidden layers. 198 | 199 | If we train this model using input and hidden noise: 200 | 201 | .. code:: python 202 | 203 | net.train(..., input_noise=q, hidden_noise=r) 204 | 205 | then the loss becomes: 206 | 207 | .. math:: 208 | \mathcal{L}(X, \theta_a, \theta_b, \theta_o) = \frac{1}{mn} \sum_{i=1}^m \left\| 209 | \left( \sigma_b\left( 210 | (\sigma_a((x_i+\epsilon_q)\theta_a)+\epsilon_r)\theta_b \right) + 211 | \epsilon_r \right)\theta_o - x_i \right\|_2^2 212 | 213 | where :math:`\epsilon_q` is white Gaussian noise drawn from 214 | :math:`\mathcal{N}(0, qI)` and :math:`\epsilon_r` is white Gaussian noise drawn 215 | separately for each hidden layer from :math:`\mathcal{N}(0, rI)`. The additive 216 | noise pushes the data and the representations off of their respective manifolds, 217 | but the loss is computed with respect to the uncorrupted input. This is thought 218 | to encourage the model to develop representations that push towards the true 219 | manifold of the data. 220 | 221 | Predefined Regularizers 222 | ======================= 223 | 224 | .. automodule:: theanets.regularizers 225 | :no-members: 226 | :no-inherited-members: 227 | 228 | .. autosummary:: 229 | :toctree: generated/ 230 | 231 | Regularizer 232 | HiddenL1 233 | WeightL1 234 | WeightL2 235 | Contractive 236 | RecurrentNorm 237 | RecurrentState 238 | BernoulliDropout 239 | GaussianNoise 240 | 241 | .. _regularizers-custom: 242 | 243 | Custom Regularizers 244 | =================== 245 | 246 | To create a custom regularizer in ``theanets``, you need to create a custom 247 | subclass of the :class:`theanets.Regularizer 248 | ` class, and then provide this regularizer 249 | when you run your model. 250 | 251 | To illustrate, let's suppose you created a linear autoencoder model that had a 252 | larger hidden layer than your dataset: 253 | 254 | .. code:: python 255 | 256 | net = theanets.Autoencoder([4, (8, 'linear'), (4, 'tied')]) 257 | 258 | Then, at least in theory, you risk learning an uninteresting "identity" model 259 | such that some hidden units are never used, and the ones that are have weights 260 | equal to the identity matrix. To prevent this from happening, you can impose a 261 | sparsity penalty when you train your model: 262 | 263 | .. code:: python 264 | 265 | net.train(..., hidden_l1=0.001) 266 | 267 | But then you might run into a situation where the sparsity penalty drives some 268 | of the hidden units in the model to zero, to "save" loss during training. 269 | Zero-valued features are probably not so interesting, so we can introduce 270 | another penalty to prevent feature weights from going to zero: 271 | 272 | .. 
code:: python 273 | 274 | class WeightInverse(theanets.Regularizer): 275 | def loss(self, layers, outputs): 276 | return sum((1 / (p * p).sum(axis=0)).sum() 277 | for l in layers for p in l.params 278 | if p.ndim == 2) 279 | 280 | net = theanets.Autoencoder([4, (8, 'linear'), (4, 'tied')]) 281 | net.train(..., hidden_l1=0.001, weightinverse=0.001) 282 | 283 | This code adds a new regularizer that penalizes the inverse of the squared 284 | length of each of the weights in the model's layers. Here we detect weights by 285 | only including parameters with 2 dimensions. 286 | -------------------------------------------------------------------------------- /docs/api/trainers.rst: -------------------------------------------------------------------------------- 1 | .. _trainers: 2 | 3 | ======== 4 | Trainers 5 | ======== 6 | 7 | The most common method for training a neural network model is to use a 8 | stochastic gradient-based optimizer. In ``theanets`` many of these algorithms 9 | are available by interfacing with the ``downhill`` package: 10 | 11 | - ``sgd``: `Stochastic gradient descent`_ 12 | - ``nag``: `Nesterov's accelerated gradient`_ 13 | - ``rprop``: `Resilient backpropagation`_ 14 | - ``rmsprop``: RMSProp_ 15 | - ``adadelta``: ADADELTA_ 16 | - ``esgd``: `Equilibrated SGD`_ 17 | - ``adam``: Adam_ 18 | 19 | .. _Stochastic gradient descent: http://downhill.readthedocs.org/en/stable/generated/downhill.first_order.SGD.html 20 | .. _Nesterov's accelerated gradient: http://downhill.readthedocs.org/en/stable/generated/downhill.first_order.NAG.html 21 | .. _Resilient backpropagation: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.RProp.html 22 | .. _RMSProp: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.RMSProp.html 23 | .. _ADADELTA: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.ADADELTA.html 24 | .. _Equilibrated SGD: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.ESGD.html 25 | .. _Adam: http://downhill.readthedocs.org/en/stable/generated/downhill.adaptive.Adam.html 26 | 27 | In addition to the optimization algorithms provided by ``downhill``, 28 | ``theanets`` defines a few algorithms that are more specific to neural networks. 29 | These trainers tend to take advantage of the layered structure of the loss 30 | function for a network. 31 | 32 | - ``sample``: :class:`Sample trainer ` 33 | 34 | This trainer sets model parameters directly to samples drawn from the training 35 | data. This is a very fast "training" algorithm since all updates take place at 36 | once; however, often features derived directly from the training data require 37 | further tuning to perform well. 38 | 39 | - ``layerwise``: :class:`Layerwise (supervised) pretrainer ` 40 | 41 | Greedy supervised layerwise pre-training: This trainer applies RMSProp to each 42 | layer sequentially. 43 | 44 | - ``pretrain``: :class:`Unsupervised pretrainer ` 45 | 46 | Greedy unsupervised layerwise pre-training: This trainer applies RMSProp to a 47 | tied-weights "shadow" autoencoder using an unlabeled dataset, and then transfers 48 | the learned autoencoder weights to the model being trained. 49 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utilities: 2 | 3 | ========= 4 | Utilities 5 | ========= 6 | 7 | Recurrent helpers 8 | ================= 9 | 10 | .. 
automodule:: theanets.recurrent 11 | :no-members: 12 | :no-inherited-members: 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | 17 | batches 18 | Text 19 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import better 5 | 6 | extensions = [ 7 | 'sphinx.ext.autodoc', 8 | 'sphinx.ext.autosummary', 9 | 'sphinx.ext.intersphinx', 10 | 'sphinx.ext.mathjax', 11 | 'sphinx.ext.viewcode', 12 | 'numpydoc', 13 | ] 14 | autosummary_generate = True 15 | autodoc_default_flags = ['members'] 16 | numpydoc_show_class_members = False 17 | numpydoc_show_inherited_class_members = True 18 | source_suffix = '.rst' 19 | source_encoding = 'utf-8-sig' 20 | master_doc = 'index' 21 | project = u'Theanets' 22 | copyright = u'2015, Leif Johnson' 23 | version = '0.8' 24 | release = '0.8.0pre' 25 | exclude_patterns = ['_build'] 26 | templates_path = ['_templates'] 27 | pygments_style = 'tango' 28 | 29 | html_theme = 'better' 30 | html_theme_path = [better.better_theme_path] 31 | html_theme_options = dict( 32 | rightsidebar=False, 33 | inlinecss='', 34 | cssfiles=['_static/style-tweaks.css'], 35 | showheader=True, 36 | showrelbartop=True, 37 | showrelbarbottom=True, 38 | linktotheme=True, 39 | sidebarwidth='15rem', 40 | textcolor='#111', 41 | headtextcolor='#333', 42 | footertextcolor='#333', 43 | ga_ua='', 44 | ga_domain='', 45 | ) 46 | html_short_title = 'Home' 47 | html_static_path = ['_static'] 48 | 49 | 50 | def h(xs): 51 | return ['{}.html'.format(x) for x in xs.split()] 52 | html_sidebars = { 53 | 'index': h('gitwidgets globaltoc sourcelink searchbox'), 54 | '**': h('gitwidgets localtoc sourcelink searchbox'), 55 | } 56 | 57 | intersphinx_mapping = { 58 | 'python': ('https://docs.python.org/3.4/', None), 59 | 'downhill': ('http://downhill.readthedocs.org/en/stable/', None), 60 | 'numpy': ('http://docs.scipy.org/doc/numpy/', None), 61 | 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), 62 | } 63 | -------------------------------------------------------------------------------- /docs/examples/mnist-classifier.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | Classifying MNIST Digits 3 | ======================== 4 | 5 | A standard benchmark for neural network :class:`classification 6 | ` is the `MNIST digits dataset 7 | `_, a set of 70,000 28×28 images of 8 | hand-written digits. Each MNIST digit is labeled with the correct digit class 9 | (0, 1, ... 9). This example shows how to use ``theanets`` to create and train a 10 | model that can perform this task. 11 | 12 | .. image:: ../_static/mnist-digits-small.png 13 | 14 | Networks for classification map a layer of continuous-valued inputs, through one 15 | or more hidden layers, to an output layer that is activated through the `softmax 16 | function`_. The softmax generates output that can be treated as a categorical 17 | distribution over the digit labels given the input image. 18 | 19 | .. _softmax function: http://en.wikipedia.org/wiki/Softmax_function 20 | 21 | Defining the model 22 | ------------------ 23 | 24 | Now that you know which model to use for this task, you'll need to define some 25 | hyperparameters that determine the structure of your network. The most important 26 | of these is choosing a set of layer sizes that you want in your model. 
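The sizes can also be read directly off your data arrays rather than hard-coded; here is a purely hypothetical sketch (``samples`` and ``labels`` are assumed placeholder names, and the required sizes are explained in detail just below)::

    # Hypothetical sketch: derive the layer sizes from the data arrays.
    n_inputs = samples.shape[1]          # 784 for vectorized MNIST pixels
    n_classes = len(np.unique(labels))   # 10 for the digit classes
    net = theanets.Classifier(layers=[n_inputs, 100, n_classes])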
27 | 28 | The first ("input") and last ("output") layers in your network must match the 29 | size of the data you'll be providing. For an MNIST classification task, this 30 | means your network must have 784 inputs (one for each image pixel) and 10 31 | outputs (one for each class). 32 | 33 | Between the input and output layers, on the other hand, can be any number of 34 | so-called "hidden" layers, in almost any configuration. Models with more than 35 | about two hidden layers are commonly called "deep" models and have been quite 36 | popular recently due to their success on a variety of difficult machine learning 37 | problems. For now, though, to keep things simple, let's start out with a model 38 | that just has one hidden layer with 100 units. 39 | 40 | Once you've chosen the layers you want in your model, you typically pass the 41 | layers to the model constructor:: 42 | 43 | net = theanets.Classifier(layers=[784, 100, 10]) 44 | 45 | This is all that's required to get started. There are many different 46 | hyperparameters that can also be useful when constructing a model; see 47 | :ref:`guide-creating` for more information. Particularly useful to know will be 48 | the different ways of creating layers; see 49 | :ref:`guide-creating-specifying-layers` for details. 50 | 51 | Preparing the data 52 | ------------------ 53 | 54 | In ``theanets``, the parameters of a model are initialized randomly. To improve 55 | the model's performance on the task, you'll need to train the model parameters. 56 | This training process requires a dataset to compute gradient and loss function 57 | values. 58 | 59 | In the case of the MNIST digits, our classifier model will consume a dataset 60 | consisting of two parts---"samples" (image pixels) and corresponding "labels" 61 | (integer class values). Each of these parts is provided as a ``numpy`` array: 62 | the samples are a two-dimensional array, with vectorized MNIST pixels arranged 63 | along the first axis and pixel data arranged along the second axis; the labels 64 | are a one-dimensional array, with one integer value per MNIST image. 65 | 66 | For easy access to the MNIST digits dataset, we'll use the ``skdata`` package 67 | and write a little bit of glue code to get the data into the desired format:: 68 | 69 | def load_mnist(): 70 | mnist = skdata.mnist.dataset.MNIST() 71 | mnist.meta # trigger download if needed. 72 | def arr(n, dtype): 73 | # convert an array to the proper shape and dtype 74 | arr = mnist.arrays[n] 75 | return arr.reshape((len(arr), -1)).astype(dtype) 76 | train_images = arr('train_images', 'f') / 255. 77 | train_labels = arr('train_labels', np.uint8) 78 | test_images = arr('test_images', 'f') / 255. 79 | test_labels = arr('test_labels', np.uint8) 80 | return ((train_images[:50000], train_labels[:50000, 0]), 81 | (train_images[50000:], train_labels[50000:, 0]), 82 | (test_images, test_labels[:, 0])) 83 | 84 | Here we've rescaled the image data so that each pixel lies in the interval [0, 85 | 1] instead of the default [0, 255]. (In general, it's a good idea to standardize 86 | the data for your problem so that each dimension has approximately the same 87 | scale.) We've also reshaped the data as described above. 88 | 89 | .. note:: 90 | 91 | Because ``theanets`` uses Theano for its computations, most datasets need to 92 | be cast to a value that is compatible with your setting for 93 | `Theano's "floatX" configuration parameter`_. 
Unless you have a really 94 | expensive GPU, this is likely to mean that you need to use 32-bit floats. 95 | 96 | .. _Theano's "floatX" configuration parameter: http://deeplearning.net/software/theano/library/config.html#config.floatX 97 | 98 | The load function returns a training split (the first 50000 examples), a 99 | validation split (the remainder of the training data from ``skdata``, containing 100 | 10000 examples), and a test split (the test split from ``skdata``, containing 101 | 10000 examples). The training dataset is used to compute parameter updates, and 102 | the validation dataset is used to determine when the model has stopped 103 | improving during training. 104 | 105 | There are other ways to provide data to your model during training; for a more 106 | complete description, see :ref:`guide-training-providing-data`. 107 | 108 | Training the model 109 | ------------------ 110 | 111 | Now that you have a model and some data, you're ready to train the model so that 112 | it performs the classification task as well as possible. Models are set up to 113 | handle training with fairly little work. 114 | 115 | The main decision to make during training is to choose the training algorithm to 116 | use, along with values for any associated hyperparameters. This is most 117 | naturally accomplished using the :func:`Network.train() 118 | ` method:: 119 | 120 | train, valid, test = load_mnist() 121 | 122 | net.train(train, 123 | valid, 124 | algo='nag', 125 | learning_rate=1e-3, 126 | momentum=0.9) 127 | 128 | The first positional argument to this method is the training dataset, and the 129 | second (if provided) is a validation dataset. If a validation dataset is not 130 | provided, the training dataset will be used for validation. 131 | 132 | The ``algo`` keyword argument specifies an algorithm to use for training. If you 133 | do not provide a value for this argument, :class:`RMSProp 134 | ` is currently used as the default training 135 | algorithm. Any subsequent keyword arguments will be passed to the training 136 | algorithm; these arguments typically specify hyperparameters of the algorithm 137 | like the learning rate and so forth. 138 | 139 | The available training methods are described in :ref:`trainers`; here we've 140 | specified Nesterov's Accelerated Gradient, a type of stochastic gradient descent 141 | with momentum. 142 | 143 | Visualizing features 144 | -------------------- 145 | 146 | Once you've trained a classification model for MNIST digits, it can be 147 | informative to visually inspect the features that the model has learned. Because 148 | the model was trained using the MNIST digits, you can reshape the learned 149 | features and visualize them as though they were 28×28 images:: 150 | 151 | img = np.zeros((28 * 10, 28 * 10), dtype='f') 152 | for i, pix in enumerate(net.find('hid1', 'w').get_value().T): 153 | r, c = divmod(i, 10) 154 | img[r * 28:(r+1) * 28, c * 28:(c+1) * 28] = pix.reshape((28, 28)) 155 | plt.imshow(img, cmap=plt.cm.gray) 156 | plt.show() 157 | 158 | In this example, the weights in layer 1 connect the inputs to the first hidden 159 | layer; these weights have one column of 784 values for each hidden node in the 160 | network, so we can iterate over the transpose and put each column---properly 161 | reshaped---into a giant image. 
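If the indexing here seems opaque, a quick shape check can help; this is a hedged aside (not part of the original example) that assumes the ``[784, 100, 10]`` model defined above::

    w = net.find('hid1', 'w').get_value()
    print(w.shape)   # (784, 100): one column of 784 pixel weights per hidden unit
    # iterating over w.T therefore yields one 784-element feature at a time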
162 | 163 | The trained model can also be used to predict the class for a new MNIST digit:: 164 | 165 | predicted_class = net.predict(new_digit) 166 | 167 | For more information on the things you can do with a model, see 168 | :ref:`guide-using`. 169 | -------------------------------------------------------------------------------- /docs/examples/recurrent-memory.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | Remembering Network Inputs 3 | ========================== 4 | 5 | Recurrent neural networks are a family of network models whose computation graph 6 | contains a cycle---that is, there are some layers in a recurrent network whose 7 | outputs at a certain time step depend not only on the inputs at that time step, 8 | but also on the state of the network at some previous time step as well. 9 | 10 | Recurrent networks, while often quite tricky to train, can be used to solve 11 | difficult modeling tasks. Thanks to recent advances in optimization algorithms, 12 | recurrent networks are enjoying a resurgence in popularity and have been shown 13 | to be quite effective at a number of different temporal modeling tasks. 14 | 15 | In this section we consider a classic task for a recurrent network: remembering 16 | data from past inputs. In this task, a network model receives one input value at 17 | each time step. The network is to remember the first :math:`k` values, then wait 18 | for :math:`t` time steps, and then reproduce the first :math:`k` values that it 19 | saw. Effectively the model must ignore the inputs after time step :math:`k` and 20 | start producing the desired output at time step :math:`k + t`. 21 | 22 | Defining the model 23 | ================== 24 | 25 | We'll set up a recurrent model by creating a :class:`recurrent regression 26 | ` instance:: 27 | 28 | net = theanets.recurrent.Regressor(layers=[1, ('lstm', 10), 1]) 29 | 30 | Our network has three layers: the first just has one input unit, the next is a 31 | Long Short-Term Memory (LSTM) recurrent layer with ten units, and the output is 32 | a linear layer with just one output unit. This is just one way of specifying 33 | layers in a network; for more details see 34 | :ref:`guide-creating-specifying-layers`. 35 | 36 | Training the model 37 | ================== 38 | 39 | The most difficult part of training this model is creating the required data. To 40 | compute the loss for a recurrent regression model in ``theanets``, we need to 41 | provide two arrays of data---one input array, and one target output array. Each 42 | of these arrays must have three dimensions: the first is time, the second is the 43 | batch size, and the third is the number of inputs/outputs in the dataset. 44 | 45 | For the memory task, we can easily create random arrays with the appropriate 46 | shape. We just need to make sure that the last :math:`k` time steps of the 47 | output are set to the first :math:`k` time steps of the input:: 48 | 49 | T = 20 50 | K = 3 51 | BATCH_SIZE = 32 52 | 53 | def generate(): 54 | s, t = np.random.randn(2, T, BATCH_SIZE, 1).astype('f') 55 | s[:K] = t[-K:] = np.random.randn(K, BATCH_SIZE, 1) 56 | return [s, t] 57 | 58 | In ``theanets``, data can be provided to a trainer in several ways; here we've 59 | used a callable that generates batches of data for us. See 60 | :ref:`guide-training-providing-data` for more information. 
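As a quick check (an added aside, not part of the original example), calling the generator once and printing the shapes confirms the (time, batch size, inputs) layout described above::

    s, t = generate()
    print(s.shape, t.shape)   # both (20, 32, 1): T time steps, 32 examples, 1 value each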
61 | 62 | Having set up a way to create training data, we just need to pass this along to 63 | our training algorithm:: 64 | 65 | net.train(generate, algo='rmsprop') 66 | 67 | This process will adjust the weights in the model so that the outputs of the 68 | model, given the inputs, will be closer and closer to the targets that we 69 | provide. 70 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | THEANETS 3 | ======== 4 | 5 | The ``theanets`` package is a deep learning and neural network toolkit. It is 6 | written in Python to interoperate with excellent tools like ``numpy`` and 7 | ``scikit-learn``, and it uses Theano_ to accelerate computations when possible 8 | using your GPU. The package aims to provide: 9 | 10 | - a simple API for building and training common types of neural network models; 11 | - thorough documentation; 12 | - easy-to-read code; 13 | - and, under the hood, a fully expressive graph computation framework. 14 | 15 | The package strives to "make the easy things easy and the difficult things 16 | possible." Please try it out, and let us know what you think! 17 | 18 | The source code for ``theanets`` lives at http://github.com/lmjohns3/theanets, 19 | the documentation lives at http://theanets.readthedocs.org, and announcements 20 | and discussion happen on the `mailing list`_. 21 | 22 | .. _Theano: http://deeplearning.net/software/theano/ 23 | .. _mailing list: https://groups.google.com/forum/#!forum/theanets 24 | 25 | Quick Start: Classification 26 | =========================== 27 | 28 | Suppose you want to create a classifier and train it on some 100-dimensional 29 | data points that you've classified into 10 categories. No problem! With just a 30 | few lines you can (a) provide some data, (b) build and (c) train a model, 31 | and (d) evaluate the model:: 32 | 33 | import theanets 34 | from sklearn.datasets import make_classification 35 | from sklearn.metrics import confusion_matrix 36 | 37 | # Create a classification dataset. 38 | X, y = make_classification( 39 | n_samples=3000, n_features=100, n_classes=10, n_informative=10) 40 | X = X.astype('f') 41 | y = y.astype('i') 42 | cut = int(len(X) * 0.8) # training / validation split 43 | train = X[:cut], y[:cut] 44 | valid = X[cut:], y[cut:] 45 | 46 | # Build a classifier model with 100 inputs and 10 outputs. 47 | net = theanets.Classifier([100, 10]) 48 | 49 | # Train the model using SGD with momentum. 50 | net.train(train, valid, algo='sgd', learning_rate=1e-4, momentum=0.9) 51 | 52 | # Show confusion matrices on the training/validation splits. 53 | for label, (X, y) in (('training:', train), ('validation:', valid)): 54 | print(label) 55 | print(confusion_matrix(y, net.predict(X))) 56 | 57 | Layers 58 | ------ 59 | 60 | The model above is quite simplistic! Make it a bit more sophisticated by adding 61 | a hidden layer:: 62 | 63 | net = theanets.Classifier([100, 1000, 10]) 64 | 65 | In fact, you can just as easily create 3 (or any number of) hidden layers:: 66 | 67 | net = theanets.Classifier([ 68 | 100, 1000, 1000, 1000, 10]) 69 | 70 | By default, hidden layers use the relu transfer function. By passing a tuple 71 | instead of just an integer, you can change some of these layers to use different 72 | :mod:`activations `:: 73 | 74 | maxout = (1000, 'maxout:4') # maxout with 4 pieces. 
75 | net = theanets.Classifier([ 76 | 100, 1000, maxout, (1000, 'tanh'), 10]) 77 | 78 | By passing a dictionary instead, you can specify even more attributes of each 79 | :mod:`layer `, like how its parameters are initialized:: 80 | 81 | # Sparsely-initialized layer with large nonzero weights. 82 | foo = dict(name='foo', size=1000, std=1, sparsity=0.9) 83 | net = theanets.Classifier([ 84 | 100, foo, (1000, 'maxout:4'), (1000, 'tanh'), 10]) 85 | 86 | Specifying layers is the heart of building models in ``theanets``. Read more 87 | about this in :ref:`guide-creating-specifying-layers`. 88 | 89 | Regularization 90 | -------------- 91 | 92 | Adding regularizers is easy, too! Just pass them to the training method. For 93 | instance, you can train up a sparse classification model with weight decay:: 94 | 95 | # Penalize hidden-unit activity (L1 norm) and weights (L2 norm). 96 | net.train(train, valid, hidden_l1=0.001, weight_l2=0.001) 97 | 98 | In ``theanets`` dropout is treated as a regularizer and can be set on many 99 | layers at once:: 100 | 101 | net.train(train, valid, hidden_dropout=0.5) 102 | 103 | or just on a specific layer:: 104 | 105 | net.train(train, valid, dropout={'foo:out': 0.5}) 106 | 107 | Similarly, you can add Gaussian noise to any of the layers (here, just to the 108 | input layer):: 109 | 110 | net.train(train, valid, input_noise=0.3) 111 | 112 | Optimization Algorithms 113 | ----------------------- 114 | 115 | You can optimize your model using any of the algorithms provided by downhill_ 116 | (SGD, NAG, RMSProp, ADADELTA, etc.), or additionally using a couple of 117 | :mod:`pretraining methods ` specific to neural networks. 118 | 119 | .. _downhill: http://downhill.readthedocs.org/ 120 | .. _pretraining methods: http://theanets.readthedocs.org/en/latest/reference.html#module-theanets.trainer 121 | 122 | You can also make as many successive calls to :func:`train() 123 | ` as you like. Each call can include different 124 | training algorithms:: 125 | 126 | net.train(train, valid, algo='rmsprop') 127 | net.train(train, valid, algo='nag') 128 | 129 | different learning hyperparameters:: 130 | 131 | net.train(train, valid, algo='rmsprop', learning_rate=0.1) 132 | net.train(train, valid, algo='rmsprop', learning_rate=0.01) 133 | 134 | and different regularization hyperparameters:: 135 | 136 | net.train(train, valid, input_noise=0.7) 137 | net.train(train, valid, input_noise=0.3) 138 | 139 | Training models is a bit more art than science, but ``theanets`` tries to make 140 | it easy to evaluate different training approaches. Read more about this in 141 | :ref:`guide-training`. 142 | 143 | Quick Start: Recurrent Models 144 | ============================= 145 | 146 | Recurrent neural networks are becoming quite important for many sequence-based 147 | tasks in machine learning; one popular toy example for recurrent models is to 148 | generate text that's similar to some body of training text. 149 | 150 | In these models, a recurrent classifier is set up to predict the identity of the 151 | next character in a sequence of text, given all of the preceding characters. The 152 | inputs to the model are the one-hot encodings of a sequence of characters from 153 | the text, and the corresponding outputs are the class labels of the subsequent 154 | character. 
The ``theanets`` code has a :class:`Text ` 155 | helper class that provides easy encoding and decoding of text to and from 156 | integer classes; using the helper makes the top-level code look like:: 157 | 158 | import numpy as np, re, theanets 159 | 160 | chars = re.sub(r'\s+', ' ', open('corpus.txt').read().lower()) 161 | txt = theanets.recurrent.Text(chars, min_count=10) 162 | A = 1 + len(txt.alpha) # of letter classes 163 | 164 | # create a model to train: input -> gru -> relu -> softmax. 165 | net = theanets.recurrent.Classifier([ 166 | A, (100, 'gru'), (1000, 'relu'), A]) 167 | 168 | # train the model iteratively; draw a sample after every epoch. 169 | seed = txt.encode(txt.text[300017:300050]) 170 | for tm, _ in net.itertrain(txt.classifier_batches(100, 32), momentum=0.9): 171 | print('{}|{} ({:.1f}%)'.format( 172 | txt.decode(seed), 173 | txt.decode(net.predict_sequence(seed, 40)), 174 | 100 * tm['acc'])) 175 | 176 | This example uses several features of ``theanets`` that make modeling neural 177 | networks fun and interesting. The model uses a layer of :class:`Gated Recurrent 178 | Units ` to capture the temporal dependencies in 179 | the data. It also `uses a callable`_ to provide data to the model, and takes 180 | advantage of `iterative training`_ to sample an output from the model after each 181 | training epoch. 182 | 183 | .. _uses a callable: http://downhill.readthedocs.org/en/stable/guide.html#data-using-callables 184 | .. _iterative training: http://downhill.readthedocs.org/en/stable/guide.html#iterative-optimization 185 | 186 | To run this example, download a text you'd like to model (e.g., Herman 187 | Melville's *Moby Dick*) and save it in ``corpus.txt``:: 188 | 189 | curl http://www.gutenberg.org/cache/epub/2701/pg2701.txt > corpus.txt 190 | 191 | Then when you run the script, the output might look something like this 192 | (abbreviated to show patterns):: 193 | 194 | used for light, but only as an oi|pr vgti ki nliiariiets-a, o t.;to niy , (16.6%) 195 | used for light, but only as an oi|s bafsvim-te i"eg nadg tiaraiatlrekls tv (20.2%) 196 | used for light, but only as an oi|vetr uob bsyeatit is-ad. agtat girirole, (28.5%) 197 | used for light, but only as an oi|siy thinle wonl'th, in the begme sr"hey (29.9%) 198 | used for light, but only as an oi|nr. bonthe the tuout honils ohe thib th (30.5%) 199 | used for light, but only as an oi|kg that mand sons an, of,rtopit bale thu (31.0%) 200 | used for light, but only as an oi|nsm blasc yan, ang theate thor wille han (32.1%) 201 | used for light, but only as an oi|b thea mevind, int amat ars sif istuad p (33.3%) 202 | used for light, but only as an oi|msenge bie therale hing, aik asmeatked s (34.1%) 203 | used for light, but only as an oi|ge," rrermondy ghe e comasnig that urle (35.5%) 204 | used for light, but only as an oi|s or thartich comase surt thant seaiceng (36.1%) 205 | used for light, but only as an oi|s lot fircennor, unding dald bots trre i (37.1%) 206 | used for light, but only as an oi|st onderass noptand. "peles, suiondes is (38.2%) 207 | used for light, but only as an oi|gnith. s. lited, anca! 
stobbease so las, (39.3%) 208 | used for light, but only as an oi|chics fleet dong berieribus armor has or (40.1%) 209 | used for light, but only as an oi|cs and quirbout detom tis glome dold pco (41.1%) 210 | used for light, but only as an oi|nht shome wand, the your at movernife lo (42.0%) 211 | used for light, but only as an oi|r a reald hind the, with of the from sti (43.0%) 212 | used for light, but only as an oi|t beftect. how shapellatgen the fortower (44.0%) 213 | used for light, but only as an oi|rtucated fanns dountetter from fom to wi (45.2%) 214 | used for light, but only as an oi|r the sea priised tay queequings hearhou (46.8%) 215 | used for light, but only as an oi|ld, wode, i long ben! but the gentived. (48.0%) 216 | used for light, but only as an oi|r wide-no nate was him. "a king to had o (49.1%) 217 | used for light, but only as an oi|l erol min't defositanable paring our. 4 (50.0%) 218 | used for light, but only as an oi|l the motion ahab, too, and relay in aha (51.0%) 219 | used for light, but only as an oi|n dago, and contantly used the coil; but (52.3%) 220 | used for light, but only as an oi|l starbuckably happoss of the fullies ti (52.4%) 221 | used for light, but only as an oi|led-bubble most disinuan into the mate-- (53.3%) 222 | used for light, but only as an oi|len. ye?' 'tis though moby starbuck, and (53.6%) 223 | used for light, but only as an oi|l, and the pequodeers. but was all this: (53.9%) 224 | used for light, but only as an oi|ling his first repore to the pequod, sym (54.4%) 225 | used for light, but only as an oi|led escried; we they like potants--old s (54.3%) 226 | used for light, but only as an oi|l-ginqueg! i save started her supplain h (54.3%) 227 | used for light, but only as an oi|l is, the captain all this mildly bounde (54.9%) 228 | 229 | Here, the seed text is shown left of the pipe character, and the randomly 230 | sampled sequence follows. In parantheses are the per-character accuracy values 231 | on the training set while training the model. The pattern of learning proceeds 232 | from almost-random character generation, to producing groups of letters 233 | separated by spaces, to generating words that seem like they might belong in 234 | *Moby Dick*, things like "captain," "ahab, too," and "constantly used the coil." 235 | 236 | Much amusement can be derived from a temporal model extending itself forward in 237 | this way. After all, how else would we ever think of "Pequodeers," 238 | "Starbuckably," or "Ginqueg"?! 239 | 240 | User Guide 241 | ========== 242 | 243 | .. toctree:: 244 | :maxdepth: 2 245 | 246 | guide 247 | 248 | Examples 249 | ======== 250 | 251 | .. toctree:: 252 | :maxdepth: 2 253 | :glob: 254 | 255 | examples/* 256 | 257 | API Documentation 258 | ================= 259 | 260 | .. toctree:: 261 | :maxdepth: 2 262 | :glob: 263 | 264 | api/models 265 | api/layers 266 | api/activations 267 | api/losses 268 | api/regularizers 269 | api/trainers 270 | api/utils 271 | 272 | .. 
toctree:: 273 | :hidden: 274 | 275 | api/reference 276 | 277 | Indices and tables 278 | ================== 279 | 280 | - :ref:`genindex` 281 | - :ref:`modindex` 282 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 
100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\theanets.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\theanets.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | sphinx-better-theme 3 | -------------------------------------------------------------------------------- /examples/cifar-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import click 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import theanets 7 | 8 | from utils import load_cifar, plot_layers, plot_images 9 | 10 | K = 655 # this value of K retains 99% of the variance in the cifar images. 
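# (Added aside, not part of the original script.) A cutoff like K can be
# derived from the eigenvalue spectrum of the data covariance rather than
# hard-coded; a hedged sketch, for a mean-centered data matrix x:
#
#     vals = np.linalg.eigvalsh(np.dot(x.T, x) / len(x))[::-1]
#     frac = np.cumsum(vals) / vals.sum()
#     K = int(np.searchsorted(frac, 0.99)) + 1   # smallest K reaching 99% variance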
11 | 12 | def pca(dataset): 13 | mean = dataset[:3000].mean(axis=0) 14 | 15 | theanets.log('computing whitening transform') 16 | x = dataset[:3000] - mean 17 | vals, vecs = np.linalg.eigh(np.dot(x.T, x) / len(x)) 18 | vals = vals[::-1] 19 | vecs = vecs[:, ::-1] 20 | 21 | vals = np.sqrt(vals[:K]) 22 | vecs = vecs[:, :K] 23 | 24 | def whiten(x): 25 | return np.dot(x, np.dot(vecs, np.diag(1. / vals))) 26 | 27 | def color(z): 28 | return np.dot(z, np.dot(np.diag(vals), vecs.T)) 29 | 30 | return whiten, color 31 | 32 | 33 | @click.command() 34 | @click.option('--features', default=None, type=int, metavar='N', 35 | help='Train a model with NxN hidden features.') 36 | def main(features): 37 | train, valid, _ = load_cifar() 38 | 39 | whiten, color = pca(train[0]) 40 | 41 | feat = features or int(np.sqrt(2 * K)) 42 | n = theanets.Autoencoder([K, feat ** 2, K]) 43 | n.train(whiten(train), whiten(valid), input_noise=1, train_batches=313) 44 | 45 | plot_layers([ 46 | color(n.find('hid1', 'w').get_value().T).T, 47 | color(n.find('out', 'w').get_value())], channels=3) 48 | plt.tight_layout() 49 | plt.show() 50 | 51 | valid = whiten(valid[:100]) 52 | plot_images(color(valid), 121, 'Sample data', channels=3) 53 | plot_images(color(n.predict(valid)), 122, 54 | 'Reconstructed data', channels=3) 55 | plt.tight_layout() 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /examples/lstm-chime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''Theanets example using a deep bidirectional LSTM for phoneme classification. 4 | 5 | This example loads an audio classification benchmark from github, defines a 6 | callable for extracting batches from the downloaded dataset, and trains a deep 7 | classifier network on the data. The network that is evaluated as part of the 8 | benchmarks is a three-layer bidirectional LSTM. Typically the model exceeds 90% 9 | accuracy on the training set, but reaches only about 70% accuracy on the 10 | validation set. Clearly overtraining is a critical issue here. 11 | 12 | This example only works with Python 2 at the moment. 13 | ''' 14 | 15 | import io 16 | import numpy as np 17 | import theanets 18 | import scipy.io 19 | import os 20 | import tempfile 21 | import urllib 22 | import zipfile 23 | 24 | BATCH_SIZE = 32 25 | TRAIN_NC = os.path.join(tempfile.gettempdir(), 'chime1_train.nc') 26 | VALID_NC = os.path.join(tempfile.gettempdir(), 'chime1_valid.nc') 27 | ZIPURL = 'https://github.com/craffel/lstm_benchmarks/archive/master.zip' 28 | 29 | # If needed, get the data files from https://github.com/craffel/lstm_benchmarks. 30 | if not os.path.isfile(TRAIN_NC) or not os.path.isfile(VALID_NC): 31 | theanets.log('attempting data copy from url: {}', ZIPURL) 32 | z = zipfile.ZipFile(io.BytesIO(urllib.urlopen(ZIPURL).read())) 33 | with open(TRAIN_NC, 'wb') as savefile: 34 | savefile.write(z.read('lstm_benchmarks-master/data/train_1_speaker.nc')) 35 | with open(VALID_NC, 'wb') as savefile: 36 | savefile.write(z.read('lstm_benchmarks-master/data/val_1_speaker.nc')) 37 | z.close() 38 | 39 | 40 | def batch_at(features, labels, seq_begins, seq_lengths): 41 | '''Extract a single batch of data to pass to the model being trained. 42 | 43 | Parameters 44 | ---------- 45 | features, labels : ndarray 46 | Arrays of the input features and target labels. 
47 | seq_begins : ndarray 48 | Array of the start offsets of the speech segments to include. 49 | seq_lengths : ndarray 50 | Array of the lengths of the speech segments to include in the batch. 51 | 52 | Returns 53 | ------- 54 | features, labels, mask : ndarrays 55 | A triple of arrays for training a network. The first element contains 56 | input features, the second contains target labels, and the third 57 | contains a "mask" consisting of ones where there is valid data and zeros 58 | everywhere else. 59 | ''' 60 | length = seq_lengths.max() 61 | feat = np.zeros((BATCH_SIZE, length, features.shape[-1]), 'f') 62 | labl = np.zeros((BATCH_SIZE, length), 'i') 63 | mask = np.zeros((BATCH_SIZE, length), 'f') 64 | for b, (begin, length) in enumerate(zip(seq_begins, seq_lengths)): 65 | feat[b, :length] = features[begin:begin+length] 66 | labl[b, :length] = labels[begin:begin+length] 67 | mask[b, :length] = 1 68 | return [feat, labl, mask] 69 | 70 | 71 | def batches(dataset): 72 | '''Returns a callable that chooses sequences from netcdf data.''' 73 | seq_lengths = dataset.variables['seqLengths'].data 74 | seq_begins = np.concatenate(([0], np.cumsum(seq_lengths)[:-1])) 75 | 76 | def sample(): 77 | chosen = np.random.choice( 78 | list(range(len(seq_lengths))), BATCH_SIZE, replace=False) 79 | return batch_at(dataset.variables['inputs'].data, 80 | dataset.variables['targetClasses'].data, 81 | seq_begins[chosen], 82 | seq_lengths[chosen]) 83 | 84 | return sample 85 | 86 | 87 | # Now that we can load data, we construct a recurrent classifier model and then 88 | # train it up! Training progress will be displayed on the console. This example 89 | # can take a good while to run, especially the first time it is run (it takes 90 | # about 20min to compile the model from scratch, but only a few minutes if all 91 | # of the compiler targets are cached). 92 | 93 | def layer(n): 94 | '''Helper for building a bidirectional LSTM layer with n cells.''' 95 | return dict(form='bidirectional', worker='lstm', size=n) 96 | 97 | n = theanets.recurrent.Classifier( 98 | layers=(39, layer(156), layer(300), layer(102), (51, 'softmax')), 99 | weighted=True, 100 | ) 101 | 102 | n.train( 103 | batches(scipy.io.netcdf_file(TRAIN_NC)), 104 | batches(scipy.io.netcdf_file(VALID_NC)), 105 | algo='rmsprop', 106 | learning_rate=0.0001, 107 | momentum=0.9, 108 | max_gradient_clip=1, 109 | input_noise=0.6, 110 | train_batches=30, 111 | valid_batches=3, 112 | batch_size=BATCH_SIZE, 113 | ) 114 | -------------------------------------------------------------------------------- /examples/mnist-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''Single-layer autoencoder example using MNIST digit data. 4 | 5 | This example shows one way to train a single-layer autoencoder model using the 6 | handwritten MNIST digits. 7 | 8 | This example also shows the use of command-line arguments. 9 | ''' 10 | 11 | import click 12 | import matplotlib.pyplot as plt 13 | import theanets 14 | 15 | from utils import load_mnist, plot_layers, plot_images 16 | 17 | @click.command() 18 | @click.option('--features', default=16, type=int, metavar='N', 19 | help='Train a model with NxN hidden features.') 20 | def main(features): 21 | # load up the MNIST digit dataset. 
22 | train, valid, _ = load_mnist() 23 | 24 | net = theanets.Autoencoder([784, features ** 2, 784]) 25 | net.train(train, valid, 26 | train_batches=100, 27 | input_noise=0.1, 28 | weight_l2=0.0001, 29 | algo='rmsprop', 30 | momentum=0.9, 31 | min_improvement=0.1) 32 | 33 | plot_layers([net.find('hid1', 'w'), net.find('out', 'w')]) 34 | plt.tight_layout() 35 | plt.show() 36 | 37 | v = valid[0][:100] 38 | plot_images(v, 121, 'Sample data') 39 | plot_images(net.predict(v), 122, 'Reconstructed data') 40 | plt.tight_layout() 41 | plt.show() 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /examples/mnist-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers 7 | 8 | 9 | train, valid, _ = load_mnist(labels=True) 10 | 11 | N = 10 12 | 13 | net = theanets.Classifier([784, N * N, ('softmax', 10)]) 14 | net.train(train, valid, min_improvement=0.001, train_batches=100) 15 | 16 | plot_layers([net.find('hid1', 'w'), net.find('out', 'w')]) 17 | plt.tight_layout() 18 | plt.show() 19 | -------------------------------------------------------------------------------- /examples/mnist-convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_filters 7 | 8 | 9 | SHAPE = (28, 28, 1) 10 | 11 | train, valid, _ = load_mnist(labels=True) 12 | 13 | X, y = train 14 | train = X.reshape((-1, ) + SHAPE), y 15 | 16 | X, y = valid 17 | valid = X.reshape((-1, ) + SHAPE), y 18 | 19 | net = theanets.convolution.Classifier([ 20 | SHAPE, dict(form='conv2', size=100, filter_size=(14, 14)), 'flatten', 10]) 21 | net.train(train, valid, train_batches=100, valid_batches=100) 22 | 23 | plot_filters(net.find('hid1', 'w')) 24 | plt.tight_layout() 25 | plt.show() 26 | -------------------------------------------------------------------------------- /examples/mnist-deep-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers, plot_images 7 | 8 | 9 | train, valid, _ = load_mnist() 10 | 11 | net = theanets.Autoencoder( 12 | layers=(784, 256, 100, 64, ('tied', 100), ('tied', 256), ('tied', 784)), 13 | ) 14 | net.train(train, valid, 15 | algo='layerwise', 16 | patience=1, 17 | min_improvement=0.05, 18 | train_batches=100) 19 | net.train(train, valid, min_improvment=0.01, train_batches=100) 20 | 21 | plot_layers([net.find(i, 'w') for i in (1, 2, 3)], tied_weights=True) 22 | plt.tight_layout() 23 | plt.show() 24 | 25 | valid = valid[0][:100] 26 | plot_images(valid, 121, 'Sample data') 27 | plot_images(net.predict(valid), 122, 'Reconstructed data') 28 | plt.tight_layout() 29 | plt.show() 30 | -------------------------------------------------------------------------------- /examples/mnist-deep-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import theanets 5 | 6 | from utils import load_mnist, plot_layers, plot_images 7 | 8 | 9 | net = theanets.Classifier( 10 | layers=(784, 1024, 256, 64, ('softmax', 10)), 11 | ) 12 | 13 | # 
first, run an unsupervised layerwise pretrainer. 14 | train, valid, _ = load_mnist() 15 | net.train(train, valid, 16 | algo='pretrain', 17 | patience=1, 18 | min_improvement=0.1, 19 | train_batches=100) 20 | 21 | # second, run a supervised trainer on the classifier model. 22 | train, valid, _ = load_mnist(labels=True) 23 | net.train(train, valid, min_improvement=0.01, train_batches=100) 24 | 25 | plot_layers([net.find(i, 'w') for i in (1, 2, 3)]) 26 | plt.tight_layout() 27 | plt.show() 28 | -------------------------------------------------------------------------------- /examples/mnist-rica.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import theanets 6 | 7 | from utils import load_mnist, plot_layers, plot_images 8 | 9 | 10 | class WeightInverse(theanets.Regularizer): 11 | def loss(self, layers, outputs): 12 | return sum((1 / (w * w).sum(axis=0)).sum() 13 | for l in layers for w in l.params 14 | if w.ndim > 1) 15 | 16 | 17 | (train, ), (valid, ), _ = load_mnist() 18 | 19 | # mean-center the digits and compute a pca whitening transform. 20 | 21 | m = train.mean(axis=0) 22 | train -= m 23 | valid -= m 24 | 25 | theanets.log('computing whitening transform') 26 | vals, vecs = np.linalg.eigh(np.dot(train.T, train) / len(train)) 27 | vals = vals[::-1] 28 | vecs = vecs[:, ::-1] 29 | 30 | K = 197 # this retains 99% of the variance in the digit data. 31 | vals = np.sqrt(vals[:K]) 32 | vecs = vecs[:, :K] 33 | 34 | 35 | def whiten(x): 36 | return np.dot(x, np.dot(vecs, np.diag(1. / vals))) 37 | 38 | 39 | def color(z): 40 | return np.dot(z, np.dot(np.diag(vals), vecs.T)) 41 | 42 | # now train our model on the whitened dataset. 43 | 44 | N = 20 45 | 46 | net = theanets.Autoencoder([K, (N * N, 'linear'), (K, 'tied')]) 47 | 48 | net.train(whiten(train), 49 | whiten(valid), 50 | hidden_l1=0.5, 51 | weightinverse=1e-6, 52 | train_batches=300, 53 | monitors={'hid1:out': (-0.9, -0.1, 0.1, 0.9)}) 54 | 55 | # color the network weights so they are viewable as digits. 
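# (Added note.) find('hid1', 'w') is K x (N*N) in the whitened space: the
# transpose gives one K-dimensional feature per row, color() maps each feature
# back to 784 pixel values, and the final transpose leaves one column per
# feature, matching how the other MNIST examples pass weights to plot_layers.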
56 | plot_layers([color(net.find('hid1', 'w').get_value().T).T], tied_weights=True) 57 | plt.tight_layout() 58 | plt.show() 59 | 60 | plot_images(valid[:N*N], 121, 'Sample data') 61 | plot_images(color(net.predict(whiten(valid[:N*N]))), 122, 'Reconstructed data') 62 | plt.tight_layout() 63 | plt.show() 64 | -------------------------------------------------------------------------------- /examples/recurrent-addition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import numpy.random as rng 6 | import theanets 7 | 8 | BATCH_SIZE = 32 9 | STEPS = 20 10 | 11 | weight = np.zeros((STEPS, BATCH_SIZE, 1), 'f') 12 | weight[-1:] = 1 13 | 14 | 15 | def examples(): 16 | x, z = rng.uniform(0, 1, size=(2, STEPS, BATCH_SIZE, 1)) 17 | y = np.zeros((STEPS, BATCH_SIZE, 1)) 18 | idx = list(range(STEPS - 1)) 19 | for b in range(BATCH_SIZE): 20 | rng.shuffle(idx) 21 | y[idx[0], b] = 1 22 | y[idx[1], b] = 1 23 | z[-1, b] = x[idx[0], b] + x[idx[1], b] 24 | return np.concatenate([x, y], axis=2).astype('f'), z.astype('f'), weight 25 | 26 | src, tgt, wgt = examples() 27 | theanets.log('data batches: {} -> {} @ {}', src.shape, tgt.shape, wgt.shape) 28 | 29 | e = theanets.Experiment( 30 | theanets.recurrent.Regressor, 31 | layers=(2, dict(form='rnn', activation='relu', size=100, radius=1), 1), 32 | weighted=True) 33 | e.train(examples) 34 | prd = e.network.transform(src) 35 | -------------------------------------------------------------------------------- /examples/recurrent-autoencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy.random as rng 4 | import theanets 5 | 6 | TIME = 10 7 | BATCH_SIZE = 32 8 | 9 | e = theanets.Experiment( 10 | theanets.recurrent.Autoencoder, 11 | layers=(3, ('rnn', 10), 3), 12 | batch_size=BATCH_SIZE) 13 | 14 | 15 | def generate(): 16 | return [rng.randn(TIME, BATCH_SIZE, 3).astype('f')] 17 | 18 | batch = generate() 19 | theanets.log('data batches: {}', batch[0].shape) 20 | 21 | e.train(generate) 22 | -------------------------------------------------------------------------------- /examples/recurrent-memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''This example examines recurrent performance in a memory task. 4 | 5 | In the memory task, a network is supposed to read in T frames of n-dimensional 6 | data and reproduce the first t frames at the output of the network, after 7 | reading in T - t additional frames of n-dimensional data. 8 | 9 | This task is quite difficult for most neural network models, since the hidden 10 | layer in the network must effectively store the first inputs somewhere, preserve 11 | those values for an arbitrary amount of time (while also discarding any new 12 | inputs), and then reproduce the stored inputs in the proper order. 13 | 14 | This example uses a vanilla RNN to perform this task, but the network 15 | configuration can easily be changed to test the performance of different 16 | layer types (or even multilayer architectures). 17 | ''' 18 | 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | import theanets 22 | 23 | TIME = 10 # Total numer of time steps. 24 | BITS = 3 # Number of steps to remember/reproduce. 25 | BATCH_SIZE = 32 26 | 27 | 28 | # Here we create a mask that will be used to weight the target outputs for the 29 | # network. 
These weights are zero everywhere except for the last BITS time 30 | # steps, which forces the network to do anything it can to reproduce the input 31 | # pattern at the end of the output. 32 | mask = np.ones((BATCH_SIZE, TIME, 1), bool) 33 | mask[:, :TIME - BITS - 1] = 0 34 | 35 | 36 | # We use a callable to generate a batch of random input data to present to our 37 | # network model. Each batch consists of a random input pattern, a random output 38 | # pattern whose final BITS elements correspond to the initial BITS elements of 39 | # the input, and the fixed weight mask from above. 40 | def generate(): 41 | s, t = np.random.randn(2, BATCH_SIZE, TIME, 1).astype('f') 42 | s[:, :BITS] = t[:, -BITS:] = np.random.randn(BATCH_SIZE, BITS, 1) 43 | return s, t, mask 44 | 45 | src, tgt, msk = generate() 46 | theanets.log('data batches: {} -> {} @ {}', src.shape, tgt.shape, msk.shape) 47 | 48 | 49 | # Create a new recurrent regression model and train it up. 50 | net = theanets.recurrent.Regressor( 51 | layers=(1, dict(form='rnn', activation='relu', size=10, diagonal=1), 1), 52 | weighted=True) 53 | 54 | net.train(generate, 55 | batch_size=BATCH_SIZE, 56 | algorithm='rmsprop', 57 | max_gradient_norm=1, 58 | learning_rate=0.001, 59 | momentum=0.9, 60 | monitor_gradients=True) 61 | 62 | 63 | # Now we plot the results. Our plot contains two rows. On the top row, a random 64 | # batch of input values are shown -- time is on the y-axis, and the examples are 65 | # laid out along the x-axis. On the bottom row, the outputs from the network 66 | # model are shown -- again, time and example are on the y- and x-axes, 67 | # respectively. 68 | def plot(n, z, label, rectangle): 69 | ax = plt.subplot(2, 1, n) 70 | ax.set_frame_on(False) 71 | for loc, spine in ax.spines.items(): 72 | spine.set_color('none') 73 | ax.imshow(z, cmap='gray', vmin=-vm, vmax=vm) 74 | ax.fill_between([-0.5, BATCH_SIZE - 0.5], 75 | rectangle - 0.5, 76 | rectangle + BITS - 0.5, 77 | lw=0, color='#17becf', alpha=0.3) 78 | ax.set_xticks([]) 79 | ax.set_yticks([]) 80 | if n == 2: 81 | ax.set_xlabel('Example') 82 | ax.set_ylabel(label) 83 | 84 | out = net.predict(src)[:, :, 0].T 85 | vm = max(abs(src[:, :BITS]).max(), abs(out[:, -BITS]).max()) 86 | 87 | plot(1, src[:, :, 0].T, 'Input', 0) 88 | plot(2, out, 'Output', TIME - BITS) 89 | 90 | plt.show() 91 | -------------------------------------------------------------------------------- /examples/recurrent-sinusoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | '''This example compares recurrent layer performance on a sine-generation task. 4 | 5 | The task is to generate a complex sine wave that is constructed as a 6 | superposition of a small set of pure frequencies. All networks are constructed 7 | with one input (which receives all zero values), one recurrent hidden layer, and 8 | one output (which is tasked with matching the target sine wave). Each model is 9 | trained and then its predicted output is plotted for easy visual comparison of 10 | the behavior of the different layer models. 11 | 12 | For this task, the clockwork RNN layer tends to perform the best of the layer 13 | models, even though the clockwork layer uses the simplest activation (linear) 14 | and has the fewest parameters (~2000 for a 64-node hidden layer, versus ~4000 15 | for a vanilla RNN and ~17000 for an LSTM). The vanilla RNN layer tends to do the 16 | worst, or at the least is the most sensitive to the initialization of the 17 | parameters. 
The other layer models fall somewhere in the middle but tend only to 18 | match the dominant frequency in the target wave. 19 | ''' 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import theanets 24 | 25 | COLORS = ['#d62728', '#1f77b4', '#2ca02c', '#9467bd', '#ff7f0e', 26 | '#e377c2', '#8c564b', '#bcbd22', '#7f7f7f', '#17becf'] 27 | 28 | BATCH_SIZE = 2 29 | 30 | 31 | # Construct a complex sine wave as a sum of pure-frequency waves. 32 | TAU = 2 * np.pi 33 | T = np.linspace(0, TAU, 256) 34 | SIN = sum(c * np.sin(TAU * f * T) for c, f in ((2, 1.5), (3, 1.8), (4, 1.1))) 35 | 36 | 37 | # Create an input dataset consisting of all zeros, and an output dataset 38 | # containing the target sine wave. We have to stack the target sine wave here 39 | # because recurrent models expect a tensor with three dimensions, and the batch 40 | # size for recurrent networks must be greater than 1. 41 | ZERO = np.zeros((BATCH_SIZE, len(T), 1), 'f') 42 | WAVES = np.concatenate([SIN[None, :, None]] * BATCH_SIZE, axis=0).astype('f') 43 | 44 | 45 | # Set up plotting axes to show the output result and learning curves. 46 | _, (wave_ax, learn_ax) = plt.subplots(2, 1) 47 | 48 | # Plot the target wave. 49 | wave_ax.plot(T, SIN, ':', label='Target', alpha=0.7, color='#111111') 50 | 51 | 52 | # For each layer type, train a model containing that layer, and plot its 53 | # predicted output. 54 | for i, layer in enumerate(( 55 | dict(form='rnn', activation='linear', diagonal=0.5), 56 | dict(form='rnn', activation='relu', diagonal=0.5), 57 | dict(form='rrnn', activation='relu', rate='vector', diagonal=0.5), 58 | dict(form='scrn', activation='elu'), 59 | dict(form='gru', activation='relu'), 60 | dict(form='lstm', activation='tanh'), 61 | dict(form='clockwork', activation='linear', periods=(1, 4, 16, 64)))): 62 | name = '{form}+{activation}'.format(**layer) 63 | layer['size'] = 64 64 | theanets.log('training {} model', name) 65 | net = theanets.recurrent.Regressor([1, layer, 1]) 66 | losses = [] 67 | for tm, _ in net.itertrain([ZERO, WAVES], 68 | monitor_gradients=True, 69 | batch_size=BATCH_SIZE, 70 | algorithm='rmsprop', 71 | learning_rate=0.0001, 72 | momentum=0.9, 73 | min_improvement=0.01): 74 | losses.append(tm['loss']) 75 | prd = net.predict(ZERO) 76 | wave_ax.plot(T, prd[0, :, 0].flatten(), label=name, alpha=0.7, color=COLORS[i]) 77 | learn_ax.plot(losses, label=name, alpha=0.7, color=COLORS[i]) 78 | 79 | 80 | # Make the plots look nice. 
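# Both panels get the same cosmetic treatment below: ticks only on the bottom and
# left, the top and right spines hidden, and the remaining spines offset outward
# so the traces are easier to read.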
81 | for ax in [wave_ax, learn_ax]: 82 | ax.xaxis.tick_bottom() 83 | ax.yaxis.tick_left() 84 | ax.spines['top'].set_color('none') 85 | ax.spines['right'].set_color('none') 86 | ax.spines['bottom'].set_position(('outward', 6)) 87 | ax.spines['left'].set_position(('outward', 6)) 88 | 89 | wave_ax.set_ylabel('Amplitude') 90 | wave_ax.set_xlabel('Time') 91 | 92 | learn_ax.set_ylabel('Loss') 93 | learn_ax.set_xlabel('Training Epoch') 94 | learn_ax.grid(True) 95 | 96 | plt.legend() 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/recurrent-text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import theanets 6 | 7 | import utils 8 | 9 | COLORS = ['#d62728', '#1f77b4', '#2ca02c', '#9467bd', '#ff7f0e', 10 | '#e377c2', '#8c564b', '#bcbd22', '#7f7f7f', '#17becf'] 11 | 12 | URL = 'http://www.gutenberg.org/cache/epub/2701/pg2701.txt' 13 | 14 | with open(utils.find('moby.txt', URL)) as handle: 15 | text = theanets.recurrent.Text(handle.read().lower().replace('\n', ' ')) 16 | 17 | seed = text.encode(text.text[200000:200010]) 18 | for i, layer in enumerate(( 19 | dict(form='rnn', activation='sigmoid', diagonal=0.99), 20 | dict(form='gru', activation='sigmoid'), 21 | dict(form='scrn', activation='sigmoid'), 22 | dict(form='bcrnn', activation='sigmoid', num_modules=5), 23 | dict(form='lstm'), 24 | dict(form='mrnn', activation='sigmoid', factors=len(text.alpha)), 25 | dict(form='clockwork', activation='sigmoid', periods=(1, 2, 4, 8, 16)))): 26 | losses = [] 27 | layer.update(size=100) 28 | net = theanets.recurrent.Classifier([ 29 | 1 + len(text.alpha), layer, 1000, 1 + len(text.alpha)]) 30 | for tm, _ in net.itertrain(text.classifier_batches(30, 16), 31 | min_improvement=0.99, 32 | validate_every=50, 33 | patience=0, 34 | algo='adam', 35 | max_gradient_norm=1, 36 | learning_rate=0.01): 37 | if np.isnan(tm['loss']): 38 | break 39 | print('{}|{} ({:.1f}%)'.format( 40 | text.decode(seed), 41 | text.decode(net.predict_sequence(seed, 30)), 42 | 100 * tm['acc'])) 43 | losses.append(tm['loss']) 44 | 45 | plt.plot(losses, label=layer['form'], alpha=0.7, color=COLORS[i]) 46 | 47 | plt.gca().xaxis.tick_bottom() 48 | plt.gca().yaxis.tick_left() 49 | plt.gca().spines['top'].set_color('none') 50 | plt.gca().spines['right'].set_color('none') 51 | plt.gca().spines['bottom'].set_position(('outward', 6)) 52 | plt.gca().spines['left'].set_position(('outward', 6)) 53 | 54 | plt.gca().set_ylabel('Loss') 55 | plt.gca().set_xlabel('Training Epoch') 56 | plt.gca().grid(True) 57 | 58 | plt.legend() 59 | plt.show() 60 | -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import numpy as np 3 | import os 4 | import pickle 5 | import sys 6 | import tarfile 7 | import tempfile 8 | import urllib 9 | 10 | try: 11 | import matplotlib.pyplot as plt 12 | except ImportError: 13 | raise RuntimeError('please install matplotlib to run the examples!') 14 | 15 | DATASETS = os.path.join(tempfile.gettempdir(), 'theanets-datasets') 16 | 17 | 18 | def find(dataset, url): 19 | '''Find the location of a dataset on disk, downloading if needed.''' 20 | fn = os.path.join(DATASETS, dataset) 21 | dn = os.path.dirname(fn) 22 | if not os.path.exists(dn): 23 | print('creating dataset directory: %s', dn) 24 | os.makedirs(dn) 
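# Download the file only if a cached copy is not already on disk, using the
# urlretrieve call appropriate for the running Python version.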
25 | if not os.path.exists(fn): 26 | if sys.version_info < (3, ): 27 | urllib.urlretrieve(url, fn) 28 | else: 29 | urllib.request.urlretrieve(url, fn) 30 | return fn 31 | 32 | 33 | def load_mnist(flatten=True, labels=False): 34 | '''Load the MNIST digits dataset.''' 35 | fn = find('mnist.pkl.gz', 'http://deeplearning.net/data/mnist/mnist.pkl.gz') 36 | h = gzip.open(fn, 'rb') 37 | if sys.version_info < (3, ): 38 | (timg, tlab), (vimg, vlab), (simg, slab) = pickle.load(h) 39 | else: 40 | (timg, tlab), (vimg, vlab), (simg, slab) = pickle.load(h, encoding='bytes') 41 | h.close() 42 | if not flatten: 43 | timg = timg.reshape((-1, 28, 28, 1)) 44 | vimg = vimg.reshape((-1, 28, 28, 1)) 45 | simg = simg.reshape((-1, 28, 28, 1)) 46 | if labels: 47 | return ((timg, tlab.astype('i')), 48 | (vimg, vlab.astype('i')), 49 | (simg, slab.astype('i'))) 50 | return (timg, ), (vimg, ), (simg, ) 51 | 52 | 53 | def load_cifar(flatten=True, labels=False): 54 | '''Load the CIFAR10 image dataset.''' 55 | def extract(name): 56 | print('extracting data from {}'.format(name)) 57 | h = tar.extractfile(name) 58 | if sys.version_info < (3, ): 59 | d = pickle.load(h) 60 | else: 61 | d = pickle.load(h, encoding='bytes') 62 | for k in list(d): 63 | d[k.decode('utf8')] = d[k] 64 | h.close() 65 | img = d['data'].reshape( 66 | (-1, 3, 32, 32)).transpose((0, 2, 3, 1)).astype('f') / 128 - 1 67 | if flatten: 68 | img = img.reshape((-1, 32 * 32 * 3)) 69 | d['data'] = img 70 | return d 71 | 72 | fn = find('cifar10.tar.gz', 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz') 73 | tar = tarfile.open(fn) 74 | 75 | imgs = [] 76 | labs = [] 77 | for i in range(1, 6): 78 | d = extract('cifar-10-batches-py/data_batch_{}'.format(i)) 79 | imgs.extend(d['data']) 80 | labs.extend(d['labels']) 81 | timg = np.asarray(imgs[:40000]) 82 | tlab = np.asarray(labs[:40000], 'i') 83 | vimg = np.asarray(imgs[40000:]) 84 | vlab = np.asarray(labs[40000:], 'i') 85 | 86 | d = extract('cifar-10-batches-py/test_batch') 87 | simg = d['data'] 88 | slab = d['labels'] 89 | 90 | tar.close() 91 | 92 | if labels: 93 | return (timg, tlab), (vimg, vlab), (simg, slab) 94 | return (timg, ), (vimg, ), (simg, ) 95 | 96 | 97 | def plot_images(imgs, loc, title=None, channels=1): 98 | '''Plot an array of images. 99 | 100 | We assume that we are given a matrix of data whose shape is (n*n, s*s*c) -- 101 | that is, there are n^2 images along the first axis of the array, and each 102 | image is c squares measuring s pixels on a side. Each row of the input will 103 | be plotted as a sub-region within a single image array containing an n x n 104 | grid of images. 105 | ''' 106 | n = int(np.sqrt(len(imgs))) 107 | assert n * n == len(imgs), 'images array must contain a square number of rows!' 108 | s = int(np.sqrt(len(imgs[0]) / channels)) 109 | assert s * s == len(imgs[0]) / channels, 'images must be square!' 
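# Tile the individual images into a single n-by-n grid with a one-pixel gap
# between neighbors, then rescale the pixel values to [0, 1] for display.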
110 | 111 | img = np.zeros(((s+1) * n - 1, (s+1) * n - 1, channels), dtype=imgs[0].dtype) 112 | for i, pix in enumerate(imgs): 113 | r, c = divmod(i, n) 114 | img[r * (s+1):(r+1) * (s+1) - 1, 115 | c * (s+1):(c+1) * (s+1) - 1] = pix.reshape((s, s, channels)) 116 | 117 | img -= img.min() 118 | img /= img.max() 119 | 120 | ax = plt.gcf().add_subplot(loc) 121 | ax.xaxis.set_visible(False) 122 | ax.yaxis.set_visible(False) 123 | ax.set_frame_on(False) 124 | ax.imshow(img.squeeze(), cmap=plt.cm.gray) 125 | if title: 126 | ax.set_title(title) 127 | 128 | 129 | def plot_layers(weights, tied_weights=False, channels=1): 130 | '''Create a plot of weights, visualized as "bottom-level" pixel arrays.''' 131 | if hasattr(weights[0], 'get_value'): 132 | weights = [w.get_value() for w in weights] 133 | k = min(len(weights), 9) 134 | imgs = np.eye(weights[0].shape[0]) 135 | for i, weight in enumerate(weights[:-1]): 136 | imgs = np.dot(weight.T, imgs) 137 | plot_images(imgs, 138 | 100 + 10 * k + i + 1, 139 | channels=channels, 140 | title='Layer {}'.format(i+1)) 141 | weight = weights[-1] 142 | n = weight.shape[1] / channels 143 | if int(np.sqrt(n)) ** 2 != n: 144 | return 145 | if tied_weights: 146 | imgs = np.dot(weight.T, imgs) 147 | plot_images(imgs, 148 | 100 + 10 * k + k, 149 | channels=channels, 150 | title='Layer {}'.format(k)) 151 | else: 152 | plot_images(weight, 153 | 100 + 10 * k + k, 154 | channels=channels, 155 | title='Decoding weights') 156 | 157 | 158 | def plot_filters(filters): 159 | '''Create a plot of conv filters, visualized as pixel arrays.''' 160 | imgs = filters.get_value() 161 | 162 | N, channels, x, y = imgs.shape 163 | n = int(np.sqrt(N)) 164 | assert n * n == N, 'filters must contain a square number of rows!' 165 | assert channels == 1 or channels == 3, 'can only plot grayscale or rgb filters!' 
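# As in plot_images, tile the filters into an n-by-n grid (transposing each one
# to channels-last order) and normalize the result before displaying it.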
166 | 167 | img = np.zeros(((y+1) * n - 1, (x+1) * n - 1, channels), dtype=imgs[0].dtype) 168 | for i, pix in enumerate(imgs): 169 | r, c = divmod(i, n) 170 | img[r * (y+1):(r+1) * (y+1) - 1, 171 | c * (x+1):(c+1) * (x+1) - 1] = pix.transpose((1, 2, 0)) 172 | 173 | img -= img.min() 174 | img /= img.max() 175 | 176 | ax = plt.gcf().add_subplot(111) 177 | ax.xaxis.set_visible(False) 178 | ax.yaxis.set_visible(False) 179 | ax.set_frame_on(False) 180 | ax.imshow(img.squeeze(), cmap=plt.cm.gray) 181 | -------------------------------------------------------------------------------- /examples/weighted-classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn.datasets 3 | import sklearn.metrics 4 | import theanets 5 | 6 | samples, labels = sklearn.datasets.make_classification( 7 | n_samples=10000, 8 | n_features=100, 9 | n_informative=30, 10 | n_redundant=30, 11 | n_repeated=0, 12 | n_classes=2, 13 | n_clusters_per_class=3, 14 | weights=[0.99, 0.01], 15 | flip_y=0.01, 16 | ) 17 | 18 | weights = np.ones_like(labels) 19 | weights[labels.nonzero()] *= 10 20 | 21 | 22 | def split(a, b): 23 | return [samples[a:b].astype('float32'), 24 | labels[a:b].astype('int32'), 25 | weights[a:b].astype('float32')] 26 | 27 | train = split(0, 9000) 28 | valid = split(9000, 10000) 29 | 30 | net = theanets.Classifier( 31 | layers=(100, 10, 2), 32 | weighted=True, 33 | ) 34 | 35 | net.train(train, valid) 36 | 37 | truth = valid[1] 38 | theanets.log('# of true 1s: {}', truth.sum()) 39 | 40 | guess = net.predict(valid[0]) 41 | theanets.log('# of predicted 1s: {}', guess.sum()) 42 | 43 | cm = sklearn.metrics.confusion_matrix(truth, guess) 44 | theanets.log('confusion matrix (true class = rows, predicted class = cols):') 45 | theanets.log(str(cm)) 46 | -------------------------------------------------------------------------------- /examples/xor-classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | '''Example using the theanets package for learning the XOR relation.''' 5 | 6 | import numpy as np 7 | import theanets 8 | 9 | X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='f') 10 | Y = np.array([[0], [1], [1], [0]], dtype='f') 11 | 12 | net = theanets.Regressor([dict(size=2, input_noise=0.3), 2, 1]) 13 | net.train([X, Y], algo='rmsprop', patience=10, batch_size=4) 14 | 15 | theanets.log('Input: {}', [list(x) for x in X]) 16 | theanets.log('XOR output: {}', Y.T) 17 | theanets.log('NN XOR predictions: {}', net.predict(X.astype('f')).T.round(2)) 18 | -------------------------------------------------------------------------------- /scripts/theanets-char-rnn: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import click 4 | import logging 5 | import numpy as np 6 | import theanets 7 | 8 | @click.command() 9 | @click.option('-d', '--data', multiple=True, metavar='FILE', 10 | help='load text from FILE') 11 | @click.option('-t', '--time', default=100, type=int, metavar='T', 12 | help='train on sequences of T characters') 13 | @click.option('-a', '--alphabet', default='', metavar='CHARS', 14 | help='use CHARS for alphabet; defaults to all chars in text') 15 | @click.option('-A', '--exclude-alphabet', default='', metavar='CHARS', 16 | help='discard CHARS from alphabet') 17 | @click.option('-l', '--layers', multiple=True, type=int, default=[100], metavar='N', 18 | help='construct a 
network with layers of size N1, N2, ...') 19 | @click.option('-L', '--layer-type', default='lstm', metavar='{rnn|gru|lstm|clockwork}', 20 | help='construct a network with this RNN layer type') 21 | @click.option('-g', '--activation', default='relu', metavar='FUNC', 22 | help='function for hidden unit activations') 23 | @click.option('-O', '--algorithm', default=['nag'], multiple=True, metavar='ALGO', 24 | help='train with the given optimization algorithm(s)') 25 | @click.option('-p', '--patience', type=int, default=4, metavar='N', 26 | help='stop training if less than --min-improvement for N validations') 27 | @click.option('-v', '--validate-every', type=int, default=10, metavar='N', 28 | help='validate the model every N updates') 29 | @click.option('-b', '--batch-size', type=int, default=64, metavar='N', 30 | help='train with mini-batches of size N') 31 | @click.option('-B', '--train-batches', type=int, metavar='N', 32 | help='use at most N batches during gradient computations') 33 | @click.option('-V', '--valid-batches', type=int, metavar='N', 34 | help='use at most N batches during validation') 35 | @click.option('-i', '--min-improvement', type=float, default=0, metavar='R', 36 | help='train until relative improvement is less than R') 37 | @click.option('-x', '--max-gradient-norm', type=float, default=1, metavar='V', 38 | help='clip gradient norm to the interval [0, V]') 39 | @click.option('-r', '--learning-rate', type=float, default=1e-4, metavar='V', 40 | help='train the network with a learning rate of V') 41 | @click.option('-m', '--momentum', type=float, default=0.9, metavar='V', 42 | help='train the network with momentum of V') 43 | @click.option('-n', '--nesterov/--no-nesterov', help='use Nesterov momentum') 44 | @click.option('-s', '--save-progress', metavar='FILE', 45 | help='save the model periodically to FILE') 46 | @click.option('-S', '--save-every', type=float, default=0, metavar='N', 47 | help='save the model every N iterations or -N minutes') 48 | @click.option('--input-noise', type=float, default=0, metavar='S', 49 | help='add noise to network inputs drawn from N(0, S)') 50 | @click.option('--input-dropouts', type=float, default=0, metavar='R', 51 | help='randomly set fraction R of input activations to 0') 52 | @click.option('--hidden-noise', type=float, default=0, metavar='S', 53 | help='add noise to hidden activations drawn from N(0, S)') 54 | @click.option('--hidden-dropouts', type=float, default=0, metavar='R', 55 | help='randomly set fraction R of hidden activations to 0') 56 | @click.option('--hidden-l1', type=float, default=0, metavar='K', 57 | help='regularize hidden activity with K on the L1 term') 58 | @click.option('--hidden-l2', type=float, default=0, metavar='K', 59 | help='regularize hidden activity with K on the L2 term') 60 | @click.option('--weight-l1', type=float, default=0, metavar='K', 61 | help='regularize network weights with K on the L1 term') 62 | @click.option('--weight-l2', type=float, default=0, metavar='K', 63 | help='regularize network weights with K on the L2 term') 64 | @click.option('--rms-halflife', type=float, default=5, metavar='N', 65 | help='use a half-life of N for RMS exponential moving averages') 66 | @click.option('--rms-regularizer', type=float, default=1e-8, metavar='N', 67 | help='regularize RMS exponential moving averages by N') 68 | def main(**kwargs): 69 | corpus = [] 70 | for f in kwargs['data']: 71 | corpus.append(open(f).read()) 72 | logging.info('%s: loaded training document', f) 73 | logging.info('loaded %d training 
documents', len(corpus)) 74 | 75 | alpha = set(kwargs['alphabet']) 76 | if not alpha: 77 | for c in corpus: 78 | alpha |= set(c) 79 | alpha -= set(kwargs['exclude_alphabet']) 80 | alpha = sorted(alpha) 81 | logging.info('character alphabet: %s', alpha) 82 | 83 | # encode document chars as integer alphabet index values. 84 | encoded = [np.array([alpha.index(c) for c in doc]) for doc in corpus] 85 | 86 | def batch(): 87 | T, B = kwargs['time'], kwargs['batch_size'] 88 | inputs = np.zeros((T, B, len(alpha)), 'f') 89 | outputs = np.zeros((T, B), 'i') 90 | enc = np.random.choice(encoded) 91 | for b in range(B): 92 | o = np.random.randint(len(enc) - T - 1) 93 | inputs[np.arange(T), b, enc[o:o+T]] = 1 94 | outputs[np.arange(T), b] = enc[o+1:o+T+1] 95 | return [inputs, outputs] 96 | 97 | layers = [len(alpha)] 98 | for l in kwargs['layers']: 99 | layers.append(dict(size=l, 100 | form=kwargs['layer_type'], 101 | activation=kwargs['activation'])) 102 | layers.append(len(alpha)) 103 | 104 | exp = theanets.Experiment(theanets.recurrent.Classifier, layers=layers) 105 | 106 | exp.train( 107 | batch, 108 | algo=kwargs['algorithm'], 109 | patience=kwargs['patience'], 110 | min_improvement=kwargs['min_improvement'], 111 | validate_every=kwargs['validate_every'], 112 | batch_size=kwargs['batch_size'], 113 | train_batches=kwargs['train_batches'], 114 | valid_batches=kwargs['valid_batches'], 115 | learning_rate=kwargs['learning_rate'], 116 | momentum=kwargs['momentum'], 117 | nesterov=kwargs['nesterov'], 118 | save_progress=kwargs['save_progress'], 119 | save_every=kwargs['save_every'], 120 | weight_l1=kwargs['weight_l1'], 121 | weight_l2=kwargs['weight_l2'], 122 | hidden_l2=kwargs['hidden_l2'], 123 | hidden_l1=kwargs['hidden_l1'], 124 | input_noise=kwargs['input_noise'], 125 | input_dropouts=kwargs['input_dropouts'], 126 | hidden_noise=kwargs['hidden_noise'], 127 | hidden_dropouts=kwargs['hidden_dropouts'], 128 | ) 129 | 130 | if kwargs['save_progress']: 131 | exp.save(kwargs['save_progress']) 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pytest] 2 | #pep8ignore = E226,E302,E41 3 | pep8maxlinelength = 90 4 | 5 | [bdist_wheel] 6 | universal = 1 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | README = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'README.rst') 5 | 6 | setuptools.setup( 7 | name='theanets', 8 | version='0.8.0pre', 9 | packages=setuptools.find_packages(), 10 | author='lmjohns3', 11 | author_email='theanets@googlegroups.com', 12 | description='Feedforward and recurrent neural nets using Theano', 13 | long_description=open(README).read(), 14 | license='MIT', 15 | url='http://github.com/lmjohns3/theanets', 16 | keywords=('machine-learning ' 17 | 'neural-network ' 18 | 'deep-neural-network ' 19 | 'recurrent-neural-network ' 20 | 'autoencoder ' 21 | 'sparse-autoencoder ' 22 | 'classifier ' 23 | 'theano ' 24 | ), 25 | install_requires=['click', 'downhill', 'theano', 26 | # TODO(leif): remove when theano is fixed. 
27 | 'nose-parameterized'], 28 | classifiers=[ 29 | 'Development Status :: 3 - Alpha', 30 | 'Intended Audience :: Science/Research', 31 | 'License :: OSI Approved :: MIT License', 32 | 'Operating System :: OS Independent', 33 | 'Topic :: Scientific/Engineering', 34 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /test/activations_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import io 4 | import numpy as np 5 | import pytest 6 | import theanets 7 | import theano 8 | 9 | PROBE = np.array([-10, -1, -0.1, 0, 0.1, 1, 10], 'f') 10 | 11 | 12 | @pytest.mark.parametrize('activation, expected', [ 13 | ('linear', PROBE), 14 | ('logistic', 1 / (1 + np.exp(-PROBE))), 15 | ('sigmoid', 1 / (1 + np.exp(-PROBE))), 16 | ('softmax', np.exp(PROBE) / sum(np.exp(PROBE))), 17 | ('softplus', np.log1p(np.exp(PROBE))), 18 | ('relu', np.clip(PROBE, 0, 100)), 19 | ('rect:max', np.clip(PROBE, 0, 100)), 20 | ('rect:min', np.clip(PROBE, -100, 1)), 21 | ('rect:minmax', np.clip(np.clip(PROBE, 0, 100), -100, 1)), 22 | ('norm:mean', PROBE - PROBE.mean()), 23 | ('norm:max', PROBE / abs(PROBE).max()), 24 | ('norm:std', PROBE / PROBE.std()), 25 | ('norm:z', (PROBE - PROBE.mean()) / PROBE.std()), 26 | 27 | # values based on random initial parameters using seed below 28 | ('elu', np.array([ 29 | -1.15013397, -0.74292195, -0.0999504, 0, 0.1, 1, 10], 'f')), 30 | ('prelu', np.array([ 31 | -11.50186157, -1.17528522, -0.10503119, 0, 0.1, 1, 10], 'f')), 32 | ('lgrelu', np.array([ 33 | -10.52778435, -1.04052365, -0.11633276, 0, 0.10640667, 34 | 1.04642045, 10.21983242], 'f')), 35 | ('maxout:3', np.array([ 36 | 16.60424042, 1.80405843, 1.99347568, 0.3595323, -0.513098, 37 | 2.77195668, 0.61599374], 'f')), 38 | 39 | # combo burgers 40 | ('relu+tanh', np.tanh(np.clip(PROBE, 0, 100))), 41 | ('softplus+norm:z', ((np.log1p(np.exp(PROBE)) - 42 | np.log1p(np.exp(PROBE)).mean()) / 43 | np.log1p(np.exp(PROBE)).std())), 44 | ]) 45 | def test_activation(activation, expected): 46 | layer = theanets.layers.Feedforward(inputs='in', size=7, rng=13) 47 | layer.bind(theanets.Network([3])) 48 | f = theanets.activations.build(activation, layer) 49 | actual = f(theano.shared(PROBE)) 50 | if hasattr(actual, 'eval'): 51 | actual = actual.eval() 52 | assert np.allclose(actual, expected) 53 | 54 | 55 | def test_build(): 56 | layer = theanets.layers.Feedforward(inputs='in', size=3, activation='relu') 57 | layer.bind(theanets.Network([3])) 58 | a = layer.activate 59 | assert callable(a) 60 | assert a.name == 'relu' 61 | assert a.params == [] 62 | 63 | 64 | def test_build_composed(): 65 | layer = theanets.layers.Feedforward( 66 | inputs='in', size=3, activation='relu+norm:z') 67 | layer.bind(theanets.Network([3])) 68 | a = layer.activate 69 | assert callable(a) 70 | assert a.name == 'norm:z(relu)', a.name 71 | assert a.params == [] 72 | 73 | 74 | def test_save_load_composed(): 75 | model = theanets.Network([3, (4, 'relu+norm:z')]) 76 | handle = io.BytesIO() 77 | model.save(handle) 78 | handle.seek(0) 79 | second = theanets.Network.load(handle) 80 | assert second.layers[1].activate.f.name == model.layers[1].activate.f.name 81 | assert second.layers[1].activate.g.name == model.layers[1].activate.g.name 82 | 83 | 84 | @pytest.mark.parametrize('activation, expected', [ 85 | ('prelu', ['l.leak']), 86 | ('lgrelu', ['l.gain', 'l.leak']), 87 | ('maxout:4', 
['l.intercept', 'l.slope']), 88 | ]) 89 | def test_parameters(activation, expected): 90 | layer = theanets.layers.Feedforward( 91 | inputs='in', size=3, activation=activation, name='l') 92 | layer.bind(theanets.Network([3, layer])) 93 | assert sorted(p.name for p in layer.activate.params) == expected 94 | -------------------------------------------------------------------------------- /test/convolution_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | REG_LAYERS = [ 8 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, u.NUM_INPUTS), 9 | dict(size=u.NUM_HID1, form='conv2', filter_size=u.CNN.FILTER_SIZE), 10 | dict(size=u.NUM_HID2, form='conv2', filter_size=u.CNN.FILTER_SIZE), 11 | 'flat', 12 | u.NUM_OUTPUTS] 13 | 14 | CLF_LAYERS = [ 15 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, u.NUM_INPUTS), 16 | dict(size=u.NUM_HID1, form='conv2', filter_size=u.CNN.FILTER_SIZE), 17 | dict(size=u.NUM_HID2, form='conv2', filter_size=u.CNN.FILTER_SIZE), 18 | 'flat', 19 | u.NUM_CLASSES] 20 | 21 | 22 | def assert_shape(actual, width, height, channels): 23 | assert actual == (u.NUM_EXAMPLES, width, height, channels) 24 | 25 | 26 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 27 | (theanets.convolution.Regressor, REG_LAYERS, False, u.CNN.REG_DATA), 28 | (theanets.convolution.Classifier, CLF_LAYERS, False, u.CNN.CLF_DATA), 29 | (theanets.convolution.Regressor, REG_LAYERS, True, u.CNN.WREG_DATA), 30 | (theanets.convolution.Classifier, CLF_LAYERS, True, u.CNN.WCLF_DATA), 31 | ]) 32 | def test_sgd(Model, layers, weighted, data): 33 | u.assert_progress(Model(layers, weighted=weighted), data) 34 | 35 | 36 | @pytest.mark.parametrize('Model, layers, output', [ 37 | (theanets.convolution.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 38 | (theanets.convolution.Classifier, CLF_LAYERS, (u.NUM_EXAMPLES, )), 39 | ]) 40 | def test_predict(Model, layers, output): 41 | u.assert_shape(Model(layers).predict(u.CNN.INPUTS).shape, output) 42 | 43 | 44 | @pytest.mark.parametrize('Model, layers, target, score', [ 45 | (theanets.convolution.Regressor, REG_LAYERS, u.OUTPUTS, -16.850263595581055), 46 | (theanets.convolution.Classifier, CLF_LAYERS, u.CLASSES, 0.171875), 47 | ]) 48 | def test_score(Model, layers, target, score): 49 | assert Model(layers).score(u.CNN.INPUTS, target) == score 50 | 51 | 52 | @pytest.mark.parametrize('Model, layers, target', [ 53 | (theanets.convolution.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 54 | (theanets.convolution.Classifier, CLF_LAYERS, u.NUM_CLASSES), 55 | ]) 56 | def test_predict(Model, layers, target): 57 | outs = Model(layers).feed_forward(u.CNN.INPUTS) 58 | assert len(list(outs)) == 8 59 | W, H = u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT 60 | w, h = u.CNN.FILTER_WIDTH, u.CNN.FILTER_HEIGHT 61 | assert_shape(outs['in:out'].shape, W, H, u.NUM_INPUTS) 62 | assert_shape(outs['hid1:out'].shape, W - w + 1, H - h + 1, u.NUM_HID1) 63 | assert_shape(outs['hid2:out'].shape, W - 2 * w + 2, H - 2 * h + 2, u.NUM_HID2) 64 | u.assert_shape(outs['out:out'].shape, target) 65 | 66 | 67 | class TestClassifier: 68 | @pytest.fixture 69 | def net(self): 70 | return theanets.convolution.Classifier(CLF_LAYERS) 71 | 72 | def test_predict_proba(self, net): 73 | u.assert_shape(net.predict_proba(u.CNN.INPUTS).shape, u.NUM_CLASSES) 74 | 75 | def test_predict_logit(self, net): 76 | u.assert_shape(net.predict_logit(u.CNN.INPUTS).shape, u.NUM_CLASSES) 77 | 78 | def test_score(self, net): 79 | w = 0.5 * 
np.ones(u.CLASSES.shape, 'f') 80 | assert 0 <= net.score(u.CNN.INPUTS, u.CLASSES, w) <= 1 81 | -------------------------------------------------------------------------------- /test/feedforward_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | 8 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 9 | (theanets.Regressor, u.REG_LAYERS, False, u.REG_DATA), 10 | (theanets.Classifier, u.CLF_LAYERS, False, u.CLF_DATA), 11 | (theanets.Autoencoder, u.AE_LAYERS, False, u.AE_DATA), 12 | (theanets.Regressor, u.REG_LAYERS, True, u.WREG_DATA), 13 | (theanets.Classifier, u.CLF_LAYERS, True, u.WCLF_DATA), 14 | (theanets.Autoencoder, u.AE_LAYERS, True, u.WAE_DATA), 15 | ]) 16 | def test_sgd(Model, layers, weighted, data): 17 | u.assert_progress(Model(layers, weighted=weighted), data) 18 | 19 | 20 | @pytest.mark.parametrize('Model, layers, output', [ 21 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 22 | (theanets.Classifier, u.CLF_LAYERS, (u.NUM_EXAMPLES, )), 23 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 24 | ]) 25 | def test_predict(Model, layers, output): 26 | u.assert_shape(Model(layers).predict(u.INPUTS).shape, output) 27 | 28 | 29 | @pytest.mark.parametrize('Model, layers, target, score', [ 30 | (theanets.Regressor, u.REG_LAYERS, u.OUTPUTS, -1.0473043918609619), 31 | (theanets.Classifier, u.CLF_LAYERS, u.CLASSES, 0.171875), 32 | (theanets.Autoencoder, u.AE_LAYERS, u.INPUTS, 15.108331680297852), 33 | ]) 34 | def test_score(Model, layers, target, score): 35 | assert Model(layers).score(u.INPUTS, target) == score 36 | 37 | 38 | @pytest.mark.parametrize('Model, layers, target', [ 39 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 40 | (theanets.Classifier, u.CLF_LAYERS, u.NUM_CLASSES), 41 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 42 | ]) 43 | def test_feed_forward(Model, layers, target): 44 | outs = Model(layers).feed_forward(u.INPUTS) 45 | assert len(list(outs)) == 7 46 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 47 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 48 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 49 | u.assert_shape(outs['out:out'].shape, target) 50 | 51 | 52 | def test_decode_from_multiple_layers(): 53 | net = theanets.Regressor([u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, dict( 54 | size=u.NUM_OUTPUTS, inputs=('hid2:out', 'hid1:out'))]) 55 | outs = net.feed_forward(u.INPUTS) 56 | assert len(list(outs)) == 7 57 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 58 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 59 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 60 | u.assert_shape(outs['out:out'].shape, u.NUM_OUTPUTS) 61 | 62 | 63 | class TestClassifier: 64 | @pytest.fixture 65 | def net(self): 66 | return theanets.Classifier(u.CLF_LAYERS) 67 | 68 | def test_predict_proba(self, net): 69 | u.assert_shape(net.predict_proba(u.INPUTS).shape, u.NUM_CLASSES) 70 | 71 | def test_predict_logit(self, net): 72 | u.assert_shape(net.predict_logit(u.INPUTS).shape, u.NUM_CLASSES) 73 | 74 | def test_score(self, net): 75 | w = 0.5 * np.ones(u.CLASSES.shape, 'f') 76 | assert 0 <= net.score(u.INPUTS, u.CLASSES, w) <= 1 77 | 78 | 79 | class TestAutoencoder: 80 | @pytest.fixture 81 | def net(self): 82 | return theanets.Autoencoder(u.AE_LAYERS) 83 | 84 | def test_encode_hid1(self, net): 85 | z = net.encode(u.INPUTS, 'hid1') 86 | u.assert_shape(z.shape, u.NUM_HID1) 87 | 88 | def test_encode_hid2(self, net): 89 
| z = net.encode(u.INPUTS, 'hid2') 90 | u.assert_shape(z.shape, u.NUM_HID2) 91 | 92 | def test_decode_hid1(self, net): 93 | x = net.decode(net.encode(u.INPUTS)) 94 | u.assert_shape(x.shape, u.NUM_INPUTS) 95 | 96 | def test_decode_hid2(self, net): 97 | x = net.decode(net.encode(u.INPUTS, 'hid2'), 'hid2') 98 | u.assert_shape(x.shape, u.NUM_INPUTS) 99 | 100 | def test_score(self, net): 101 | labels = np.random.randint(0, 2, size=u.INPUTS.shape) 102 | assert net.score(u.INPUTS, labels) < 0 103 | -------------------------------------------------------------------------------- /test/graph_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pytest 4 | import theanets 5 | 6 | try: 7 | from itertools import izip as zip 8 | except ImportError: # python3 9 | pass 10 | 11 | import util as u 12 | 13 | 14 | class TestNetwork: 15 | def test_layer_ints(self): 16 | model = theanets.Regressor((1, 2, 3)) 17 | assert len(model.layers) == 3 18 | 19 | @pytest.mark.parametrize('layers', [ 20 | (1, (2, 'relu'), 3), 21 | (1, dict(size=2, activation='relu', form='rnn'), 3), 22 | (1, 2, dict(size=3, inputs='hid1')), 23 | (1, 2, dict(size=3, inputs=('in', 'hid1'))), 24 | (1, 2, (1, 'tied')), 25 | (1, 2, dict(size=1, form='tied', partner='hid1')), 26 | ]) 27 | def test_layer_tuples(self, layers): 28 | model = theanets.Regressor(layers) 29 | assert len(model.layers) == 3 30 | 31 | assert isinstance(model.layers[0], theanets.layers.Input) 32 | assert model.layers[0].kwargs['activation'] == 'linear' 33 | assert model.layers[0].output_shape == (1, ) 34 | 35 | assert model.layers[1].kwargs['activation'] == 'relu' 36 | spec = layers[1] 37 | if isinstance(spec, dict) and spec.get('form') == 'rnn': 38 | assert isinstance(model.layers[1], theanets.layers.RNN) 39 | else: 40 | assert isinstance(model.layers[1], theanets.layers.Feedforward) 41 | 42 | assert model.layers[2].kwargs['activation'] == 'linear' 43 | spec = layers[2] 44 | if (isinstance(spec, tuple) and 'tied' in spec) or \ 45 | (isinstance(spec, dict) and spec.get('form') == 'tied'): 46 | assert isinstance(model.layers[2], theanets.layers.Tied) 47 | assert model.layers[2].partner is model.layers[1] 48 | 49 | @pytest.mark.parametrize('layers', [ 50 | (1, 2, dict(size=3, inputs='hid2')), 51 | (1, (2, 'tied'), (2, 'tied'), (1, 'tied')), 52 | ]) 53 | def test_layers_raises(self, layers): 54 | with pytest.raises(theanets.util.ConfigurationError): 55 | theanets.Regressor(layers) 56 | 57 | @pytest.mark.parametrize('spec, cls, shape, act', [ 58 | (6, theanets.layers.Feedforward, (6, ), None), 59 | ((6, ), theanets.layers.Feedforward, (6, ), None), 60 | ((6, 7), theanets.layers.Feedforward, (6, 7), None), 61 | ((6, 'linear'), theanets.layers.Feedforward, (6, ), 'linear'), 62 | ((6, 'linear', 'classifier'), theanets.layers.Classifier, (6, ), 'softmax'), 63 | (dict(size=6), theanets.layers.Feedforward, (6, ), None), 64 | (dict(size=6, form='ff'), theanets.layers.Feedforward, (6, ), None), 65 | (dict(size=6, activation='linear'), theanets.layers.Feedforward, (6, ), 'linear'), 66 | (dict(shape=(6, 7)), theanets.layers.Feedforward, (6, 7), None), 67 | ]) 68 | def test_add_layer(self, spec, cls, shape, act): 69 | model = theanets.Regressor([3, spec, 4]) 70 | layer = model.layers[1] 71 | assert len(model.layers) == 3 72 | assert isinstance(layer, cls) 73 | assert layer.output_shape == shape 74 | if act is not None: 75 | assert layer.kwargs['activation'] == act 76 | 77 | 
@pytest.mark.parametrize('spec', [ 78 | (6, 'tied', 7), 79 | None, 80 | 'ff', 81 | 'tied', 82 | dict(form='ff'), 83 | dict(form='tied'), 84 | dict(form='tied', partner='hello'), 85 | dict(form='ff', inputs=('a', 'b')), 86 | ]) 87 | def test_add_layer_errors(self, spec): 88 | with pytest.raises(theanets.util.ConfigurationError): 89 | theanets.Network([dict(form='input', name='a', shape=(3, 5)), 90 | dict(form='input', name='b', shape=(4, 3)), 91 | spec, 92 | 4]) 93 | 94 | def test_updates(self): 95 | model = theanets.Regressor((15, 13)) 96 | assert not model.updates() 97 | 98 | def test_default_output_name(self): 99 | model = theanets.Regressor((1, 2, dict(size=1, form='tied', name='foo'))) 100 | assert model.losses[0].output_name == 'foo:out' 101 | model = theanets.Regressor((1, 2, 1)) 102 | assert model.losses[0].output_name == 'out:out' 103 | 104 | def test_find_number(self): 105 | model = theanets.Regressor((1, 2, 1)) 106 | p = model.find(1, 0) 107 | assert p.name == 'hid1.w' 108 | p = model.find(2, 0) 109 | assert p.name == 'out.w' 110 | 111 | def test_find_name(self): 112 | model = theanets.Regressor((1, 2, 1)) 113 | p = model.find('hid1', 'w') 114 | assert p.name == 'hid1.w' 115 | p = model.find('out', 'w') 116 | assert p.name == 'out.w' 117 | 118 | def test_find_missing(self): 119 | model = theanets.Regressor((1, 2, 1)) 120 | try: 121 | model.find('hid4', 'w') 122 | assert False 123 | except KeyError: 124 | pass 125 | try: 126 | model.find(0, 0) 127 | assert False 128 | except KeyError: 129 | pass 130 | try: 131 | model.find(1, 3) 132 | assert False 133 | except KeyError: 134 | pass 135 | 136 | def test_train(self): 137 | model = theanets.Regressor((1, 2, 1)) 138 | tm, vm = model.train([np.random.randn(100, 1).astype('f'), 139 | np.random.randn(100, 1).astype('f')]) 140 | assert tm['loss'] > 0 141 | 142 | 143 | class TestMonitors: 144 | @pytest.fixture 145 | def net(self): 146 | return theanets.Regressor((10, 15, 14, 13)) 147 | 148 | def assert_monitors(self, net, monitors, expected, sort=False): 149 | mon = [k for k, v in net.monitors(monitors=monitors)] 150 | if sort: 151 | mon = sorted(mon) 152 | assert mon == expected 153 | 154 | def test_dict(self, net): 155 | self.assert_monitors(net, {'hid1:out': 1}, ['err', 'hid1:out<1']) 156 | 157 | def test_list(self, net): 158 | self.assert_monitors(net, [('hid1:out', 1)], ['err', 'hid1:out<1']) 159 | 160 | def test_list_values(self, net): 161 | self.assert_monitors( 162 | net, {'hid1:out': [2, 1]}, ['err', 'hid1:out<2', 'hid1:out<1']) 163 | 164 | def test_dict_values(self, net): 165 | self.assert_monitors( 166 | net, {'hid1:out': dict(a=lambda e: e+1, b=lambda e: e+2)}, 167 | ['err', 'hid1:out:a', 'hid1:out:b'], sort=True) 168 | 169 | def test_not_found(self, net): 170 | self.assert_monitors(net, {'hid10:out': 1}, ['err']) 171 | 172 | def test_param(self, net): 173 | self.assert_monitors(net, {'hid1.w': 1}, ['err', 'hid1.w<1']) 174 | 175 | def test_wildcard(self, net): 176 | self.assert_monitors( 177 | net, {'*.w': 1}, ['err', 'hid1.w<1', 'hid2.w<1', 'out.w<1']) 178 | self.assert_monitors(net, {'hid?.w': 1}, ['err', 'hid1.w<1', 'hid2.w<1']) 179 | 180 | 181 | def test_save_every(tmpdir): 182 | net = theanets.Autoencoder((u.NUM_INPUTS, (3, 'prelu'), u.NUM_INPUTS)) 183 | p = tmpdir.mkdir('graph-test').join('model.pkl') 184 | fn = os.path.join(p.dirname, p.basename) 185 | train = net.itertrain([u.INPUTS], save_every=2, save_progress=fn) 186 | for i, _ in enumerate(zip(train, range(9))): 187 | if i == 3 or i == 5 or i == 7: 188 | assert 
p.check() 189 | else: 190 | assert not p.check() 191 | if p.check(): 192 | p.remove() 193 | -------------------------------------------------------------------------------- /test/layers_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | import theano.tensor as TT 5 | 6 | import util as u 7 | 8 | NI = u.NUM_INPUTS 9 | NH = u.NUM_HID1 10 | 11 | 12 | class TestFeedforward: 13 | @pytest.mark.parametrize('form, name, params, count, outputs', [ 14 | ('feedforward', 'feedforward', 'w b', 1 + NI, 'out pre'), 15 | ('ff', 'feedforward', 'w b', 1 + NI, 'out pre'), 16 | ('classifier', 'classifier', 'w b', 1 + NI, 'out pre'), 17 | ('flatten', 'flatten', '', 0, 'out'), 18 | ('flat', 'flatten', '', 0, 'out'), 19 | ('concatenate', 'concatenate', '', 0, 'out'), 20 | ('concat', 'concatenate', '', 0, 'out'), 21 | ('product', 'product', '', 0, 'out'), 22 | ('prod', 'product', '', 0, 'out'), 23 | ]) 24 | def test_build(self, form, name, params, count, outputs): 25 | layer = theanets.Layer.build(form, size=NI, name='l', inputs='in') 26 | layer.bind(theanets.Network([NI])) 27 | 28 | assert layer.__class__.__name__.lower() == name 29 | 30 | assert sorted(p.name for p in layer.params) == \ 31 | sorted('l.' + p for p in params.split()) 32 | 33 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == count * NI 34 | 35 | out, upd = layer.connect({'in:out': TT.matrix('x')}) 36 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 37 | assert sorted(upd) == [] 38 | 39 | assert layer.to_spec() == dict( 40 | form=name, name='l', size=NI, inputs='in', 41 | activation=layer.kwargs.get('activation', 'relu')) 42 | 43 | @pytest.mark.parametrize('layer', [ 44 | NH, 45 | dict(form='ff', inputs=('hid1', 'hid2'), size=NH), 46 | dict(form='tied', partner='hid1'), 47 | dict(form='prod', inputs=('hid1', 'hid2'), size=NH), 48 | dict(form='concat', inputs=('hid1', 'hid2'), size=2 * NH), 49 | ('flat', NH), 50 | ]) 51 | def test_predict(self, layer): 52 | net = theanets.Autoencoder([NI, NH, NH, layer, NI]) 53 | assert net.predict(u.INPUTS).shape == (u.NUM_EXAMPLES, NI) 54 | 55 | def test_multiple_inputs(self): 56 | layer = theanets.layers.Feedforward(inputs=('in', 'hid1'), size=NH, name='l') 57 | layer.bind(theanets.Network([NH, NH, NH])) 58 | 59 | total = sum(np.prod(p.get_value().shape) for p in layer.params) 60 | assert total == (1 + 2 * NH) * NH 61 | 62 | assert sorted(p.name for p in layer.params) == \ 63 | ['l.b', 'l.w_hid1:out', 'l.w_in:out'] 64 | 65 | assert layer.to_spec() == dict( 66 | form='feedforward', name='l', size=NH, activation='relu', 67 | inputs=('in', 'hid1')) 68 | 69 | def test_reshape(self): 70 | layer = theanets.layers.Reshape(inputs='in', shape=(4, 2), name='l') 71 | layer.bind(theanets.Network([8])) 72 | 73 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == 0 74 | 75 | assert sorted(p.name for p in layer.params) == [] 76 | 77 | assert layer.to_spec() == dict( 78 | form='reshape', name='l', shape=(4, 2), inputs='in', 79 | activation='relu') 80 | 81 | 82 | class TestRecurrent: 83 | @pytest.mark.parametrize('form, kwargs, count, params, outputs', [ 84 | ('rnn', {}, 1 + NI + NH, 'xh hh b', 'out pre'), 85 | ('clockwork', {'periods': (1, 2, 4, 8)}, 1 + NI + NH, 'xh hh b', 'out pre'), 86 | ('rrnn', {'rate': 'uniform'}, 1 + NI + NH, 'xh hh b', 'out pre rate hid'), 87 | ('rrnn', {'rate': 'log'}, 1 + NI + NH, 'xh hh b', 'out pre rate hid'), 88 | ('rrnn', {'rate': 'vector'}, 2 + 
NI + NH, 'xh hh b r', 'out pre rate hid'), 89 | ('rrnn', {'rate': 'matrix'}, 2 + NH + 2 * NI, 'xh hh b r xr', 'out pre rate hid'), 90 | ('gru', {}, 3 * (1 + NI + NH), 'b w hh hr hz', 'hid out pre rate'), 91 | ('mut1', {}, 3 + 3 * NI + 2 * NH, 'bh br bz hh hr xh xr xz', 'hid out pre rate'), 92 | ('scrn', {}, 2 * (1 + NI + 2 * NH), 'w ho so hh sh b r', 'out hid rate state'), 93 | ('lstm', {}, 7 + 4 * NH + 4 * NI, 'xh hh b cf ci co', 'out cell'), 94 | ('conv1', {'filter_size': 13}, 1 + 13 * NI, 'w b', 'pre out'), 95 | ('mrnn', {'factors': 3}, (7 + NI) * NH + 3 * NI, 'xh xf hf fh b', 96 | 'out pre factors'), 97 | ('bidirectional', {}, 1 + NI + NH // 2, 98 | 'l_bw.b l_bw.hh l_bw.xh l_fw.b l_fw.xh l_fw.hh', 99 | 'bw_out bw_pre fw_out fw_pre out pre'), 100 | ]) 101 | def test_build(self, form, kwargs, count, params, outputs): 102 | layer = theanets.Layer.build(form, size=NH, name='l', inputs='in', **kwargs) 103 | layer.bind(theanets.Network([dict(size=NI, ndim=3)])) 104 | 105 | assert layer.__class__.__name__.lower() == form 106 | 107 | expected = sorted('l.' + p for p in params.split()) 108 | if form == 'bidirectional': 109 | expected = sorted(params.split()) 110 | assert sorted(p.name for p in layer.params) == expected 111 | 112 | expected = count * NH 113 | if form == 'mrnn': 114 | expected = count 115 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == expected 116 | 117 | out, upd = layer.connect({'in:out': TT.tensor3('x')}) 118 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 119 | assert sorted(upd) == [] 120 | 121 | spec = {} 122 | if form == 'mrnn': 123 | spec['factors'] = 3 124 | if form == 'bidirectional': 125 | spec['worker'] = 'rnn' 126 | if form == 'clockwork': 127 | spec['periods'] = (1, 2, 4, 8) 128 | if form == 'scrn': 129 | spec['s_0'] = None 130 | spec['context_size'] = int(1 + np.sqrt(NH)) 131 | if form == 'lstm': 132 | spec['c_0'] = None 133 | if form not in ('bidirectional', 'conv1'): 134 | spec['h_0'] = None 135 | assert layer.to_spec() == dict( 136 | form=form, name='l', size=NH, inputs='in', 137 | activation=layer.kwargs.get('activation', 'relu'), **spec) 138 | 139 | @pytest.mark.parametrize('layer', [ 140 | (NH, 'rnn'), 141 | dict(size=NH, form='conv1', filter_size=13), 142 | ]) 143 | def test_predict(self, layer): 144 | T = u.RNN.NUM_TIMES 145 | if isinstance(layer, dict) and layer.get('form') == 'conv1': 146 | T -= layer['filter_size'] - 1 147 | net = theanets.recurrent.Autoencoder([NI, NH, NH, layer, NI]) 148 | assert net.predict(u.RNN.INPUTS).shape == (u.NUM_EXAMPLES, T, NI) 149 | 150 | 151 | class TestConvolution: 152 | @pytest.mark.parametrize('form, kwargs, count, params, outputs', [ 153 | ('conv2', {'filter_size': u.CNN.FILTER_SIZE}, 154 | 1 + NI * u.CNN.FILTER_HEIGHT * u.CNN.FILTER_WIDTH, 'w b', 'out pre'), 155 | ]) 156 | def test_build(self, form, kwargs, count, params, outputs): 157 | layer = theanets.Layer.build(form, size=NH, name='l', inputs='in', **kwargs) 158 | layer.bind(theanets.Network([dict(size=NI, ndim=4)])) 159 | 160 | assert layer.__class__.__name__.lower() == form 161 | 162 | expected = sorted('l.' 
+ p for p in params.split()) 163 | assert sorted(p.name for p in layer.params) == expected 164 | 165 | expected = count * NH 166 | assert sum(np.prod(p.get_value().shape) for p in layer.params) == expected 167 | 168 | out, upd = layer.connect({'in:out': TT.tensor4('x')}) 169 | assert sorted(out) == sorted('l:' + o for o in outputs.split()) 170 | assert sorted(upd) == [] 171 | 172 | assert layer.to_spec() == dict( 173 | form=form, name='l', size=NH, inputs='in', activation='relu') 174 | 175 | @pytest.mark.parametrize('layer', [ 176 | dict(size=NH, form='conv2', filter_size=u.CNN.FILTER_SIZE), 177 | ]) 178 | def test_predict(self, layer): 179 | net = theanets.convolution.Regressor([ 180 | (u.CNN.NUM_WIDTH, u.CNN.NUM_HEIGHT, NI), 181 | NH, layer, 'flat', u.NUM_OUTPUTS]) 182 | assert net.predict(u.CNN.INPUTS).shape == (u.NUM_EXAMPLES, u.NUM_OUTPUTS) 183 | -------------------------------------------------------------------------------- /test/losses_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | class TestBuild: 8 | def test_mse(self): 9 | l = theanets.Loss.build('mse', target=2) 10 | assert callable(l) 11 | assert len(l.variables) == 1 12 | 13 | def test_mse_weighted(self): 14 | l = theanets.Loss.build('mse', target=2, weighted=True) 15 | assert callable(l) 16 | assert len(l.variables) == 2 17 | 18 | 19 | @pytest.mark.parametrize('loss', ['xe', 'hinge']) 20 | def test_classification(loss): 21 | net = theanets.Classifier([ 22 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_CLASSES], loss=loss) 23 | u.assert_progress(net, u.CLF_DATA) 24 | 25 | 26 | @pytest.mark.parametrize('loss', ['mse', 'mae', 'mmd']) 27 | def test_regression(loss): 28 | net = theanets.Regressor([ 29 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_OUTPUTS], loss=loss) 30 | u.assert_progress(net, u.REG_DATA) 31 | 32 | 33 | def test_kl(): 34 | net = theanets.Regressor([ 35 | u.NUM_INPUTS, u.NUM_HID1, (u.NUM_OUTPUTS, 'softmax')], loss='kl') 36 | u.assert_progress(net, [u.INPUTS, abs(u.OUTPUTS)]) 37 | 38 | 39 | def test_gll(): 40 | net = theanets.Regressor([ 41 | u.NUM_INPUTS, 42 | dict(name='hid', size=u.NUM_HID1), 43 | dict(name='covar', activation='relu', inputs='hid', size=u.NUM_OUTPUTS), 44 | dict(name='mean', activation='linear', inputs='hid', size=u.NUM_OUTPUTS), 45 | ]) 46 | net.set_loss('gll', target=2, mean_name='mean', covar_name='covar') 47 | u.assert_progress(net, u.REG_DATA) 48 | -------------------------------------------------------------------------------- /test/main_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import tempfile 4 | import theanets 5 | 6 | 7 | class TestExperiment: 8 | def test_save_load(self): 9 | exp = theanets.Experiment(theanets.Autoencoder, layers=(10, 3, 4, 10)) 10 | net = exp.network 11 | f, p = tempfile.mkstemp(suffix='pkl') 12 | os.close(f) 13 | os.unlink(p) 14 | try: 15 | exp.save(p) 16 | assert os.path.isfile(p) 17 | exp.load(p) 18 | assert exp.network is not net 19 | for lo, ln in zip(net.layers, exp.network.layers): 20 | assert lo.name == ln.name 21 | assert lo._input_shapes == ln._input_shapes 22 | for po, pn in zip(net.params, exp.network.params): 23 | assert po.name == pn.name 24 | assert np.allclose(po.get_value(), pn.get_value()) 25 | finally: 26 | if os.path.exists(p): 27 | os.unlink(p) 28 | -------------------------------------------------------------------------------- /test/recurrent_test.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import theanets 4 | 5 | import util as u 6 | 7 | AE_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_INPUTS] 8 | CLF_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_CLASSES] 9 | REG_LAYERS = [u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), (u.NUM_HID2, 'rnn'), u.NUM_OUTPUTS] 10 | 11 | 12 | def assert_shape(actual, expected): 13 | if not isinstance(expected, tuple): 14 | expected = (u.NUM_EXAMPLES, u.RNN.NUM_TIMES, expected) 15 | assert actual == expected 16 | 17 | 18 | @pytest.mark.parametrize('Model, layers, weighted, data', [ 19 | (theanets.recurrent.Regressor, REG_LAYERS, False, u.RNN.REG_DATA), 20 | (theanets.recurrent.Classifier, CLF_LAYERS, False, u.RNN.CLF_DATA), 21 | (theanets.recurrent.Autoencoder, AE_LAYERS, False, u.RNN.AE_DATA), 22 | (theanets.recurrent.Regressor, REG_LAYERS, True, u.RNN.WREG_DATA), 23 | (theanets.recurrent.Classifier, CLF_LAYERS, True, u.RNN.WCLF_DATA), 24 | (theanets.recurrent.Autoencoder, AE_LAYERS, True, u.RNN.WAE_DATA), 25 | ]) 26 | def test_sgd(Model, layers, weighted, data): 27 | u.assert_progress(Model(layers, weighted=weighted), data) 28 | 29 | 30 | @pytest.mark.parametrize('Model, layers, output', [ 31 | (theanets.recurrent.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 32 | (theanets.recurrent.Classifier, CLF_LAYERS, (u.NUM_EXAMPLES, u.RNN.NUM_TIMES)), 33 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.NUM_INPUTS), 34 | ]) 35 | def test_predict(Model, layers, output): 36 | assert_shape(Model(layers).predict(u.RNN.INPUTS).shape, output) 37 | 38 | 39 | @pytest.mark.parametrize('Model, layers, target, score', [ 40 | (theanets.recurrent.Regressor, REG_LAYERS, u.RNN.OUTPUTS, -0.73883247375488281), 41 | (theanets.recurrent.Classifier, CLF_LAYERS, u.RNN.CLASSES, 0.0020161290322580645), 42 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.RNN.INPUTS, 81.411415100097656), 43 | ]) 44 | def test_score(Model, layers, target, score): 45 | assert Model(layers).score(u.RNN.INPUTS, target) == score 46 | 47 | 48 | @pytest.mark.parametrize('Model, layers, target', [ 49 | (theanets.recurrent.Regressor, REG_LAYERS, u.NUM_OUTPUTS), 50 | (theanets.recurrent.Classifier, CLF_LAYERS, u.NUM_CLASSES), 51 | (theanets.recurrent.Autoencoder, AE_LAYERS, u.NUM_INPUTS), 52 | ]) 53 | def test_feed_forward(Model, layers, target): 54 | outs = Model(layers).feed_forward(u.RNN.INPUTS) 55 | assert len(list(outs)) == 7 56 | assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 57 | assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 58 | assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 59 | assert_shape(outs['out:out'].shape, target) 60 | 61 | 62 | def test_symbolic_initial_state(): 63 | net = theanets.recurrent.Regressor([ 64 | dict(size=u.NUM_INPUTS, form='input', name='h0', ndim=2), 65 | dict(size=u.NUM_INPUTS, form='input', name='in'), 66 | dict(size=u.NUM_HID1, form='rnn', name='rnn', h_0='h0'), 67 | dict(size=u.NUM_OUTPUTS, form='ff', name='out'), 68 | ]) 69 | H0 = np.random.randn(u.NUM_EXAMPLES, u.NUM_HID1).astype('f') 70 | u.assert_progress(net, [H0, u.RNN.INPUTS, u.RNN.OUTPUTS]) 71 | 72 | 73 | class TestClassifier: 74 | @pytest.fixture 75 | def net(self): 76 | return theanets.recurrent.Classifier(CLF_LAYERS) 77 | 78 | def test_predict_proba(self, net): 79 | assert_shape(net.predict_proba(u.RNN.INPUTS).shape, u.NUM_CLASSES) 80 | 81 | def test_predict_logit(self, net): 82 | assert_shape(net.predict_logit(u.RNN.INPUTS).shape, u.NUM_CLASSES) 83 | 84 | def test_score(self, net): 85 | w = 0.5 * np.ones(u.CLASSES.shape, 'f') 86 |
assert 0 <= net.score(u.RNN.INPUTS, u.CLASSES, w) <= 1 87 | 88 | def test_predict_sequence(self, net): 89 | assert list(net.predict_sequence([0, 1, 2], 5, rng=13)) == [4, 5, 1, 3, 1] 90 | 91 | 92 | class TestAutoencoder: 93 | @pytest.fixture 94 | def net(self): 95 | return theanets.recurrent.Autoencoder(AE_LAYERS) 96 | 97 | def test_encode_hid1(self, net): 98 | z = net.encode(u.RNN.INPUTS, 'hid1') 99 | assert_shape(z.shape, u.NUM_HID1) 100 | 101 | def test_encode_hid2(self, net): 102 | z = net.encode(u.RNN.INPUTS, 'hid2') 103 | assert_shape(z.shape, u.NUM_HID2) 104 | 105 | def test_decode_hid1(self, net): 106 | x = net.decode(net.encode(u.RNN.INPUTS)) 107 | assert_shape(x.shape, u.NUM_INPUTS) 108 | 109 | def test_decode_hid2(self, net): 110 | x = net.decode(net.encode(u.RNN.INPUTS, 'hid2'), 'hid2') 111 | assert_shape(x.shape, u.NUM_INPUTS) 112 | 113 | def test_score(self, net): 114 | labels = np.random.randint(0, 2, size=u.RNN.INPUTS.shape) 115 | assert net.score(u.RNN.INPUTS, labels) < 0 116 | 117 | 118 | class TestFunctions: 119 | @pytest.fixture 120 | def samples(self): 121 | return np.random.randn(2 * u.RNN.NUM_TIMES, u.NUM_INPUTS) 122 | 123 | @pytest.fixture 124 | def labels(self): 125 | return np.random.randn(2 * u.RNN.NUM_TIMES, u.NUM_OUTPUTS) 126 | 127 | def test_batches_labeled(self, samples, labels): 128 | f = theanets.recurrent.batches( 129 | [samples, labels], steps=u.RNN.NUM_TIMES, batch_size=u.NUM_EXAMPLES) 130 | assert len(f()) == 2 131 | assert_shape(f()[0].shape, u.NUM_INPUTS) 132 | assert_shape(f()[1].shape, u.NUM_OUTPUTS) 133 | 134 | def test_batches_unlabeled(self, samples): 135 | f = theanets.recurrent.batches( 136 | [samples], steps=u.RNN.NUM_TIMES, batch_size=u.NUM_EXAMPLES) 137 | assert len(f()) == 1 138 | assert_shape(f()[0].shape, u.NUM_INPUTS) 139 | 140 | 141 | class TestText: 142 | TXT = 'hello world, how are you!' 143 | 144 | def test_min_count(self): 145 | txt = theanets.recurrent.Text(self.TXT, min_count=2, unknown='_') 146 | assert txt.text == 'hello worl__ how _re _o__' 147 | assert txt.alpha == ' ehlorw' 148 | 149 | txt = theanets.recurrent.Text(self.TXT, min_count=3, unknown='_') 150 | assert txt.text == '__llo _o_l__ _o_ ___ _o__' 151 | assert txt.alpha == ' lo' 152 | 153 | @pytest.fixture 154 | def txt(self): 155 | return theanets.recurrent.Text(self.TXT, alpha='helo wrd,!', unknown='_') 156 | 157 | def test_alpha(self, txt): 158 | assert txt.text == 'hello world, how _re _o_!' 159 | assert txt.alpha == 'helo wrd,!' 160 | 161 | def test_encode(self, txt): 162 | assert txt.encode('hello!') == [1, 2, 3, 3, 4, 10] 163 | assert txt.encode('you!') == [0, 4, 0, 10] 164 | 165 | def test_decode(self, txt): 166 | assert txt.decode([1, 2, 3, 3, 4, 10]) == 'hello!' 167 | assert txt.decode([0, 4, 0, 10]) == '_o_!' 
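    # classifier_batches should yield (one-hot inputs, integer targets) pairs of
    # the requested shape, drawing a fresh random window of text on every call.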
168 | 169 | def test_classifier_batches(self, txt): 170 | b = txt.classifier_batches(steps=8, batch_size=5) 171 | assert len(b()) == 2 172 | assert b()[0].shape == (5, 8, 1 + len(txt.alpha)) 173 | assert b()[1].shape == (5, 8) 174 | assert not np.allclose(b()[0], b()[0]) 175 | -------------------------------------------------------------------------------- /test/regularizers_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | @pytest.fixture 8 | def exp(): 9 | return theanets.Regressor([u.NUM_INPUTS, 20, u.NUM_OUTPUTS], rng=115) 10 | 11 | 12 | def assert_progress(net, data=u.REG_DATA, **kwargs): 13 | start = best = None 14 | for _, val in net.itertrain( 15 | data, 16 | algorithm='sgd', 17 | patience=2, 18 | min_improvement=0.01, 19 | max_gradient_norm=1, 20 | batch_size=u.NUM_EXAMPLES, 21 | **kwargs): 22 | if start is None: 23 | start = best = val['loss'] 24 | if val['loss'] < best: 25 | best = val['loss'] 26 | assert best < start # should have made progress! 27 | 28 | 29 | def test_build_dict(exp): 30 | regs = theanets.regularizers.from_kwargs( 31 | exp, regularizers=dict(input_noise=0.01)) 32 | assert len(regs) == 1 33 | 34 | 35 | def test_build_list(exp): 36 | reg = theanets.regularizers.Regularizer.build('weight_l2', 0.01) 37 | regs = theanets.regularizers.from_kwargs(exp, regularizers=[reg]) 38 | assert len(regs) == 1 39 | 40 | 41 | @pytest.mark.parametrize('key, value', [ 42 | ('input_noise', 0.1), 43 | ('input_dropout', 0.2), 44 | ('hidden_noise', 0.1), 45 | ('hidden_dropout', 0.2), 46 | ('noise', {'*:out': 0.1}), 47 | ('dropout', {'hid?:out': 0.2}), 48 | ('hidden_l1', 0.1), 49 | ('weight_l1', 0.1), 50 | ('weight_l2', 0.01), 51 | ('contractive', 0.01), 52 | ]) 53 | def test_sgd(key, value, exp): 54 | assert_progress(exp, **{key: value}) 55 | 56 | 57 | class TestRNN: 58 | @pytest.fixture 59 | def net(self): 60 | return theanets.recurrent.Regressor([ 61 | u.NUM_INPUTS, (u.NUM_HID1, 'rnn'), u.NUM_HID2, u.NUM_OUTPUTS]) 62 | 63 | def test_recurrent_matching(self, net): 64 | regs = theanets.regularizers.from_kwargs(net) 65 | outputs, _ = net.build_graph(regs) 66 | matches = theanets.util.outputs_matching(outputs, 'hid1:out') 67 | assert [n for n, e in matches] == ['hid1:out'] 68 | 69 | @pytest.mark.parametrize('key, value', [ 70 | ('recurrent_norm', dict(pattern='hid1:out', weight=0.1)), 71 | ('recurrent_state', dict(pattern='hid1:out', weight=0.1)), 72 | ]) 73 | def test_progress(self, key, value, net): 74 | assert_progress(net, data=u.RNN.REG_DATA, **{key: value}) 75 | 76 | @pytest.mark.parametrize('key, value', [ 77 | ('recurrent_norm', 0.1), 78 | ('recurrent_state', 0.1), 79 | ]) 80 | def test_raises(self, key, value, net): 81 | with pytest.raises(theanets.util.ConfigurationError): 82 | assert_progress(net, **{key: value}) 83 | -------------------------------------------------------------------------------- /test/sparse_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import scipy.sparse 4 | import theanets 5 | 6 | import util as u 7 | 8 | CSR = dict(form='input', size=u.NUM_INPUTS, sparse='csr', name='in') 9 | CSC = dict(form='input', size=u.NUM_INPUTS, sparse='csc', name='in') 10 | REG_LAYERS = dict(csr=[CSR] + u.REG_LAYERS[1:], csc=[CSC] + u.REG_LAYERS[1:]) 11 | CLF_LAYERS = dict(csr=[CSR] + u.CLF_LAYERS[1:], csc=[CSC] + u.CLF_LAYERS[1:]) 12 | AE_LAYERS = dict(csr=[CSR] + 
u.AE_LAYERS[1:], csc=[CSC] + u.AE_LAYERS[1:]) 13 | 14 | CSR = scipy.sparse.csr_matrix(u.INPUTS) 15 | CSC = scipy.sparse.csc_matrix(u.INPUTS) 16 | INPUTS = dict(csr=CSR, csc=CSC) 17 | REG_DATA = dict(csr=[CSR] + u.REG_DATA[1:], csc=[CSC] + u.REG_DATA[1:]) 18 | WREG_DATA = dict(csr=[CSR] + u.WREG_DATA[1:], csc=[CSC] + u.WREG_DATA[1:]) 19 | CLF_DATA = dict(csr=[CSR] + u.CLF_DATA[1:], csc=[CSC] + u.CLF_DATA[1:]) 20 | WCLF_DATA = dict(csr=[CSR] + u.WCLF_DATA[1:], csc=[CSC] + u.WCLF_DATA[1:]) 21 | 22 | 23 | @pytest.mark.parametrize('Model, layers, sparse, weighted, data', [ 24 | (theanets.Regressor, REG_LAYERS, 'csr', True, WREG_DATA), 25 | (theanets.Classifier, CLF_LAYERS, 'csr', True, WCLF_DATA), 26 | (theanets.Regressor, REG_LAYERS, 'csc', True, WREG_DATA), 27 | (theanets.Classifier, CLF_LAYERS, 'csc', True, WCLF_DATA), 28 | (theanets.Regressor, REG_LAYERS, 'csr', False, REG_DATA), 29 | (theanets.Classifier, CLF_LAYERS, 'csr', False, CLF_DATA), 30 | (theanets.Regressor, REG_LAYERS, 'csc', False, REG_DATA), 31 | (theanets.Classifier, CLF_LAYERS, 'csc', False, CLF_DATA), 32 | ]) 33 | def test_sgd(Model, layers, sparse, weighted, data): 34 | u.assert_progress(Model(layers[sparse], weighted=weighted), data[sparse]) 35 | 36 | 37 | @pytest.mark.parametrize('Model, layers, output', [ 38 | (theanets.Regressor, u.REG_LAYERS, u.NUM_OUTPUTS), 39 | (theanets.Classifier, u.CLF_LAYERS, (u.NUM_EXAMPLES, )), 40 | (theanets.Autoencoder, u.AE_LAYERS, u.NUM_INPUTS), 41 | ]) 42 | def test_predict(Model, layers, output): 43 | u.assert_shape(Model(layers).predict(u.INPUTS).shape, output) 44 | 45 | 46 | @pytest.mark.parametrize('Model, layers, target, score', [ 47 | (theanets.Regressor, u.REG_LAYERS, u.OUTPUTS, -1.0473043918609619), 48 | (theanets.Classifier, u.CLF_LAYERS, u.CLASSES, 0.171875), 49 | (theanets.Autoencoder, u.AE_LAYERS, u.INPUTS, 15.108331680297852), 50 | ]) 51 | def test_score(Model, layers, target, score): 52 | assert Model(layers).score(u.INPUTS, target) == score 53 | 54 | 55 | @pytest.mark.parametrize('Model, layers, sparse, target', [ 56 | (theanets.Regressor, REG_LAYERS, 'csr', u.NUM_OUTPUTS), 57 | (theanets.Classifier, CLF_LAYERS, 'csr', u.NUM_CLASSES), 58 | (theanets.Autoencoder, AE_LAYERS, 'csr', u.NUM_INPUTS), 59 | (theanets.Regressor, REG_LAYERS, 'csc', u.NUM_OUTPUTS), 60 | (theanets.Classifier, CLF_LAYERS, 'csc', u.NUM_CLASSES), 61 | (theanets.Autoencoder, AE_LAYERS, 'csc', u.NUM_INPUTS), 62 | ]) 63 | def test_feed_forward(Model, layers, sparse, target): 64 | outs = Model(layers[sparse]).feed_forward(INPUTS[sparse]) 65 | assert len(list(outs)) == 7 66 | u.assert_shape(outs['in:out'].shape, u.NUM_INPUTS) 67 | u.assert_shape(outs['hid1:out'].shape, u.NUM_HID1) 68 | u.assert_shape(outs['hid2:out'].shape, u.NUM_HID2) 69 | u.assert_shape(outs['out:out'].shape, target) 70 | -------------------------------------------------------------------------------- /test/trainer_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import theanets 3 | 4 | import util as u 5 | 6 | 7 | @pytest.fixture 8 | def ae(): 9 | return theanets.Autoencoder([ 10 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, u.NUM_INPUTS]) 11 | 12 | 13 | def test_downhill(ae): 14 | # this really tests that interaction with downhill works. 
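# ('downhill' here refers to the separate gradient-optimization package that
# theanets delegates training to; 'sgd' is simply one of its algorithms.)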
15 | u.assert_progress(ae, u.AE_DATA) 16 | 17 | 18 | def test_layerwise(ae): 19 | u.assert_progress(ae, u.AE_DATA, algo='layerwise') 20 | 21 | 22 | def test_layerwise_tied(): 23 | ae = theanets.Autoencoder([ 24 | u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, 25 | (u.NUM_HID1, 'tied'), (u.NUM_INPUTS, 'tied')]) 26 | u.assert_progress(ae, u.AE_DATA, algo='layerwise') 27 | 28 | 29 | def test_sample(ae): 30 | trainer = ae.itertrain(u.AE_DATA, algo='sample') 31 | train0, valid0 = next(trainer) 32 | # for this trainer, we don't measure the loss. 33 | assert train0['loss'] == 0 == valid0['loss'] 34 | 35 | 36 | def test_unsupervised_pretrainer(): 37 | u.assert_progress( 38 | theanets.Experiment(theanets.Classifier, u.CLF_LAYERS), 39 | u.AE_DATA, algo='pretrain') 40 | -------------------------------------------------------------------------------- /test/util.py: -------------------------------------------------------------------------------- 1 | '''Helper code for theanets unit tests.''' 2 | 3 | import numpy as np 4 | 5 | np.random.seed(13) 6 | 7 | NUM_EXAMPLES = 64 8 | NUM_INPUTS = 7 9 | NUM_HID1 = 8 10 | NUM_HID2 = 12 11 | NUM_OUTPUTS = 5 12 | NUM_CLASSES = 6 13 | 14 | INPUTS = np.random.randn(NUM_EXAMPLES, NUM_INPUTS).astype('f') 15 | INPUT_WEIGHTS = abs(np.random.randn(NUM_EXAMPLES, NUM_INPUTS)).astype('f') 16 | OUTPUTS = np.random.randn(NUM_EXAMPLES, NUM_OUTPUTS).astype('f') 17 | OUTPUT_WEIGHTS = abs(np.random.randn(NUM_EXAMPLES, NUM_OUTPUTS)).astype('f') 18 | CLASSES = np.random.randint(NUM_CLASSES, size=NUM_EXAMPLES).astype('i') 19 | CLASS_WEIGHTS = abs(np.random.rand(NUM_EXAMPLES)).astype('f') 20 | 21 | AE_DATA = [INPUTS] 22 | WAE_DATA = [INPUTS, INPUT_WEIGHTS] 23 | AE_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_INPUTS] 24 | 25 | CLF_DATA = [INPUTS, CLASSES] 26 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 27 | CLF_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_CLASSES] 28 | 29 | REG_DATA = [INPUTS, OUTPUTS] 30 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 31 | REG_LAYERS = [NUM_INPUTS, NUM_HID1, NUM_HID2, NUM_OUTPUTS] 32 | 33 | 34 | class RNN: 35 | NUM_TIMES = 31 36 | 37 | INPUTS = np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_INPUTS).astype('f') 38 | INPUT_WEIGHTS = abs( 39 | np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_INPUTS)).astype('f') 40 | OUTPUTS = np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_OUTPUTS).astype('f') 41 | OUTPUT_WEIGHTS = abs( 42 | np.random.randn(NUM_EXAMPLES, NUM_TIMES, NUM_OUTPUTS)).astype('f') 43 | CLASSES = np.random.randn(NUM_EXAMPLES, NUM_TIMES).astype('i') 44 | CLASS_WEIGHTS = abs(np.random.rand(NUM_EXAMPLES, NUM_TIMES)).astype('f') 45 | 46 | AE_DATA = [INPUTS] 47 | WAE_DATA = [INPUTS, INPUT_WEIGHTS] 48 | 49 | CLF_DATA = [INPUTS, CLASSES] 50 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 51 | 52 | REG_DATA = [INPUTS, OUTPUTS] 53 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 54 | 55 | 56 | class CNN: 57 | NUM_WIDTH = 13 58 | NUM_HEIGHT = 15 59 | 60 | FILTER_WIDTH = 4 61 | FILTER_HEIGHT = 3 62 | FILTER_SIZE = (FILTER_WIDTH, FILTER_HEIGHT) 63 | 64 | INPUTS = np.random.randn( 65 | NUM_EXAMPLES, NUM_WIDTH, NUM_HEIGHT, NUM_INPUTS).astype('f') 66 | 67 | CLF_DATA = [INPUTS, CLASSES] 68 | WCLF_DATA = [INPUTS, CLASSES, CLASS_WEIGHTS] 69 | 70 | REG_DATA = [INPUTS, OUTPUTS] 71 | WREG_DATA = [INPUTS, OUTPUTS, OUTPUT_WEIGHTS] 72 | 73 | 74 | def assert_progress(model, data, algo='sgd'): 75 | trainer = model.itertrain( 76 | data, algo=algo, momentum=0.5, batch_size=3, max_gradient_norm=1) 77 | train0, valid0 = next(trainer) 78 | train1, valid1 = next(trainer) 79 | assert 
train1['loss'] < valid0['loss'] # should have made progress! 80 | assert valid1['loss'] == valid0['loss'] # no new validation occurred 81 | 82 | 83 | def assert_shape(actual, expected): 84 | if not isinstance(expected, tuple): 85 | expected = (NUM_EXAMPLES, expected) 86 | assert actual == expected 87 | -------------------------------------------------------------------------------- /test/util_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theanets 3 | import theanets.util 4 | 5 | 6 | class TestRandomMatrix: 7 | def test_sparsity(self): 8 | x = theanets.util.random_matrix(1000, 200, sparsity=0.1, rng=5) 9 | assert x.shape == (1000, 200) 10 | assert np.allclose(x.mean(), 0, atol=1e-2), x.mean() 11 | assert np.allclose(x.std(), 0.95, atol=1e-2), x.std() 12 | assert np.allclose((x == 0).mean(), 0.1, atol=1e-1), (x == 0).mean() 13 | 14 | def test_diagonal(self): 15 | x = theanets.util.random_matrix(1000, 200, diagonal=0.9, rng=4) 16 | assert x.shape == (1000, 200) 17 | assert np.allclose(np.diag(x), 0.9), np.diag(x) 18 | assert x.sum() == 180, x.sum() 19 | 20 | def test_radius(self): 21 | x = theanets.util.random_matrix(1000, 200, radius=2, rng=4) 22 | assert x.shape == (1000, 200) 23 | u, s, vT = np.linalg.svd(x) 24 | assert s[0] == 2, s 25 | assert s[1] < 2 26 | 27 | 28 | class TestRandomVector: 29 | def test_rng(self): 30 | x = theanets.util.random_vector(10000, rng=4) 31 | assert x.shape == (10000, ) 32 | assert np.allclose(x.mean(), 0, atol=1e-2) 33 | assert np.allclose(x.std(), 1, atol=1e-2) 34 | 35 | 36 | class TestMatching: 37 | def test_params_matching(self): 38 | net = theanets.Autoencoder([10, 20, 30, 10]) 39 | 40 | match = sorted(theanets.util.params_matching(net.layers, '*')) 41 | assert len(match) == 6 42 | assert [n for n, _ in match] == [ 43 | 'hid1.b', 'hid1.w', 'hid2.b', 'hid2.w', 'out.b', 'out.w'] 44 | 45 | match = sorted(theanets.util.params_matching(net.layers, '*.w')) 46 | assert len(match) == 3 47 | assert [n for n, _ in match] == ['hid1.w', 'hid2.w', 'out.w'] 48 | 49 | match = sorted(theanets.util.params_matching(net.layers, 'o*.?')) 50 | assert len(match) == 2 51 | assert [n for n, _ in match] == ['out.b', 'out.w'] 52 | 53 | def test_outputs_matching(self): 54 | outputs, _ = theanets.Autoencoder([10, 20, 30, 10]).build_graph() 55 | 56 | match = sorted(theanets.util.outputs_matching(outputs, '*')) 57 | assert len(match) == 7 58 | assert [n for n, _ in match] == [ 59 | 'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre', 60 | 'in:out', 'out:out', 'out:pre'] 61 | 62 | match = sorted(theanets.util.outputs_matching(outputs, 'hid?:*')) 63 | assert len(match) == 4 64 | assert [n for n, _ in match] == [ 65 | 'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre'] 66 | 67 | match = sorted(theanets.util.outputs_matching(outputs, '*:pre')) 68 | assert len(match) == 3 69 | assert [n for n, _ in match] == ['hid1:pre', 'hid2:pre', 'out:pre'] 70 | -------------------------------------------------------------------------------- /theanets/__init__.py: -------------------------------------------------------------------------------- 1 | '''This package groups together a bunch of Theano code for neural nets.''' 2 | 3 | from .activations import Activation 4 | from .feedforward import Autoencoder, Regressor, Classifier 5 | from .graph import Network 6 | from .layers import Layer 7 | from .losses import Loss 8 | from .main import Experiment 9 | from .regularizers import Regularizer 10 | from .util import log 11 | 12 | from . 
import convolution 13 | from . import recurrent 14 | from . import regularizers 15 | 16 | __version__ = '0.8.0pre' 17 | -------------------------------------------------------------------------------- /theanets/activations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | r'''Activation functions for network layers. 4 | 5 | Activation functions are normally constructed using the :func:`build` function. 6 | Commonly available functions are: 7 | 8 | - "linear" 9 | - "logistic" (or "sigmoid") 10 | - "tanh" 11 | - "softmax" (typically used for :class:`classifier ` 12 | output layers) 13 | - "relu" (or "rect:max") 14 | - "rect:min" 15 | - "rect:minmax" 16 | - "softplus" (continuous approximation of "relu") 17 | - "norm:mean": subtractive (mean) batch normalization 18 | - "norm:max": divisive (max) batch normalization 19 | - "norm:std": divisive (standard deviation) batch normalization 20 | - "norm:z": z-score batch normalization 21 | 22 | Additionally, the names of all classes defined in this module can be used as 23 | keys when building an activation function. 24 | ''' 25 | 26 | import functools 27 | import numpy as np 28 | import theano 29 | import theano.tensor as TT 30 | 31 | from . import util 32 | 33 | 34 | def _identity(x): return x 35 | 36 | 37 | def _relu(x): return (x + abs(x)) / 2 38 | 39 | 40 | def _trel(x): return (x + 1 - abs(x - 1)) / 2 41 | 42 | 43 | def _rect(x): return (abs(x) + 1 - abs(x - 1)) / 2 44 | 45 | 46 | def _norm_mean(x): return x - x.mean(axis=-1, keepdims=True) 47 | 48 | 49 | def _norm_max(x): return x / (abs(x).max(axis=-1, keepdims=True) + 1e-8) 50 | 51 | 52 | def _norm_std(x): return x / (x.std(axis=-1, keepdims=True) + 1e-8) 53 | 54 | 55 | def _norm_z(x): return ((x - x.mean(axis=-1, keepdims=True)) / 56 | (x.std(axis=-1, keepdims=True) + 1e-8)) 57 | 58 | 59 | def _softmax(x): 60 | z = TT.exp(x - x.max(axis=-1, keepdims=True)) 61 | return z / z.sum(axis=-1, keepdims=True) 62 | 63 | 64 | COMMON = { 65 | # s-shaped 66 | 'tanh': TT.tanh, 67 | 'logistic': TT.nnet.sigmoid, 68 | 'sigmoid': TT.nnet.sigmoid, 69 | 70 | # softmax (typically for classification) 71 | 'softmax': _softmax, 72 | 73 | # linear variants 74 | 'linear': _identity, 75 | 'softplus': TT.nnet.softplus, 76 | 'relu': _relu, 77 | 'rect:max': _relu, 78 | 'rect:min': _trel, 79 | 'rect:minmax': _rect, 80 | 81 | # batch normalization 82 | 'norm:mean': _norm_mean, 83 | 'norm:max': _norm_max, 84 | 'norm:std': _norm_std, 85 | 'norm:z': _norm_z, 86 | } 87 | 88 | 89 | def build(name, layer, **kwargs): 90 | '''Construct an activation function by name. 91 | 92 | Parameters 93 | ---------- 94 | name : str or :class:`Activation` 95 | The name of the type of activation function to build, or an 96 | already-created instance of an activation function. 97 | layer : :class:`theanets.layers.Layer` 98 | The layer to which this activation will be applied. 99 | kwargs : dict 100 | Additional named arguments to pass to the activation constructor. 101 | 102 | Returns 103 | ------- 104 | activation : :class:`Activation` 105 | A neural network activation function instance. 
106 | ''' 107 | if isinstance(name, Activation): 108 | return name 109 | 110 | if '+' in name: 111 | return functools.reduce( 112 | Compose, (build(n, layer, **kwargs) for n in name.split('+'))) 113 | 114 | act = COMMON.get(name) 115 | if act is not None: 116 | act.name = name 117 | act.params = [] 118 | return act 119 | 120 | if name.lower().startswith('maxout') and ':' in name: 121 | name, pieces = name.split(':', 1) 122 | kwargs['pieces'] = int(pieces) 123 | kwargs['name'] = name 124 | kwargs['layer'] = layer 125 | return Activation.build(name, **kwargs) 126 | 127 | 128 | class Activation(util.Registrar(str('Base'), (), {})): 129 | '''An activation function for a neural network layer. 130 | 131 | Parameters 132 | ---------- 133 | name : str 134 | Name of this activation function. 135 | layer : :class:`Layer` 136 | The layer to which this function is applied. 137 | 138 | Attributes 139 | ---------- 140 | name : str 141 | Name of this activation function. 142 | layer : :class:`Layer` 143 | The layer to which this function is applied. 144 | ''' 145 | 146 | def __init__(self, name, layer, **kwargs): 147 | self.name = name 148 | self.layer = layer 149 | self.kwargs = kwargs 150 | self.params = [] 151 | 152 | def __call__(self, x): 153 | '''Compute a symbolic expression for this activation function. 154 | 155 | Parameters 156 | ---------- 157 | x : Theano expression 158 | A Theano expression representing the input to this activation 159 | function. 160 | 161 | Returns 162 | ------- 163 | y : Theano expression 164 | A Theano expression representing the output from this activation 165 | function. 166 | ''' 167 | raise NotImplementedError 168 | 169 | 170 | class Compose(Activation): 171 | r'''Compose two activation functions.''' 172 | 173 | def __init__(self, f, g): 174 | self.f = f 175 | self.g = g 176 | self.name = '{}({})'.format(g.name, f.name) 177 | self.layer = None 178 | self.kwargs = {} 179 | self.params = getattr(g, 'params', []) + getattr(f, 'params', []) 180 | 181 | def __call__(self, x): 182 | return self.g(self.f(x)) 183 | 184 | 185 | class Prelu(Activation): 186 | r'''Parametric rectified linear activation with learnable leak rate. 187 | 188 | This activation is characterized by two linear pieces joined at the origin. 189 | For negative inputs, the unit response is a linear function of the input 190 | with slope :math:`r` (the "leak rate"). For positive inputs, the unit 191 | response is the identity function: 192 | 193 | .. math:: 194 | f(x) = \left\{ \begin{eqnarray*} rx &\qquad& \mbox{if } x < 0 \\ 195 | x &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 196 | 197 | This activation allocates a separate leak rate for each unit in its layer. 198 | 199 | References 200 | ---------- 201 | K He, X Zhang, S Ren, J Sun (2015), "Delving Deep into Rectifiers: 202 | Surpassing Human-Level Performance on ImageNet Classification" 203 | http://arxiv.org/abs/1502.01852 204 | ''' 205 | 206 | __extra_registration_keys__ = ['leaky-relu'] 207 | 208 | def __init__(self, *args, **kwargs): 209 | super(Prelu, self).__init__(*args, **kwargs) 210 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 211 | self.leak = theano.shared(0.1 * abs(arr), name=self.layer._fmt('leak')) 212 | self.params.append(self.leak) 213 | 214 | def __call__(self, x): 215 | return (x + abs(x)) / 2 + TT.exp(self.leak) * (x - abs(x)) / 2 216 | 217 | 218 | class LGrelu(Activation): 219 | r'''Rectified linear activation with learnable leak rate and gain. 
220 | 221 | This activation is characterized by two linear pieces joined at the origin. 222 | For negative inputs, the unit response is a linear function of the input 223 | with slope :math:`r` (the "leak rate"). For positive inputs, the unit 224 | response is a different linear function of the input with slope :math:`g` 225 | (the "gain"): 226 | 227 | .. math:: 228 | f(x) = \left\{ \begin{eqnarray*} rx &\qquad& \mbox{if } x < 0 \\ 229 | gx &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 230 | 231 | This activation allocates a separate leak and gain rate for each unit in its 232 | layer. 233 | ''' 234 | 235 | __extra_registration_keys__ = ['leaky-gain-relu'] 236 | 237 | def __init__(self, *args, **kwargs): 238 | super(LGrelu, self).__init__(*args, **kwargs) 239 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 240 | self.gain = theano.shared(0.1 * abs(arr), name=self.layer._fmt('gain')) 241 | self.params.append(self.gain) 242 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 243 | self.leak = theano.shared(0.1 * abs(arr), name=self.layer._fmt('leak')) 244 | self.params.append(self.leak) 245 | 246 | def __call__(self, x): 247 | return TT.exp(self.gain) * (x + abs(x)) / 2 + TT.exp(self.leak) * (x - abs(x)) / 2 248 | 249 | 250 | class Elu(Activation): 251 | r'''Exponential linear activation with learnable gain. 252 | 253 | This activation is characterized by two pieces joined at the origin. For 254 | negative inputs, the unit response is a decaying exponential function of the 255 | input with saturation :math:`\alpha`. For positive inputs, the unit response 256 | is the identity linear function of the input: 257 | 258 | .. math:: 259 | f(x) = \left\{ \begin{eqnarray*} \alpha (exp(x) - 1) &\qquad& \mbox{if } x < 0 \\ 260 | x &\qquad& \mbox{otherwise} \end{eqnarray*} \right. 261 | 262 | This activation allocates a separate gain for each unit in its layer. 263 | ''' 264 | 265 | __extra_registration_keys__ = [] 266 | 267 | def __init__(self, *args, **kwargs): 268 | super(Elu, self).__init__(*args, **kwargs) 269 | arr = self.layer.rng.randn(self.layer.output_size).astype(util.FLOAT) 270 | self.gain = theano.shared(0.1 * abs(arr), name=self.layer._fmt('gain')) 271 | self.params.append(self.gain) 272 | 273 | def __call__(self, x): 274 | return x * (x >= 0) + TT.exp(self.gain) * (TT.exp(x) - 1) * (x < 0) 275 | 276 | 277 | class Maxout(Activation): 278 | r'''Arbitrary piecewise linear activation. 279 | 280 | This activation is unusual in that it requires a parameter at initialization 281 | time: the number of linear pieces to use. Consider a layer for the moment 282 | with just one unit. A maxout activation with :math:`k` pieces uses a slope 283 | :math:`m_k` and an intercept :math:`b_k` for each linear piece. It then 284 | transforms the input to the maximum of all of the pieces: 285 | 286 | .. math:: 287 | f(x) = \max_k m_k x + b_k 288 | 289 | The parameters :math:`m_k` and :math:`b_k` are learnable. 290 | 291 | For layers with more than one unit, the maxout activation allocates a slope 292 | :math:`m_{ki}` and intercept :math:`b_{ki}` for each unit :math:`i` and each 293 | piece :math:`k`. The activation for unit :math:`x_i` is: 294 | 295 | .. math:: 296 | f(x_i) = \max_k m_{ki} x_i + b_{ki} 297 | 298 | Again, the slope and intercept parameters are learnable. 299 | 300 | This activation is actually a generalization of the rectified linear 301 | activations; to see how, just allocate 2 pieces and set the intercepts to 0. 
302 | The slopes of the ``relu`` activation are given by :math:`m = (0, 1)`, those 303 | of the :class:`Prelu` function are given by :math:`m = (r, 1)`, and those of 304 | the :class:`LGrelu` are given by :math:`m = (r, g)` where :math:`r` is the 305 | leak rate parameter and :math:`g` is a gain parameter. 306 | 307 | .. note:: 308 | 309 | To use this activation in a network layer specification, provide an 310 | activation string of the form ``'maxout:k'``, where ``k`` is an integer 311 | giving the number of piecewise functions. 312 | 313 | For example, the layer tuple ``(100, 'rnn', 'maxout:10')`` specifies a 314 | vanilla :class:`RNN ` layer with 100 units 315 | and a maxout activation with 10 pieces. 316 | 317 | Parameters 318 | ---------- 319 | pieces : int 320 | Number of linear pieces to use in the activation. 321 | ''' 322 | 323 | def __init__(self, *args, **kwargs): 324 | super(Maxout, self).__init__(*args, **kwargs) 325 | 326 | self.pieces = kwargs['pieces'] 327 | 328 | m = self.layer.rng.randn(self.layer.output_size, self.pieces).astype(util.FLOAT) 329 | self.slope = theano.shared(m, name=self.layer._fmt('slope')) 330 | self.params.append(self.slope) 331 | 332 | b = self.layer.rng.randn(self.layer.output_size, self.pieces).astype(util.FLOAT) 333 | self.intercept = theano.shared(b, name=self.layer._fmt('intercept')) 334 | self.params.append(self.intercept) 335 | 336 | def __call__(self, x): 337 | dims = list(range(x.ndim)) + ['x'] 338 | return (x.dimshuffle(*dims) * self.slope + self.intercept).max(axis=-1) 339 | -------------------------------------------------------------------------------- /theanets/convolution.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''This module contains convolution network structures.''' 4 | 5 | from . import feedforward 6 | 7 | 8 | class Regressor(feedforward.Regressor): 9 | '''A regressor attempts to produce a target output.''' 10 | 11 | INPUT_NDIM = 4 12 | '''Number of dimensions for holding input data arrays.''' 13 | 14 | 15 | class Classifier(feedforward.Classifier): 16 | '''A classifier attempts to match a 1-hot target output.''' 17 | 18 | INPUT_NDIM = 4 19 | '''Number of dimensions for holding input data arrays.''' 20 | -------------------------------------------------------------------------------- /theanets/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .feedforward import * 3 | from .convolution import * 4 | from .recurrent import * 5 | -------------------------------------------------------------------------------- /theanets/layers/convolution.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''Convolutional layers "scan" over input data.''' 4 | 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import theano 9 | import theano.tensor as TT 10 | 11 | from . import base 12 | from .. import util 13 | 14 | __all__ = [ 15 | 'Conv1', 16 | 'Conv2', 17 | 'Pool1', 18 | 'Pool2', 19 | ] 20 | 21 | 22 | class Convolution(base.Layer): 23 | '''Convolution layers convolve filters over the input arrays. 24 | 25 | Parameters 26 | ---------- 27 | filter_size : (int, int) 28 | Size of the convolution filters for this layer. 29 | stride : (int, int), optional 30 | Apply convolutions with this stride; i.e., skip this many samples 31 | between convolutions. Defaults to (1, 1)---that is, no skipping. 
32 | border_mode : str, optional 33 | Compute convolutions with this border mode. Defaults to 'valid'. 34 | ''' 35 | 36 | def __init__(self, filter_size, stride=(1, 1), border_mode='valid', **kwargs): 37 | self.filter_size = filter_size 38 | self.stride = stride 39 | self.border_mode = border_mode 40 | super(Convolution, self).__init__(**kwargs) 41 | 42 | def log(self): 43 | inputs = ', '.join('"{0}" {1}'.format(*ns) for ns in self._input_shapes.items()) 44 | util.log('layer {0.__class__.__name__} "{0.name}" ' 45 | '{0.output_shape} {1} {0.border_mode} ' 46 | 'filters {2}{3} from {4}', self, 47 | getattr(self.activate, 'name', self.activate), 48 | 'x'.join(str(i) for i in self.filter_size), 49 | ''.join('+{}'.format(i) for i in self.stride), 50 | inputs) 51 | util.log('learnable parameters: {}', self.log_params()) 52 | 53 | def add_conv_weights(self, name, mean=0, std=None, sparsity=0): 54 | '''Add a convolutional weight array to this layer's parameters. 55 | 56 | Parameters 57 | ---------- 58 | name : str 59 | Name of the parameter to add. 60 | mean : float, optional 61 | Mean value for randomly-initialized weights. Defaults to 0. 62 | std : float, optional 63 | Standard deviation of initial matrix values. Defaults to 64 | :math:`1 / sqrt(n_i + n_o)`. 65 | sparsity : float, optional 66 | Fraction of weights to set to zero. Defaults to 0. 67 | ''' 68 | nin = self.input_size 69 | nout = self.output_size 70 | mean = self.kwargs.get( 71 | 'mean_{}'.format(name), 72 | self.kwargs.get('mean', mean)) 73 | std = self.kwargs.get( 74 | 'std_{}'.format(name), 75 | self.kwargs.get('std', std or 1 / np.sqrt(nin + nout))) 76 | sparsity = self.kwargs.get( 77 | 'sparsity_{}'.format(name), 78 | self.kwargs.get('sparsity', sparsity)) 79 | arr = np.zeros((nout, nin) + self.filter_size, util.FLOAT) 80 | for r in range(self.filter_size[0]): 81 | for c in range(self.filter_size[1]): 82 | arr[:, :, r, c] = util.random_matrix( 83 | nout, nin, mean, std, sparsity=sparsity, rng=self.rng) 84 | self._params.append(theano.shared(arr, name=self._fmt(name))) 85 | 86 | 87 | class Conv1(Convolution): 88 | '''1-dimensional convolutions run over one data axis. 89 | 90 | Notes 91 | ----- 92 | 93 | One-dimensional convolution layers are typically used in ``theanets`` models 94 | that use recurrent inputs and outputs, i.e., 95 | :class:`theanets.recurrent.Autoencoder`, 96 | :class:`theanets.recurrent.Predictor`, 97 | :class:`theanets.recurrent.Classifier`, or 98 | :class:`theanets.recurrent.Regressor`. 99 | 100 | The convolution will be applied over the "time" dimension (axis 1). 101 | 102 | Parameters 103 | ---------- 104 | filter_size : int 105 | Length of the convolution filters for this layer. 106 | stride : int, optional 107 | Apply convolutions with this stride; i.e., skip this many samples 108 | between convolutions. Defaults to 1, i.e., no skipping. 109 | border_mode : str, optional 110 | Compute convolutions with this border mode. Defaults to 'valid'. 
111 | ''' 112 | 113 | def __init__(self, filter_size, stride=1, border_mode='valid', **kwargs): 114 | super(Conv1, self).__init__( 115 | filter_size=(1, filter_size), 116 | stride=(1, stride), 117 | border_mode=border_mode, 118 | **kwargs) 119 | 120 | def setup(self): 121 | self.add_conv_weights('w') 122 | self.add_bias('b', self.output_size) 123 | 124 | def resolve_outputs(self): 125 | if self.input_shape is None or self.input_shape[0] is None: 126 | return super(Conv1, self).resolve_outputs() 127 | image = np.array(self.input_shape[:-1]) 128 | kernel = np.array(self.filter_size) 129 | result = image 130 | if self.border_mode == 'full': 131 | result = image + kernel - 1 132 | if self.border_mode == 'valid': 133 | result = image - kernel + 1 134 | self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], ) 135 | 136 | def transform(self, inputs): 137 | # input is: (batch, time, input) 138 | # conv2d wants: (batch, input, 1, time) 139 | x = inputs[self.input_name].dimshuffle(0, 2, 'x', 1) 140 | 141 | pre = TT.nnet.conv2d( 142 | x, 143 | self.find('w'), 144 | image_shape=(None, self.input_size, 1, None), 145 | filter_shape=(self.output_size, self.input_size) + self.filter_size, 146 | border_mode=self.border_mode, 147 | subsample=self.stride, 148 | ).dimshuffle(0, 3, 1, 2)[:, :, :, 0] + self.find('b') 149 | # conv2d output is: (batch, output, 1, time) 150 | # we want: (batch, time, output) 151 | # (have to do [:, :, :, 0] to remove unused trailing dimension) 152 | 153 | return dict(pre=pre, out=self.activate(pre)), [] 154 | 155 | 156 | class Conv2(Convolution): 157 | '''2-dimensional convolutions run over two data axes. 158 | 159 | Two-dimensional convolution layers are standard image processing techniques. 160 | In theanets, these layers expect an input consisting of (num-examples, 161 | width, height, num-channels). 162 | 163 | Parameters 164 | ---------- 165 | filter_size : (int, int) 166 | Size of the convolution filters for this layer. 167 | stride : (int, int), optional 168 | Apply convolutions with this stride; i.e., skip this many samples 169 | between convolutions. Defaults to (1, 1), i.e., no skipping. 170 | border_mode : str, optional 171 | Compute convolutions with this border mode. Defaults to 'valid'. 
172 | ''' 173 | 174 | def setup(self): 175 | self.add_conv_weights('w') 176 | self.add_bias('b', self.output_size) 177 | 178 | def resolve_outputs(self): 179 | shape = self.input_shape 180 | if shape is None or shape[0] is None or shape[1] is None: 181 | return super(Conv2, self).resolve_outputs() 182 | image = np.array(shape[:-1]) 183 | kernel = np.array(self.filter_size) 184 | result = image 185 | if self.border_mode == 'full': 186 | result = image + kernel - 1 187 | if self.border_mode == 'valid': 188 | result = image - kernel + 1 189 | self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], ) 190 | 191 | def transform(self, inputs): 192 | # input is: (batch, width, height, input) 193 | # conv2d wants: (batch, input, width, height) 194 | x = inputs[self.input_name].dimshuffle(0, 3, 1, 2) 195 | 196 | pre = TT.nnet.conv2d( 197 | x, 198 | self.find('w'), 199 | image_shape=(None, self.input_size, None, None), 200 | filter_shape=(self.output_size, self.input_size) + self.filter_size, 201 | border_mode=self.border_mode, 202 | subsample=self.stride, 203 | ).dimshuffle(0, 2, 3, 1) + self.find('b') 204 | # conv2d output is: (batch, output, width, height) 205 | # we want: (batch, width, height, output) 206 | 207 | return dict(pre=pre, out=self.activate(pre)), [] 208 | 209 | 210 | class Pooling(base.Layer): 211 | ''' 212 | ''' 213 | 214 | 215 | class Pool1(Pooling): 216 | ''' 217 | ''' 218 | 219 | def transform(self, inputs): 220 | # input is: (batch, time, input) 221 | # conv2d wants: (batch, input, time, 1) 222 | x = inputs[self.input_name].dimshuffle(0, 2, 1, 'x') 223 | 224 | pre = TT.signal.downsample.max_pool_2d( 225 | x, self.pool_size, st=self.stride, mode=self.mode, 226 | ).dimshuffle(0, 2, 1, 3)[:, :, :, 0] 227 | # conv2d output is: (batch, output, time, 1) 228 | # we want: (batch, time, output) 229 | 230 | return dict(pre=pre, out=self.activate(pre)), [] 231 | 232 | 233 | class Pool2(Pooling): 234 | ''' 235 | ''' 236 | 237 | def transform(self, inputs): 238 | # input is: (batch, width, height, input) 239 | # conv2d wants: (batch, input, width, height) 240 | x = inputs[self.input_name].dimshuffle(0, 3, 1, 2) 241 | 242 | pre = TT.signal.downsample.max_pool_2d( 243 | x, self.pool_size, st=self.stride, mode=self.mode, 244 | ).dimshuffle(0, 2, 3, 1) 245 | # conv2d output is: (batch, output, width, height) 246 | # we want: (batch, width, height, output) 247 | 248 | return dict(pre=pre, out=self.activate(pre)), [] 249 | -------------------------------------------------------------------------------- /theanets/layers/feedforward.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | r'''Feedforward layers for neural network computation graphs.''' 4 | 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import theano.sparse as SS 9 | import theano.tensor as TT 10 | 11 | from . import base 12 | from .. import util 13 | 14 | __all__ = [ 15 | 'Classifier', 16 | 'Feedforward', 17 | 'Tied', 18 | ] 19 | 20 | 21 | class Feedforward(base.Layer): 22 | '''A feedforward neural network layer performs a transform of its input. 23 | 24 | More precisely, feedforward layers as implemented here perform an affine 25 | transformation of their input, followed by a potentially nonlinear 26 | :ref:`activation function ` performed elementwise on the 27 | transformed input. 28 | 29 | Feedforward layers are the fundamental building block on which most neural 30 | network models are built. 
31 | 32 | Notes 33 | ----- 34 | 35 | This layer can be constructed using the forms ``'feedforward'`` or ``'ff'``. 36 | 37 | *Parameters* 38 | 39 | - With one input: 40 | 41 | - ``b`` --- bias 42 | - ``w`` --- weights 43 | 44 | - With :math:`N>1` inputs: 45 | 46 | - ``b`` --- bias 47 | - ``w_1`` --- weight for input 1 48 | - ``w_2`` ... 49 | - ``w_N`` --- weight for input :math:`N` 50 | 51 | *Outputs* 52 | 53 | - ``out`` --- the post-activation state of the layer 54 | - ``pre`` --- the pre-activation state of the layer 55 | ''' 56 | 57 | __extra_registration_keys__ = ['ff'] 58 | 59 | def _weight_for_input(self, name): 60 | return 'w' if len(self._input_shapes) == 1 else 'w_{}'.format(name) 61 | 62 | def transform(self, inputs): 63 | def _dot(x, y): 64 | if isinstance(x, SS.SparseVariable): 65 | return SS.structured_dot(x, y) 66 | else: 67 | return TT.dot(x, y) 68 | 69 | xws = ((inputs[name], self.find(self._weight_for_input(name))) 70 | for name in self._input_shapes) 71 | pre = sum(_dot(x, w) for x, w in xws) + self.find('b') 72 | return dict(pre=pre, out=self.activate(pre)), [] 73 | 74 | def setup(self): 75 | for name, shape in self._input_shapes.items(): 76 | label = self._weight_for_input(name) 77 | self.add_weights(label, shape[-1], self.output_size) 78 | self.add_bias('b', self.output_size) 79 | 80 | 81 | class Classifier(Feedforward): 82 | '''A classifier layer performs a softmax over a linear input transform. 83 | 84 | Classifier layers are typically the "output" layer of a classifier network. 85 | 86 | This layer type really only wraps the output activation of a standard 87 | :class:`Feedforward` layer. 88 | 89 | Notes 90 | ----- 91 | 92 | The classifier layer is just a vanilla :class:`Feedforward` layer that uses 93 | a ``'softmax'`` output :ref:`activation `. 94 | ''' 95 | 96 | __extra_registration_keys__ = ['softmax'] 97 | 98 | def __init__(self, **kwargs): 99 | kwargs['activation'] = 'softmax' 100 | super(Classifier, self).__init__(**kwargs) 101 | 102 | 103 | class Tied(base.Layer): 104 | '''A tied-weights feedforward layer shadows weights from another layer. 105 | 106 | Notes 107 | ----- 108 | 109 | Tied weights are typically featured in some types of autoencoder models 110 | (e.g., PCA). A layer with tied weights requires a "partner" layer -- the 111 | tied layer borrows the weights from its partner and uses the transpose of 112 | them to perform its feedforward mapping. Thus, tied layers do not have their 113 | own weights. On the other hand, tied layers do have their own bias values, 114 | but these can be fixed to zero during learning to simulate networks with no 115 | bias (e.g., PCA on mean-centered data). 116 | 117 | *Parameters* 118 | 119 | - ``b`` --- bias 120 | 121 | *Outputs* 122 | 123 | - ``out`` --- the post-activation state of the layer 124 | - ``pre`` --- the pre-activation state of the layer 125 | 126 | Parameters 127 | ---------- 128 | partner : str or :class:`theanets.layers.base.Layer` 129 | The "partner" layer to which this layer is tied. 130 | 131 | Attributes 132 | ---------- 133 | partner : :class:`theanets.layers.base.Layer` 134 | The "partner" layer to which this layer is tied. 
135 | ''' 136 | 137 | def __init__(self, partner, **kwargs): 138 | self.partner = partner 139 | kwargs['size'] = kwargs['shape'] = None 140 | if isinstance(partner, base.Layer): 141 | kwargs['shape'] = partner.input_shape 142 | super(Tied, self).__init__(**kwargs) 143 | 144 | def transform(self, inputs): 145 | x = inputs[self.input_name] 146 | pre = TT.dot(x, self.partner.find('w').T) + self.find('b') 147 | return dict(pre=pre, out=self.activate(pre)), [] 148 | 149 | def resolve_inputs(self, layers): 150 | super(Tied, self).resolve_inputs(layers) 151 | if isinstance(self.partner, util.basestring): 152 | # if the partner is named, just get that layer. 153 | matches = [l for l in layers if l.name == self.partner] 154 | if len(matches) != 1: 155 | raise util.ConfigurationError( 156 | 'tied layer "{}": cannot find partner "{}"' 157 | .format(self.name, self.partner)) 158 | self.partner = matches[0] 159 | 160 | def resolve_outputs(self): 161 | self._output_shapes['out'] = self.partner.input_shape 162 | 163 | def setup(self): 164 | # this layer does not create a weight matrix! 165 | self.add_bias('b', self.output_size) 166 | 167 | def log(self): 168 | inputs = ', '.join('"{0}" {1}'.format(*ns) for ns in self._input_shapes.items()) 169 | util.log('layer {0.__class__.__name__} "{0.name}" ' 170 | '(tied to "{0.partner.name}") {0.output_shape} {1} from {2}', 171 | self, getattr(self.activate, 'name', self.activate), inputs) 172 | util.log('learnable parameters: {}', self.log_params()) 173 | 174 | def to_spec(self): 175 | spec = super(Tied, self).to_spec() 176 | spec['partner'] = self.partner.name 177 | return spec 178 | -------------------------------------------------------------------------------- /theanets/main.py: -------------------------------------------------------------------------------- 1 | '''This module contains some glue code encapsulating a "main" process. 2 | 3 | The code here wraps the most common tasks involved in creating and, especially, 4 | training a neural network model. 5 | ''' 6 | 7 | import os 8 | 9 | from . import graph 10 | from . import util 11 | 12 | 13 | class Experiment: 14 | '''This class encapsulates tasks for training and evaluating a network. 15 | 16 | Parameters 17 | ---------- 18 | model : :class:`Network ` or str 19 | A specification for obtaining a model. If a string is given, it is 20 | assumed to name a file containing a pickled model; this file will be 21 | loaded and used. If a network instance is provided, it will be used 22 | as the model. If a callable (such as a subclass) is provided, it 23 | will be invoked using the provided keyword arguments to create a 24 | network instance. 25 | ''' 26 | 27 | def __init__(self, network, *args, **kwargs): 28 | if isinstance(network, util.basestring) and os.path.isfile(network): 29 | self.load(network) 30 | elif isinstance(network, graph.Network): 31 | self.network = network 32 | else: 33 | assert network is not graph.Network, \ 34 | 'use a concrete theanets.Network subclass ' \ 35 | 'like theanets.{Autoencoder,Regressor,...}' 36 | self.network = network(*args, **kwargs) 37 | 38 | def train(self, *args, **kwargs): 39 | '''Train the network until the trainer converges. 40 | 41 | All arguments are passed to :func:`train 42 | `. 43 | 44 | Returns 45 | ------- 46 | training : dict 47 | A dictionary of monitor values computed using the training dataset, 48 | at the conclusion of training. This dictionary will at least contain 49 | a 'loss' key that indicates the value of the loss function. 
Other 50 | keys may be available depending on the trainer being used. 51 | validation : dict 52 | A dictionary of monitor values computed using the validation 53 | dataset, at the conclusion of training. 54 | ''' 55 | return self.network.train(*args, **kwargs) 56 | 57 | def itertrain(self, *args, **kwargs): 58 | '''Train the network iteratively. 59 | 60 | All arguments are passed to :func:`itertrain 61 | `. 62 | 63 | Yields 64 | ------ 65 | training : dict 66 | A dictionary of monitor values computed using the training dataset, 67 | at the conclusion of training. This dictionary will at least contain 68 | a 'loss' key that indicates the value of the loss function. Other 69 | keys may be available depending on the trainer being used. 70 | validation : dict 71 | A dictionary of monitor values computed using the validation 72 | dataset, at the conclusion of training. 73 | ''' 74 | return self.network.itertrain(*args, **kwargs) 75 | 76 | def save(self, path): 77 | '''Save the current network to a pickle file on disk. 78 | 79 | Parameters 80 | ---------- 81 | path : str 82 | Location of the file to save the network. 83 | ''' 84 | self.network.save(path) 85 | 86 | def load(self, path): 87 | '''Load a saved network from a pickle file on disk. 88 | 89 | This method sets the ``network`` attribute of the experiment to the 90 | loaded network model. 91 | 92 | Parameters 93 | ---------- 94 | filename : str 95 | Load the keyword arguments and parameters of a network from a pickle 96 | file at the named path. If this name ends in ".gz" then the input 97 | will automatically be gunzipped; otherwise the input will be treated 98 | as a "raw" pickle. 99 | 100 | Returns 101 | ------- 102 | network : :class:`Network ` 103 | A newly-constructed network, with topology and parameters loaded 104 | from the given pickle file. 
105 | ''' 106 | self.network = graph.Network.load(path) 107 | return self.network 108 | -------------------------------------------------------------------------------- /theanets/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | '''Utility functions and classes.''' 4 | 5 | import click 6 | import datetime 7 | import fnmatch 8 | import inspect 9 | import numpy as np 10 | import theano 11 | import theano.tensor as TT 12 | 13 | try: 14 | basestring = basestring 15 | except NameError: 16 | basestring = str 17 | 18 | FLOAT = theano.config.floatX 19 | 20 | FLOAT_CONTAINERS = (TT.scalar, TT.vector, TT.matrix, TT.tensor3, TT.tensor4) 21 | 22 | INT_CONTAINERS = (TT.iscalar, TT.ivector, TT.imatrix, TT.itensor3, TT.itensor4) 23 | 24 | 25 | class Error(Exception): 26 | pass 27 | 28 | 29 | class ConfigurationError(Error): 30 | pass 31 | 32 | 33 | class Registrar(type): 34 | '''A metaclass that builds a registry of its subclasses.''' 35 | 36 | def __init__(cls, name, bases, dct): 37 | if not hasattr(cls, '_registry'): 38 | cls._registry = {} 39 | else: 40 | cls._registry[name.lower()] = cls 41 | for name in getattr(cls, '__extra_registration_keys__', ()): 42 | cls._registry[name.lower()] = cls 43 | super(Registrar, cls).__init__(name, bases, dct) 44 | 45 | def build(cls, key, *args, **kwargs): 46 | return cls._registry[key.lower()](*args, **kwargs) 47 | 48 | def get_class(cls, key): 49 | return cls._registry[key.lower()] 50 | 51 | def is_registered(cls, key): 52 | return key.lower() in cls._registry 53 | 54 | 55 | def random_matrix(rows, cols, mean=0, std=1, sparsity=0, radius=0, diagonal=0, rng=None): 56 | '''Create a matrix of randomly-initialized weights. 57 | 58 | Parameters 59 | ---------- 60 | rows : int 61 | Number of rows of the weight matrix -- equivalently, the number of 62 | "input" units that the weight matrix connects. 63 | cols : int 64 | Number of columns of the weight matrix -- equivalently, the number 65 | of "output" units that the weight matrix connects. 66 | mean : float, optional 67 | Draw initial weight values from a normal with this mean. Defaults to 0. 68 | std : float, optional 69 | Draw initial weight values from a normal with this standard deviation. 70 | Defaults to 1. 71 | sparsity : float in (0, 1), optional 72 | If given, ensure that the given fraction of the weight matrix is 73 | set to zero. Defaults to 0, meaning all weights are nonzero. 74 | radius : float, optional 75 | If given, rescale the initial weights to have this spectral radius. 76 | No scaling is performed by default. 77 | diagonal : float, optional 78 | If nonzero, create a matrix containing all zeros except for this value 79 | along the diagonal. If nonzero, other arguments (except for rows and 80 | cols) will be ignored. 81 | rng : :class:`numpy.random.RandomState` or int, optional 82 | A random number generator, or an integer seed for a random number 83 | generator. If not provided, the random number generator will be created 84 | with an automatically chosen seed. 85 | 86 | Returns 87 | ------- 88 | matrix : numpy array 89 | An array containing random values. These often represent the weights 90 | connecting each "input" unit to each "output" unit in a layer. 
91 | ''' 92 | if rng is None or isinstance(rng, int): 93 | rng = np.random.RandomState(rng) 94 | arr = mean + std * rng.randn(rows, cols) 95 | if 1 > sparsity > 0: 96 | k = min(rows, cols) 97 | mask = rng.binomial(n=1, p=1 - sparsity, size=(rows, cols)).astype(bool) 98 | mask[:k, :k] |= np.eye(k).astype(bool) 99 | arr *= mask 100 | if radius > 0: 101 | # rescale weights to have the appropriate spectral radius. 102 | u, s, vT = np.linalg.svd(arr, full_matrices=False) 103 | arr = np.dot(np.dot(u, np.diag(radius * s / abs(s[0]))), vT) 104 | if diagonal != 0: 105 | # generate a diagonal weight matrix. ignore other options. 106 | arr = diagonal * np.eye(max(rows, cols))[:rows, :cols] 107 | return arr.astype(FLOAT) 108 | 109 | 110 | def random_vector(size, mean=0, std=1, rng=None): 111 | '''Create a vector of randomly-initialized values. 112 | 113 | Parameters 114 | ---------- 115 | size : int 116 | Length of vector to create. 117 | mean : float, optional 118 | Mean value for initial vector values. Defaults to 0. 119 | std : float, optional 120 | Standard deviation for initial vector values. Defaults to 1. 121 | rng : :class:`numpy.random.RandomState` or int, optional 122 | A random number generator, or an integer seed for a random number 123 | generator. If not provided, the random number generator will be created 124 | with an automatically chosen seed. 125 | 126 | Returns 127 | ------- 128 | vector : numpy array 129 | An array containing random values. This often represents the bias for a 130 | layer of computation units. 131 | ''' 132 | if rng is None or isinstance(rng, int): 133 | rng = np.random.RandomState(rng) 134 | return (mean + std * rng.randn(size)).astype(FLOAT) 135 | 136 | 137 | def outputs_matching(outputs, patterns): 138 | '''Get the outputs from a network that match a pattern. 139 | 140 | Parameters 141 | ---------- 142 | outputs : dict or sequence of (str, theano expression) 143 | Output expressions to filter for matches. If this is a dictionary, its 144 | ``items()`` will be processed for matches. 145 | patterns : sequence of str 146 | A sequence of glob-style patterns to match against. Any parameter 147 | matching any pattern in this sequence will be included in the match. 148 | 149 | Yields 150 | ------ 151 | matches : pair of str, theano expression 152 | Generates a sequence of (name, expression) pairs. The name is the name 153 | of the output that matched, and the expression is the symbolic output in 154 | the network graph. 155 | ''' 156 | if isinstance(patterns, basestring): 157 | patterns = (patterns, ) 158 | if isinstance(outputs, dict): 159 | outputs = outputs.items() 160 | for name, expr in outputs: 161 | for pattern in patterns: 162 | if fnmatch.fnmatch(name, pattern): 163 | yield name, expr 164 | break 165 | 166 | 167 | def params_matching(layers, patterns): 168 | '''Get the parameters from a network that match a pattern. 169 | 170 | Parameters 171 | ---------- 172 | layers : list of :class:`theanets.layers.Layer` 173 | A list of network layers to retrieve parameters from. 174 | patterns : sequence of str 175 | A sequence of glob-style patterns to match against. Any parameter 176 | matching any pattern in this sequence will be included in the match. 177 | 178 | Yields 179 | ------ 180 | matches : pair of str, theano expression 181 | Generates a sequence of (name, expression) pairs. The name is the name 182 | of the parameter that matched, and the expression represents the 183 | parameter symbolically.
184 | ''' 185 | if isinstance(patterns, basestring): 186 | patterns = (patterns, ) 187 | for layer in layers: 188 | for param in layer.params: 189 | name = param.name 190 | for pattern in patterns: 191 | if fnmatch.fnmatch(name, pattern): 192 | yield name, param 193 | break 194 | 195 | 196 | _detailed_callsite = False 197 | 198 | 199 | def enable_detailed_callsite_logging(): 200 | '''Enable detailed callsite logging.''' 201 | global _detailed_callsite 202 | _detailed_callsite = True 203 | 204 | 205 | def log(msg, *args, **kwargs): 206 | '''Log a message to the console. 207 | 208 | Parameters 209 | ---------- 210 | msg : str 211 | A string to display on the console. This can contain {}-style 212 | formatting commands; the remaining positional and keyword arguments 213 | will be used to fill them in. 214 | ''' 215 | now = datetime.datetime.now() 216 | module = 'theanets' 217 | if _detailed_callsite: 218 | caller = inspect.stack()[1] 219 | parts = caller.filename.replace('.py', '').split('/') 220 | module = '{}:{}'.format( 221 | '.'.join(parts[parts.index('theanets')+1:]), caller.lineno) 222 | click.echo(' '.join(( 223 | click.style(now.strftime('%Y%m%d'), fg='blue'), 224 | click.style(now.strftime('%H%M%S'), fg='cyan'), 225 | click.style(module, fg='green'), 226 | msg.format(*args, **kwargs), 227 | ))) 228 | --------------------------------------------------------------------------------
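The matrix-initialization and glob-matching helpers at the end of theanets/util.py are easiest to see in action. The short sketch below reuses only calls that already appear verbatim in test/util_test.py earlier in this tree, so it illustrates the documented API rather than adding to it:

import theanets
import theanets.util

# Weight initialization: a 1000x200 matrix rescaled to have spectral radius 2.
w = theanets.util.random_matrix(1000, 200, radius=2, rng=4)

# Glob-style matching over parameter and output names of a small autoencoder.
net = theanets.Autoencoder([10, 20, 30, 10])
weights = [p for _, p in theanets.util.params_matching(net.layers, '*.w')]  # hid1.w, hid2.w, out.w
outputs, _ = net.build_graph()
pre_acts = dict(theanets.util.outputs_matching(outputs, '*:pre'))  # hid1:pre, hid2:pre, out:pre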