├── .gitignore ├── CHANGES.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── conf.py ├── data_providers.rst ├── getting_started.rst ├── index.rst ├── initialization.rst ├── installation.rst ├── introduction.rst ├── layers.rst ├── make.bat ├── models.rst ├── monitors.rst ├── optimizers.rst ├── parameter_updaters.rst ├── requirements_sphinx.txt └── schedulers.rst ├── examples ├── mnist_neural_net_deep.yml ├── mnist_neural_net_deep_script.py ├── mnist_neural_net_shallow.yml └── neural_net_regression_example.py ├── hebel ├── __init__.py ├── config.py ├── cross_validation.py ├── data_providers.py ├── layers │ ├── __init__.py │ ├── column.py │ ├── dummy_layer.py │ ├── flattening_layer.py │ ├── hidden_layer.py │ ├── input_dropout.py │ ├── linear_regression_layer.py │ ├── logistic_layer.py │ ├── multi_column_layer.py │ ├── multitask_top_layer.py │ ├── softmax_layer.py │ └── top_layer.py ├── models │ ├── __init__.py │ ├── logistic_regression.py │ ├── model.py │ ├── multitask_neural_net.py │ ├── neural_net.py │ └── neural_net_regression.py ├── monitors.py ├── optimizers.py ├── parameter_updaters.py ├── pycuda_ops │ ├── __init__.py │ ├── cublas.py │ ├── cuda.py │ ├── cudadrv.py │ ├── cudart.py │ ├── elementwise.py │ ├── linalg.py │ ├── matrix.py │ ├── reductions.py │ ├── softmax.py │ └── utils.py ├── schedulers.py ├── utils │ ├── __init__.py │ ├── call_check.py │ ├── environ.py │ ├── exc.py │ ├── math.py │ ├── plotting.py │ ├── serial.py │ └── string_utils.py └── version.py ├── hebel_test.py ├── setup.py └── train_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | 
pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Django stuff: 43 | *.log 44 | *.pot 45 | 46 | # Sphinx documentation 47 | docs/_build/ 48 | 49 | # Example output directory 50 | examples/mnist/ 51 | 52 | # Backup files 53 | *~ 54 | #*# 55 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | Hebel Changelog 2 | =============== 3 | 4 | Version 0.02.1 5 | -------------- 6 | 7 | 05-22-2014 8 | 9 | * Added setup.py 10 | * Added to PyPi 11 | 12 | Version 0.02 13 | ------------ 14 | 15 | 05-08-2014 16 | 17 | * Windows compatibility (Thanks to [@Wainberg](https://github.com/Wainberg)) 18 | * CUDA 4.x is no longer supported, please upgrade to CUDA 5 or CUDA 6 19 | * All initialization is now handled through `hebel.init()`. No need to 20 | initialize PyCUDA separately anymore. 21 | * `LogisticLayer` has been renamed to `SoftmaxLayer`. `LogisticLayer` 22 | now does binary classification while `SoftmaxLayer` is for 23 | multiclass classification. 24 | * Framework for cross-validation. 25 | * When `ProgressMonitor` has `save_interval=None`, then only the 26 | currently best model is serialized. If it is a positive integer, 27 | then regular snapshots of the model are stored with that frequency. 28 | 29 | Version 0.01 30 | ------------ 31 | 32 | 01-01-2014 33 | 34 | * Removed dependency on scikits.cuda (this should make Hebel 35 | compatible with Windows, but I couldn't test that yet) 36 | 37 | * Serious speed-ups by avoiding freeing and reallocating memory for 38 | temporary objects. 
Previously, many temporary gpuarrays were 39 | reallocated in every single minibatch and then discarded, which was 40 | very inefficient. By using persistent objects for temporary objects 41 | across minibatches and some other improvements such as doing more 42 | computations in-place, a roughly 2x speed-up could be realised. 43 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | include CHANGES.md 4 | include examples/*.yml 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hebel 2 | 3 | GPU-Accelerated Deep Learning Library in Python 4 | 5 | Hebel is a library for deep learning with neural networks in Python using GPU acceleration with CUDA through PyCUDA. It implements the most important types of neural network models and offers a variety of different activation functions and training methods such as momentum, Nesterov momentum, dropout, and early stopping. 6 | 7 | I no longer actively develop Hebel. If you are looking for a deep learning framework in Python, I now recommend [Chainer](https://github.com/pfnet/chainer). 8 | 9 | ## Models 10 | 11 | Right now, Hebel implements feed-forward neural networks for classification and regression on one or multiple tasks. Other models such as Autoencoder, Convolutional neural nets, and Restricted Boltzman machines are planned for the future. 12 | 13 | Hebel implements dropout as well as L1 and L2 weight decay for regularization. 14 | 15 | ## Optimization 16 | 17 | Hebel implements stochastic gradient descent (SGD) with regular and Nesterov momentum. 18 | 19 | ## Compatibility 20 | 21 | Currently, Hebel will run on Linux and Windows, and probably Mac OS X (not tested). 
22 | 23 | ## Dependencies 24 | - PyCUDA 25 | - numpy 26 | - PyYAML 27 | - skdata (only for MNIST example) 28 | 29 | ## Installation 30 | 31 | Hebel is on PyPi, so you can install it with 32 | 33 | pip install hebel 34 | 35 | ## Getting started 36 | Study the yaml configuration files in `examples/` and run 37 | 38 | python train_model.py examples/mnist_neural_net_shallow.yml 39 | 40 | The script will create a directory in `examples/mnist` where the models and logs are saved. 41 | 42 | Read the Getting started guide at [hebel.readthedocs.org/en/latest/getting_started.html](http://hebel.readthedocs.org/en/latest/getting_started.html) for more information. 43 | 44 | ## Documentation 45 | [hebel.readthedocs.org](http://hebel.readthedocs.org) 46 | 47 | ## Contact 48 | Maintained by [Hannes Bretschneider](http://github.com/hannes-brt) (hannes@psi.utoronto.ca). 49 | If your are using Hebel, please let me know whether you find it useful and file a Github issue if you find any bugs or have feature requests. 50 | 51 | ## Citing 52 | [![http://dx.doi.org/10.5281/zenodo.10050](https://zenodo.org/badge/doi/10.5281/zenodo.10050.png)](http://dx.doi.org/10.5281/zenodo.10050) 53 | 54 | If you make use of Hebel in your research, please cite it. The BibTeX reference is 55 | 56 | @article{Bretschneider:10050, 57 | author = "Hannes Bretschneider", 58 | title = "{Hebel - GPU-Accelerated Deep Learning Library in Python}", 59 | month = "May", 60 | year = "2014", 61 | doi = "10.5281/zenodo.10050", 62 | url = "https://zenodo.org/record/10050", 63 | } 64 | 65 | ## What's with the name? 66 | _Hebel_ is the German word for _lever_, one of the oldest tools that humans use. 
As Archimedes said it: _"Give me a lever long enough and a fulcrum on which to place it, and I shall move the world."_ 67 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all 
changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Hebel.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Hebel.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 
88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Hebel" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Hebel" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Hebel documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 25 19:20:29 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | # Copyright (C) 2013 Hannes Bretschneider 15 | 16 | # This program is free software; you can redistribute it and/or modify 17 | # it under the terms of the GNU General Public License as published by 18 | # the Free Software Foundation; either version 2 of the License, or 19 | # (at your option) any later version. 20 | 21 | # This program is distributed in the hope that it will be useful, 22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 | # GNU General Public License for more details. 
25 | 26 | # You should have received a copy of the GNU General Public License along 27 | # with this program; if not, write to the Free Software Foundation, Inc., 28 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 29 | 30 | import mock 31 | import sys, os 32 | 33 | MOCK_MODULES = ['hebel.pycuda_ops', 'hebel.pycuda_ops.linalg', 34 | 'hebel.pycuda_ops.cuda', 'hebel.pycuda_ops.cudart', 35 | 'hebel.pycuda_ops.elementwise', 'hebel.pycuda_ops.matrix', 36 | 'hebel.pycuda_ops.reductions', 'hebel.pycuda_ops.softmax', 37 | 'hebel.pycuda_ops.cublas', 'hebel.pycuda_ops.cudadrv', 'skdata', 38 | 'skdata.mnist', 'skdata.mnist.view', 'pycuda', 'pycuda.autoinit', 39 | 'pycuda.compiler', 'pycuda.cumath', 'pycuda.driver', 'pycuda.tools', 40 | 'pycuda.elementwise', 'pycuda.gpuarray', 'numpy'] 41 | 42 | for mod_name in MOCK_MODULES: 43 | sys.modules[mod_name] = mock.Mock() 44 | 45 | sys.path = ['../'] + sys.path 46 | 47 | # If extensions (or modules to document with autodoc) are in another directory, 48 | # add these directories to sys.path here. If the directory is relative to the 49 | # documentation root, use os.path.abspath to make it absolute, like shown here. 50 | #sys.path.insert(0, os.path.abspath('.')) 51 | 52 | # -- General configuration ----------------------------------------------------- 53 | 54 | # If your documentation needs a minimal Sphinx version, state it here. 55 | #needs_sphinx = '1.0' 56 | 57 | # Add any Sphinx extension module names here, as strings. They can be extensions 58 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 59 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.mathjax'] 60 | 61 | # Add any paths that contain templates here, relative to this directory. 62 | templates_path = ['_templates'] 63 | 64 | # The suffix of source filenames. 65 | source_suffix = '.rst' 66 | 67 | # The encoding of source files. 
68 | #source_encoding = 'utf-8-sig' 69 | 70 | # The master toctree document. 71 | master_doc = 'index' 72 | 73 | # General information about the project. 74 | project = u'Hebel' 75 | copyright = u'2013, Hannes Bretschneider' 76 | 77 | # The version info for the project you're documenting, acts as replacement for 78 | # |version| and |release|, also used in various other places throughout the 79 | # built documents. 80 | # 81 | # The short X.Y version. 82 | from .hebel.version import version 83 | # The full version, including alpha/beta/rc tags. 84 | from .hebel.version import release 85 | 86 | # The language for content autogenerated by Sphinx. Refer to documentation 87 | # for a list of supported languages. 88 | #language = None 89 | 90 | # There are two options for replacing |today|: either, you set today to some 91 | # non-false value, then it is used: 92 | #today = '' 93 | # Else, today_fmt is used as the format for a strftime call. 94 | #today_fmt = '%B %d, %Y' 95 | 96 | # List of patterns, relative to source directory, that match files and 97 | # directories to ignore when looking for source files. 98 | exclude_patterns = ['_build'] 99 | 100 | # The reST default role (used for this markup: `text`) to use for all documents. 101 | #default_role = None 102 | 103 | # If true, '()' will be appended to :func: etc. cross-reference text. 104 | #add_function_parentheses = True 105 | 106 | # If true, the current module name will be prepended to all description 107 | # unit titles (such as .. function::). 108 | #add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | #show_authors = False 113 | 114 | # The name of the Pygments (syntax highlighting) style to use. 115 | pygments_style = 'sphinx' 116 | 117 | # A list of ignored prefixes for module index sorting. 
118 | #modindex_common_prefix = [] 119 | 120 | 121 | # -- Options for HTML output --------------------------------------------------- 122 | 123 | # The theme to use for HTML and HTML Help pages. See the documentation for 124 | # a list of builtin themes. 125 | html_theme = 'default' 126 | 127 | # Theme options are theme-specific and customize the look and feel of a theme 128 | # further. For a list of options available for each theme, see the 129 | # documentation. 130 | #html_theme_options = {} 131 | 132 | # Add any paths that contain custom themes here, relative to this directory. 133 | #html_theme_path = [] 134 | 135 | # The name for this set of Sphinx documents. If None, it defaults to 136 | # " v documentation". 137 | #html_title = None 138 | 139 | # A shorter title for the navigation bar. Default is the same as html_title. 140 | #html_short_title = None 141 | 142 | # The name of an image file (relative to this directory) to place at the top 143 | # of the sidebar. 144 | #html_logo = None 145 | 146 | # The name of an image file (within the static path) to use as favicon of the 147 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 148 | # pixels large. 149 | #html_favicon = None 150 | 151 | # Add any paths that contain custom static files (such as style sheets) here, 152 | # relative to this directory. They are copied after the builtin static files, 153 | # so a file named "default.css" will overwrite the builtin "default.css". 154 | html_static_path = ['_static'] 155 | 156 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 157 | # using the given strftime format. 158 | #html_last_updated_fmt = '%b %d, %Y' 159 | 160 | # If true, SmartyPants will be used to convert quotes and dashes to 161 | # typographically correct entities. 162 | #html_use_smartypants = True 163 | 164 | # Custom sidebar templates, maps document names to template names. 
165 | #html_sidebars = {} 166 | 167 | # Additional templates that should be rendered to pages, maps page names to 168 | # template names. 169 | #html_additional_pages = {} 170 | 171 | # If false, no module index is generated. 172 | #html_domain_indices = True 173 | 174 | # If false, no index is generated. 175 | #html_use_index = True 176 | 177 | # If true, the index is split into individual pages for each letter. 178 | #html_split_index = False 179 | 180 | # If true, links to the reST sources are added to the pages. 181 | #html_show_sourcelink = True 182 | 183 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 184 | #html_show_sphinx = True 185 | 186 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 187 | #html_show_copyright = True 188 | 189 | # If true, an OpenSearch description file will be output, and all pages will 190 | # contain a tag referring to it. The value of this option must be the 191 | # base URL from which the finished HTML is served. 192 | #html_use_opensearch = '' 193 | 194 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 195 | #html_file_suffix = None 196 | 197 | # Output file base name for HTML help builder. 198 | htmlhelp_basename = 'Hebeldoc' 199 | 200 | 201 | # -- Options for LaTeX output -------------------------------------------------- 202 | 203 | latex_elements = { 204 | # The paper size ('letterpaper' or 'a4paper'). 205 | #'papersize': 'letterpaper', 206 | 207 | # The font size ('10pt', '11pt' or '12pt'). 208 | #'pointsize': '10pt', 209 | 210 | # Additional stuff for the LaTeX preamble. 211 | #'preamble': '', 212 | } 213 | 214 | # Grouping the document tree into LaTeX files. List of tuples 215 | # (source start file, target name, title, author, documentclass [howto/manual]). 
216 | latex_documents = [ 217 | ('index', 'Hebel.tex', u'Hebel Documentation', 218 | u'Hannes Bretschneider', 'manual'), 219 | ] 220 | 221 | # The name of an image file (relative to this directory) to place at the top of 222 | # the title page. 223 | #latex_logo = None 224 | 225 | # For "manual" documents, if this is true, then toplevel headings are parts, 226 | # not chapters. 227 | #latex_use_parts = False 228 | 229 | # If true, show page references after internal links. 230 | #latex_show_pagerefs = False 231 | 232 | # If true, show URL addresses after external links. 233 | #latex_show_urls = False 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #latex_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #latex_domain_indices = True 240 | 241 | 242 | # -- Options for manual page output -------------------------------------------- 243 | 244 | # One entry per manual page. List of tuples 245 | # (source start file, name, description, authors, manual section). 246 | man_pages = [ 247 | ('index', 'hebel', u'Hebel Documentation', 248 | [u'Hannes Bretschneider'], 1) 249 | ] 250 | 251 | # If true, show URL addresses after external links. 252 | #man_show_urls = False 253 | 254 | 255 | # -- Options for Texinfo output ------------------------------------------------ 256 | 257 | # Grouping the document tree into Texinfo files. List of tuples 258 | # (source start file, target name, title, author, 259 | # dir menu entry, description, category) 260 | texinfo_documents = [ 261 | ('index', 'Hebel', u'Hebel Documentation', 262 | u'Hannes Bretschneider', 'Hebel', 'One line description of project.', 263 | 'Miscellaneous'), 264 | ] 265 | 266 | # Documents to append as an appendix to all manuals. 267 | #texinfo_appendices = [] 268 | 269 | # If false, no module index is generated. 270 | #texinfo_domain_indices = True 271 | 272 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
273 | #texinfo_show_urls = 'footnote' 274 | -------------------------------------------------------------------------------- /docs/data_providers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Data Providers 18 | ============== 19 | 20 | .. automodule:: hebel.data_providers 21 | 22 | Abstract Base Class 23 | ------------------- 24 | 25 | .. autoclass:: hebel.data_providers.DataProvider 26 | :members: 27 | 28 | Minibatch Data Provider 29 | ----------------------- 30 | 31 | .. autoclass:: hebel.data_providers.MiniBatchDataProvider 32 | :members: 33 | 34 | Multi-Task Data Provider 35 | ------------------------ 36 | 37 | .. autoclass:: hebel.data_providers.MultiTaskDataProvider 38 | :members: 39 | 40 | Batch Data Provider 41 | ------------------- 42 | 43 | .. autoclass:: hebel.data_providers.BatchDataProvider 44 | :members: 45 | 46 | Dummy Data Provider 47 | ------------------- 48 | 49 | .. autoclass:: hebel.data_providers.DummyDataProvider 50 | :members: 51 | 52 | MNIST Data Provider 53 | ------------------- 54 | 55 | .. 
autoclass:: hebel.data_providers.MNISTDataProvider 56 | :members: 57 | 58 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Getting Started 18 | *************** 19 | 20 | There are two basic methods how you can run Hebel: 21 | 22 | #. You can write a YAML configuration file that describes your model 23 | architecture, data set, and hyperparameters and run it using the 24 | :file:`train_model.py` script. 25 | #. In your own Python script or program, you can create instances of 26 | models and optimizers programmatically. 27 | 28 | The first makes estimating a model the easiest, as you don't have to 29 | write any actual code. You simply specify all your parameters and data 30 | set in an easy to read YAML configuration file and pass it to the 31 | :file:`train_model.py` script. The script will create a directory for your 32 | results where it will save intermediary models (in pickle-format), the 33 | logs and final results. 
34 | 35 | The second method gives you more control over how exactly the model is 36 | estimated and lets you interact with Hebel from other Python programs. 37 | 38 | 39 | Running models from YAML configuration files 40 | ============================================ 41 | 42 | If you check the example YAML files in ``examples/`` you will see that the configuration file defines three top-level sections: 43 | 44 | #. ``run_conf``: These options are passed to the method 45 | :meth:`hebel.optimizers.SGD.run()`. 46 | #. ``optimizer``: Here you instantiate a :class:`hebel.optimizers.SGD` 47 | object, including the model you want to train and the data to use 48 | for training and validation. 49 | #. ``test_dataset``: This section is optional, but here you can define 50 | test data to evaluate the model on after training. 51 | 52 | Check out :file:`examples/mnist_neural_net_shallow.yml`, which 53 | includes everything to train a one layer neural network on the `MNIST 54 | dataset `_: 55 | 56 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 57 | 58 | You can see that the only option we pass to ``run_conf`` is the number 59 | of iterations to train the model. 60 | 61 | The ``optimizer`` section is more interesting. Hebel uses the special 62 | ``!obj``, ``!import``, and ``!pkl`` directives from `PyLearn 2 63 | `_. The 64 | ``!obj`` directive is used most extensively and can be used to 65 | instantiate any Python class. First the optimizer 66 | :class:`hebel.optimizers.SGD` is instantiated and in the lines below 67 | we are instantiating the model: 68 | 69 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 70 | :lines: 3-17 71 | 72 | We are designing a model with one hidden layer that has 784 input 73 | units (the dimensionality of the MNIST data) and 2000 hidden units. We 74 | are also using `dropout `_ for 75 | regularization. The logistic output layer uses 10 classes (the number 76 | of classes in the MNIST data). 
You can also add different amounts of 77 | L1 or L2 penalization to each layer, which we are not doing here. 78 | 79 | .. _parameter-updaters: 80 | 81 | Next, we define a ``parameter_updater``, which is a rule that defines 82 | how the weights are updated given the gradients: 83 | 84 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 85 | :lines: 18 86 | 87 | There are currently three choices: 88 | 89 | * :class:`hebel.parameter_updaters.SimpleSGDUpdate`, which performs 90 | regular gradient descent 91 | * :class:`hebel.parameter_updaters.MomentumUpdate`, which performs 92 | gradient descent with momentum, and 93 | * :class:`hebel.parameter_updaters.NesterovMomentumUpdate`, which performs 94 | gradient descent with Nesterov momentum. 95 | 96 | The next two sections define the data for the model. All data must be 97 | given as instances of ``DataProvider`` objects: 98 | 99 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 100 | :lines: 19-25 101 | 102 | A ``DataProvider`` is a class that defines an iterator which returns 103 | successive minibatches of the data as well as saves some metadata, 104 | such as the number of data points. There is a special 105 | :class:`hebel.data_providers.MNISTDataProvider` especially for the 106 | MNIST data. We use the standard splits for training and validation 107 | data here. There are several ``DataProviders`` defined in 108 | :mod:`hebel.data_providers`. 109 | 110 | The next few lines define how some of the hyperparameters are changed 111 | over the course of the training: 112 | 113 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 114 | :lines: 26-31 115 | 116 | The module :mod:`hebel.schedulers` defines several schedulers, which 117 | are basically just simple rules how certain parameters should 118 | evolve. 
Here, we define that the learning rate should decay 119 | exponentially with a factor of 0.995 in every epoch and the momentum 120 | should increase from 0.5 to 0.9 during the first 10 epochs and then 121 | stay at this value. 122 | 123 | The last entry argument to :class:`hebel.optimizers.SGD` is 124 | ``progress_monitor``: 125 | 126 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 127 | :lines: 32-38 128 | 129 | A progress monitor is an object that takes care of reporting periodic 130 | progress of our model, saving snapshots of the model at regular 131 | intervals, etc. When you are using the YAML configuration system, 132 | you'll probably want to use :class:`hebel.monitors.ProgressMonitor`, 133 | which will save logs, outputs, and snapshots to disk. In contrast, 134 | :class:`hebel.monitors.SimpleProgressMonitor` will only print progress 135 | to the terminal without saving the model itself. 136 | 137 | Finally, you can define a test data set to be evaluated after the training completes: 138 | 139 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 140 | :lines: 40-43 141 | 142 | Here, we are specifying the MNIST test split. 143 | 144 | Once you have your configuration file defined, you can run it such as in:: 145 | 146 | python train_model.py examples/mnist_neural_net_shallow.yml 147 | 148 | The script will create the output directory you specified in 149 | ``save_model_path`` if it doesn't exist yet and start writing the log 150 | into a file called ``output_log``. If you are interested in keeping an 151 | eye on the training process you can check on that file with:: 152 | 153 | tail -f output_log 154 | 155 | Using Hebel in Your Own Code 156 | ============================ 157 | 158 | If you want more control over the training procedure or integrate 159 | Hebel with your own code, then you can use Hebel programmatically. 
160 | 161 | Unlike the simpler one hidden layer model from the previous part, here 162 | we are going to build a more powerful deep neural net with multiple 163 | hidden layers. 164 | 165 | For an example, have a look at :file:`examples/mnist_neural_net_deep_script.py`: 166 | 167 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 168 | 169 | There are three basic tasks you have to do to train a model in Hebel: 170 | 171 | #. Define the data you want to use for training, validation, or 172 | testing using ``DataProvider`` objects, 173 | #. instantiate a ``Model`` object, and 174 | #. instantiate an ``SGD`` object that will train the model using 175 | stochastic gradient descent. 176 | 177 | Defining a Data Set 178 | ------------------- 179 | 180 | In this example we're using the MNIST data set again through the 181 | :class:`hebel.data_providers.MNISTDataProvider` class: 182 | 183 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 184 | :lines: 9-12 185 | 186 | We create three data sets, corresponding to the official training, 187 | validation, and test data splits of MNIST. For the training data set, 188 | we set a batch size of 100 training examples, while the validation and 189 | test data sets are used as complete batches. 190 | 191 | Instantiating a model 192 | --------------------- 193 | 194 | To train a model, you simply need to create an object representing a 195 | model that inherits from the abstract base class 196 | :class:`hebel.models.Model`. 197 | 198 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 199 | :lines: 17-21 200 | 201 | Currently, Hebel implements the following models: 202 | 203 | * :class:`hebel.models.NeuralNet`: A neural net with any number of 204 | hidden layers for classification, using the cross-entropy loss 205 | function and softmax units in the output layer. 206 | 207 | * :class:`hebel.models.LogisticRegression`: Multi-class logistic 208 | regression. 
Like :class:`hebel.models.NeuralNet` but does not have 209 | any hidden layers. 210 | 211 | * :class:`hebel.models.MultitaskNeuralNet`: A neural net trained on 212 | multiple tasks simultaneously. A multi-task neural net can have any 213 | number of hidden layers with weights that are shared between the 214 | tasks and any number of output layers with separate weights for each 215 | task. 216 | 217 | * :class:`hebel.models.NeuralNetRegression`: A neural net with a 218 | linear regression output layer to model continuous variables. 219 | 220 | The :class:`hebel.models.NeuralNet` model we are using here takes as 221 | input the dimensionality of the data, the number of classes, the sizes 222 | of the hidden layers, the activation function to use, and whether to 223 | use dropout for regularization. There are also a few more options such 224 | as for L1 or L2 weight regularization, that we don't use here. 225 | 226 | Here, we are using the simpler form of the constructor rather than the 227 | extended form that we used in the YAML example. Also we are adding a 228 | small amount of dropout (20%) to the input layer. 229 | 230 | Training the model 231 | ------------------ 232 | 233 | To train the model, you first need to create an instance of 234 | :class:`hebel.optimizers.SGD`: 235 | 236 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 237 | :lines: 23-35 238 | 239 | First we are creating a :class:`hebel.monitors.ProgressMonitor` 240 | object, that will save regular snapshots of the model during training 241 | and save the logs and results to disk. 242 | 243 | Next, we are creating the :class:`hebel.optimizers.SGD` object. We 244 | instantiate the optimizer with the model, the parameter update rule, 245 | training data, validation data, and the schedulers for the learning 246 | rate and the momentum parameters. 247 | 248 | Finally, we can start the training by invoking the 249 | :meth:`hebel.optimizers.SGD.run` method. 
Here we train the model for 250 | 100 epochs. However, by default :class:`hebel.optimizers.SGD` uses 251 | early stopping which means that it remembers the parameters that give 252 | the best result on the validation set and will reset the model 253 | parameters to them after the end of training. 254 | 255 | Evaluating on test data 256 | ----------------------- 257 | 258 | After training is complete we can do anything we want with the trained 259 | model, such as using it in some prediction pipeline, pickle it to 260 | disk, etc. Here we are evaluating the performance of the model on the 261 | MNIST test data split: 262 | 263 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 264 | :lines: 37-40 265 | 266 | 267 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | 18 | Welcome to Hebel's documentation! 19 | ================================= 20 | 21 | Contents: 22 | 23 | .. 
toctree:: 24 | :maxdepth: 2 25 | 26 | introduction 27 | installation 28 | getting_started 29 | initialization 30 | data_providers 31 | layers 32 | monitors 33 | models 34 | optimizers 35 | parameter_updaters 36 | schedulers 37 | 38 | 39 | 40 | Indices and tables 41 | ================== 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | 47 | -------------------------------------------------------------------------------- /docs/initialization.rst: -------------------------------------------------------------------------------- 1 | Initialization 2 | ************** 3 | 4 | .. automodule:: hebel 5 | 6 | .. autofunction:: hebel.init 7 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | -------------------------------------------------------------------------------- /docs/introduction.rst: -------------------------------------------------------------------------------- 1 | .. 
Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | -------------------------------------------------------------------------------- /docs/layers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Layers 18 | ****** 19 | 20 | Hidden Layer 21 | ============ 22 | 23 | .. autoclass:: hebel.layers.HiddenLayer 24 | :members: 25 | 26 | .. autoclass:: hebel.layers.InputDropout 27 | :members: 28 | 29 | .. 
autoclass:: hebel.layers.DummyLayer 30 | :members: 31 | 32 | Top Layers 33 | ========== 34 | 35 | Abstract Base Class Top Layer 36 | ----------------------------- 37 | 38 | .. autoclass:: hebel.layers.TopLayer 39 | :members: 40 | 41 | Logistic Layer 42 | -------------- 43 | 44 | .. autoclass:: hebel.layers.LogisticLayer 45 | :members: 46 | 47 | Softmax Layer 48 | ------------- 49 | 50 | .. autoclass:: hebel.layers.SoftmaxLayer 51 | :members: 52 | 53 | Linear Regression Layer 54 | ----------------------- 55 | 56 | .. autoclass:: hebel.layers.LinearRegressionLayer 57 | :members: 58 | 59 | Multitask Top Layer 60 | ------------------- 61 | 62 | .. autoclass:: hebel.layers.MultitaskTopLayer 63 | :members: 64 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. 
text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 
93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Hebel.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Hebel.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. 
The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /docs/models.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Models 18 | ****** 19 | 20 | Abstract Base Class Model 21 | ========================= 22 | 23 | .. 
autoclass:: hebel.models.Model 24 | :members: 25 | 26 | Neural Network 27 | ============== 28 | 29 | .. autoclass:: hebel.models.NeuralNet 30 | :members: 31 | 32 | Neural Network Regression 33 | ========================= 34 | 35 | .. autoclass:: hebel.models.NeuralNetRegression 36 | :members: 37 | 38 | Logistic Regression 39 | =================== 40 | 41 | .. autoclass:: hebel.models.LogisticRegression 42 | :members: 43 | 44 | Multi-Task Neural Net 45 | ===================== 46 | 47 | .. autoclass:: hebel.models.MultitaskNeuralNet 48 | :members: 49 | 50 | -------------------------------------------------------------------------------- /docs/monitors.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Monitors 18 | ******** 19 | 20 | Progress Monitor 21 | ================ 22 | 23 | .. autoclass:: hebel.monitors.ProgressMonitor 24 | :members: 25 | :undoc-members: 26 | 27 | Simple Progress Monitor 28 | ======================= 29 | 30 | .. 
autoclass:: hebel.monitors.SimpleProgressMonitor 31 | :members: 32 | :undoc-members: 33 | -------------------------------------------------------------------------------- /docs/optimizers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Optimizers 18 | ********** 19 | 20 | Stochastic Gradient Descent 21 | =========================== 22 | 23 | .. autoclass:: hebel.optimizers.SGD 24 | :members: 25 | :undoc-members: 26 | -------------------------------------------------------------------------------- /docs/parameter_updaters.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Parameter Updaters 18 | ****************** 19 | 20 | Abstract Base Class 21 | =================== 22 | 23 | .. autoclass:: hebel.parameter_updaters.ParameterUpdater 24 | :members: 25 | :undoc-members: 26 | 27 | Simple SGD Update 28 | ================= 29 | 30 | .. autoclass:: hebel.parameter_updaters.SimpleSGDUpdate 31 | :members: 32 | :undoc-members: 33 | 34 | Momentum Update 35 | =============== 36 | 37 | .. autoclass:: hebel.parameter_updaters.MomentumUpdate 38 | :members: 39 | :undoc-members: 40 | 41 | Nesterov Momentum Update 42 | ======================== 43 | 44 | .. autoclass:: hebel.parameter_updaters.NesterovMomentumUpdate 45 | :members: 46 | :undoc-members: 47 | -------------------------------------------------------------------------------- /docs/requirements_sphinx.txt: -------------------------------------------------------------------------------- 1 | mock 2 | -------------------------------------------------------------------------------- /docs/schedulers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Schedulers 18 | ********** 19 | 20 | Constant Scheduler 21 | ================== 22 | 23 | .. autofunction:: hebel.schedulers.constant_scheduler 24 | 25 | Exponential Scheduler 26 | ===================== 27 | 28 | .. autofunction:: hebel.schedulers.exponential_scheduler 29 | 30 | Linear Scheduler Up 31 | =================== 32 | 33 | .. autofunction:: hebel.schedulers.linear_scheduler_up 34 | 35 | Linear Scheduler Up-Down 36 | ======================== 37 | 38 | .. autofunction:: hebel.schedulers.linear_scheduler_up_down 39 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_deep.yml: -------------------------------------------------------------------------------- 1 | run_conf: 2 | iterations: 3000 3 | optimizer: !obj:hebel.optimizers.SGD { 4 | model: !obj:hebel.models.NeuralNet { 5 | layers: [ 6 | !obj:hebel.layers.InputDropout { 7 | n_in: 784, 8 | dropout_probability: .2, 9 | }, 10 | !obj:hebel.layers.HiddenLayer { 11 | n_in: 784, 12 | n_units: 2000, 13 | activation_function: relu, 14 | dropout: yes, 15 | l2_penalty_weight: .0 16 | }, 17 | !obj:hebel.layers.HiddenLayer { 18 | n_in: 2000, 19 | n_units: 2000, 20 | activation_function: relu, 21 | dropout: yes, 22 | l2_penalty_weight: .0 23 | }, 24 | !obj:hebel.layers.HiddenLayer { 25 | n_in: 2000, 26 | n_units: 2000, 27 | activation_function: relu, 28 | dropout: yes, 29 | l2_penalty_weight: .0 30 | }, 31 | !obj:hebel.layers.HiddenLayer { 32 | n_in: 2000, 33 | n_units: 500, 34 | activation_function: relu, 35 | dropout: yes, 36 | l2_penalty_weight: .0 37 | }, 38 | !obj:hebel.layers.HiddenLayer { 39 | n_in: 500, 40 | n_units: 500, 41 | activation_function: relu, 42 | dropout: yes, 43 | l2_penalty_weight: .0 44 | } 45 | ], 46 | 
top_layer: !obj:hebel.layers.SoftmaxLayer { 47 | n_in: 500, 48 | n_out: 10 49 | } 50 | }, 51 | parameter_updater: !import hebel.parameter_updaters.NesterovMomentumUpdate, 52 | train_data: !obj:hebel.data_providers.MNISTDataProvider { 53 | batch_size: 100, 54 | array: train 55 | }, 56 | validation_data: !obj:hebel.data_providers.MNISTDataProvider { 57 | array: val 58 | }, 59 | learning_rate_schedule: !obj:hebel.schedulers.exponential_scheduler { 60 | init_value: 2., decay: .995 61 | }, 62 | momentum_schedule: !obj:hebel.schedulers.linear_scheduler_up { 63 | init_value: .1, target_value: .99, duration: 200 64 | }, 65 | progress_monitor: 66 | !obj:hebel.monitors.ProgressMonitor { 67 | experiment_name: mnist_deep, 68 | save_model_path: examples/mnist, 69 | output_to_log: yes 70 | } 71 | } 72 | test_dataset: 73 | test_data: !obj:hebel.data_providers.MNISTDataProvider { 74 | array: test 75 | } 76 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_deep_script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import hebel 4 | from hebel.models import NeuralNet 5 | from hebel.optimizers import SGD 6 | from hebel.parameter_updaters import MomentumUpdate 7 | from hebel.data_providers import MNISTDataProvider 8 | from hebel.monitors import ProgressMonitor 9 | from hebel.schedulers import exponential_scheduler, linear_scheduler_up 10 | 11 | hebel.init(random_seed=0) 12 | 13 | # Initialize data providers 14 | train_data = MNISTDataProvider('train', batch_size=100) 15 | validation_data = MNISTDataProvider('val') 16 | test_data = MNISTDataProvider('test') 17 | 18 | D = train_data.D # Dimensionality of inputs 19 | K = 10 # Number of classes 20 | 21 | # Create model object 22 | model = NeuralNet(n_in=train_data.D, n_out=K, 23 | layers=[2000, 2000, 2000, 500], 24 | activation_function='relu', 25 | dropout=True, input_dropout=0.2) 26 | 27 | # Create optimizer 
object 28 | progress_monitor = ProgressMonitor( 29 | experiment_name='mnist', 30 | save_model_path='examples/mnist', 31 | save_interval=5, 32 | output_to_log=True) 33 | 34 | optimizer = SGD(model, MomentumUpdate, train_data, validation_data, progress_monitor, 35 | learning_rate_schedule=exponential_scheduler(5., .995), 36 | momentum_schedule=linear_scheduler_up(.1, .9, 100)) 37 | 38 | # Run model 39 | optimizer.run(50) 40 | 41 | # Evaulate error on test set 42 | test_error = model.test_error(test_data) 43 | print "Error on test set: %.3f" % test_error 44 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_shallow.yml: -------------------------------------------------------------------------------- 1 | run_conf: 2 | iterations: 50 3 | optimizer: !obj:hebel.optimizers.SGD { 4 | model: !obj:hebel.models.NeuralNet { 5 | layers: [ 6 | !obj:hebel.layers.HiddenLayer { 7 | n_in: 784, 8 | n_units: 2000, 9 | dropout: yes, 10 | l2_penalty_weight: .0 11 | } 12 | ], 13 | top_layer: !obj:hebel.layers.SoftmaxLayer { 14 | n_in: 2000, 15 | n_out: 10 16 | } 17 | }, 18 | parameter_updater: !import hebel.parameter_updaters.MomentumUpdate, 19 | train_data: !obj:hebel.data_providers.MNISTDataProvider { 20 | batch_size: 100, 21 | array: train 22 | }, 23 | validation_data: !obj:hebel.data_providers.MNISTDataProvider { 24 | array: val 25 | }, 26 | learning_rate_schedule: !obj:hebel.schedulers.exponential_scheduler { 27 | init_value: 30., decay: .995 28 | }, 29 | momentum_schedule: !obj:hebel.schedulers.linear_scheduler_up { 30 | init_value: .5, target_value: .9, duration: 10 31 | }, 32 | progress_monitor: 33 | !obj:hebel.monitors.ProgressMonitor { 34 | experiment_name: mnist_shallow, 35 | save_model_path: examples/mnist, 36 | output_to_log: yes 37 | } 38 | } 39 | test_dataset: 40 | test_data: !obj:hebel.data_providers.MNISTDataProvider { 41 | array: test 42 | } 43 | 
-------------------------------------------------------------------------------- /examples/neural_net_regression_example.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """Example of neural net with a linear regression output layer, using 18 | the Boston dataset. 
19 | 20 | """ 21 | 22 | def main(): 23 | import numpy as np 24 | import pycuda.autoinit 25 | from pycuda import gpuarray 26 | from skdata import toy 27 | from hebel import memory_pool 28 | from hebel.data_providers import BatchDataProvider 29 | from hebel.models import NeuralNetRegression 30 | from hebel.optimizers import SGD 31 | from hebel.parameter_updaters import SimpleSGDUpdate 32 | from hebel.monitors import SimpleProgressMonitor 33 | from hebel.schedulers import exponential_scheduler 34 | 35 | # Get data 36 | data_cpu, targets_cpu = toy.Boston().regression_task() 37 | data = gpuarray.to_gpu(data_cpu.astype(np.float32), allocator=memory_pool.allocate) 38 | targets = gpuarray.to_gpu(targets_cpu.astype(np.float32), allocator=memory_pool.allocate) 39 | data_provider = BatchDataProvider(data, targets) 40 | 41 | # Create model object 42 | model = NeuralNetRegression(n_in=data_cpu.shape[1], n_out=targets_cpu.shape[1], 43 | layers=[100], activation_function='relu') 44 | 45 | # Create optimizer object 46 | optimizer = SGD(model, SimpleSGDUpdate, data_provider, data_provider, 47 | learning_rate_schedule=exponential_scheduler(.1, .9999), 48 | early_stopping=True) 49 | optimizer.run(3000) 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /hebel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """Before Hebel can be used, it must be initialized using the function 18 | :func:`hebel.init`. 19 | 20 | """ 21 | 22 | import numpy as np 23 | import pycuda.driver as cuda 24 | cuda.init() 25 | from pycuda.tools import make_default_context, DeviceMemoryPool 26 | 27 | import os as _os 28 | neural_nets_root = _os.path.split( 29 | _os.path.abspath(_os.path.dirname(__file__)))[0] 30 | 31 | is_initialized = False 32 | 33 | class _Sampler(object): 34 | _sampler = None 35 | 36 | def __getattribute__(self, name): 37 | if name in ('seed', 'set_seed'): 38 | return object.__getattribute__(self, name) 39 | 40 | sampler = object.__getattribute__(self, '_sampler') 41 | if sampler is None: 42 | from pycuda import curandom, gpuarray 43 | seed_func = curandom.seed_getter_uniform if self.seed is None \ 44 | else lambda N: gpuarray.to_gpu( 45 | np.array(N * [self.seed], dtype=np.int32), 46 | allocator=memory_pool.allocate) 47 | sampler = curandom.XORWOWRandomNumberGenerator(seed_func) 48 | self._sampler = sampler 49 | return sampler.__getattribute__(name) 50 | 51 | def set_seed(self, seed): 52 | self.seed = seed 53 | self._sampler = None 54 | sampler = _Sampler() 55 | 56 | class _Context(object): 57 | _context = None 58 | 59 | def init_context(self, device_id=None): 60 | if device_id is None: 61 | context = make_default_context() 62 | self._context = context 63 | else: 64 | context = cuda.Device(device_id).make_context() 65 | self._context = context 66 | 67 | def __getattribute__(self, name): 68 | if name in 'init_context': 69 | return object.__getattribute__(self, name) 70 | 71 | if object.__getattribute__(self, '_context') is None: 72 | raise RuntimeError("Context hasn't been 
initialized yet") 73 | 74 | return object.__getattribute__(self, '_context').__getattribute__(name) 75 | 76 | context = _Context() 77 | 78 | 79 | class _MemoryPool(object): 80 | _memory_pool = None 81 | 82 | def init(self): 83 | self._memory_pool = DeviceMemoryPool() 84 | 85 | def __getattribute__(self, name): 86 | if name == 'init': 87 | return object.__getattribute__(self, name) 88 | 89 | if object.__getattribute__(self, '_memory_pool') is None: 90 | raise RuntimeError("Memory Pool hasn't been initialized yet") 91 | 92 | return object.__getattribute__(self, '_memory_pool').__getattribute__(name) 93 | memory_pool = _MemoryPool() 94 | 95 | 96 | def init(device_id=None, random_seed=None): 97 | """Initialize Hebel. 98 | 99 | This function creates a CUDA context, CUBLAS context and 100 | initializes and seeds the pseudo-random number generator. 101 | 102 | **Parameters:** 103 | 104 | device_id : integer, optional 105 | The ID of the GPU device to use. If this is omitted, PyCUDA's 106 | default context is used, which by default uses the fastest 107 | available device on the system. Alternatively, you can put the 108 | device id in the environment variable ``CUDA_DEVICE`` or into 109 | the file ``.cuda-device`` in the user's home directory. 110 | 111 | random_seed : integer, optional 112 | The seed to use for the pseudo-random number generator. If 113 | this is omitted, the seed is taken from the environment 114 | variable ``RANDOM_SEED`` and if that is not defined, a random 115 | integer is used as a seed. 
116 | """ 117 | 118 | if device_id is None: 119 | random_seed = _os.environ.get('CUDA_DEVICE') 120 | 121 | if random_seed is None: 122 | random_seed = _os.environ.get('RANDOM_SEED') 123 | 124 | global is_initialized 125 | if not is_initialized: 126 | is_initialized = True 127 | 128 | global context 129 | context.init_context(device_id) 130 | 131 | from pycuda import gpuarray, driver, curandom 132 | 133 | # Initialize memory pool 134 | global memory_pool 135 | memory_pool.init() 136 | 137 | # Initialize PRG 138 | global sampler 139 | sampler.set_seed(random_seed) 140 | 141 | # Initialize pycuda_ops 142 | from hebel import pycuda_ops 143 | pycuda_ops.init() 144 | 145 | 146 | def _finish_up(): 147 | global is_initialized 148 | if is_initialized: 149 | global context 150 | context.pop() 151 | context = None 152 | 153 | from pycuda.tools import clear_context_caches 154 | clear_context_caches() 155 | is_initialized = False 156 | 157 | import atexit 158 | atexit.register(_finish_up) 159 | -------------------------------------------------------------------------------- /hebel/cross_validation.py: -------------------------------------------------------------------------------- 1 | from .utils.math import ceil_div 2 | import numpy as np 3 | import os 4 | from hebel.optimizers import SGD 5 | from hebel import memory_pool 6 | 7 | class CrossValidation(object): 8 | def __init__(self, config, data): 9 | 10 | self.n_folds = config['n_folds'] 11 | self.n_data = config['n_data'] 12 | self.validation_share = config['validation_share'] 13 | 14 | self.fold_size = ceil_div(self.n_data, self.n_folds) 15 | self.N_train_validate = self.n_data - self.fold_size 16 | self.N_train = int(np.ceil((1. 
- self.validation_share) * self.N_train_validate)) 17 | 18 | self.models_cv = [] 19 | self.progress_monitors_cv = [] 20 | self.fold_idx = [] 21 | 22 | self.fold_stats = [] 23 | 24 | self.train_error = { 25 | 'training_error': [], 26 | 'validation_error': [] 27 | } 28 | 29 | self.predictions = None 30 | self.config = config 31 | self.data = data 32 | 33 | np.random.seed(config.get('numpy_seed')) 34 | 35 | def run_fold(self, k): 36 | memory_pool.free_held() 37 | fold_range = (k*self.fold_size, min((k+1)*self.fold_size, self.n_data)) 38 | test_idx = np.arange(fold_range[0], fold_range[1], dtype=np.int32) 39 | 40 | train_validate_idx = np.random.permutation( 41 | np.r_[np.arange(0, fold_range[0], dtype=np.int32), 42 | np.arange(fold_range[1], self.n_data, dtype=np.int32)]) 43 | train_idx = train_validate_idx[:self.N_train] 44 | validate_idx = train_validate_idx[self.N_train:] 45 | 46 | self.fold_idx.append({ 47 | 'test_idx': test_idx, 48 | 'train_idx': train_idx, 49 | 'validate_idx': validate_idx 50 | }) 51 | 52 | dp_train = self.make_data_provider(train_idx, 53 | self.config.get('batch_size_train')) 54 | dp_validate = self.make_data_provider(validate_idx, 55 | self.config.get('batch_size_validate')) 56 | dp_test = self.make_data_provider(test_idx, 57 | self.config.get('batch_size_test')) 58 | 59 | model = self.make_model() 60 | model.calibrate_learning_rate(dp_train) 61 | self.models_cv.append(model) 62 | 63 | progress_monitor = self.make_progress_monitor(k) 64 | self.progress_monitors_cv.append(progress_monitor) 65 | 66 | learning_rate_schedule = self.config['learning_rate_fct'](**self.config['learning_rate_params']) 67 | 68 | momentum_schedule = self.config['momentum_schedule_fct'](**self.config['momentum_schedule_params']) \ 69 | if 'momentum_schedule_fct' in self.config else None 70 | 71 | optimizer = SGD(model, self.config['parameter_updater'], dp_train, dp_validate, 72 | progress_monitor, 73 | learning_rate_schedule=learning_rate_schedule, 74 | 
momentum_schedule=momentum_schedule, 75 | early_stopping=self.config.get('early_stopping', True)) 76 | 77 | optimizer.run(self.config['epochs'], 78 | validation_interval=self.config.get('validation_interval', 5), 79 | yaml_config=self.config['yaml_config']) 80 | 81 | stats = self.get_stats(dp_train, dp_test, model) 82 | self.fold_stats.append(stats) 83 | 84 | predictions_fold = model.feed_forward(dp_test.data).get() 85 | self.predictions = np.r_[self.predictions, predictions_fold] \ 86 | if self.predictions is not None else predictions_fold 87 | 88 | self.make_figures(model, progress_monitor, k) 89 | 90 | self.train_error['training_error'].append(progress_monitor.train_error) 91 | self.train_error['validation_error'].append(progress_monitor.validation_error) 92 | 93 | 94 | del optimizer, dp_train, dp_validate, dp_test 95 | 96 | def run(self): 97 | for k in range(self.n_folds): 98 | self.run_fold(k) 99 | 100 | def make_data_provider(self, idx, batch_size): 101 | raise NotImplementedError 102 | 103 | def make_model(self): 104 | raise NotImplementedError 105 | 106 | def make_progress_monitor(self, fold): 107 | raise NotImplementedError 108 | 109 | def get_stats_func(self, dp_train, dp_test, model): 110 | return {} 111 | 112 | def make_figures(self, model, progress_monitor, fold): 113 | pass 114 | 115 | def post_run(self): 116 | pass 117 | -------------------------------------------------------------------------------- /hebel/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Re-export the public layer classes at package level.
# NOTE: import order matters here -- column.py does ``from . import
# HiddenLayer``, so hidden_layer must already be imported when .column
# (and .multi_column_layer / .flattening_layer) are loaded.
from .dummy_layer import DummyLayer
from .hidden_layer import HiddenLayer
from .softmax_layer import SoftmaxLayer
from .logistic_layer import LogisticLayer
from .multitask_top_layer import MultitaskTopLayer
from .top_layer import TopLayer
from .linear_regression_layer import LinearRegressionLayer
from .input_dropout import InputDropout
from .column import Column
from .multi_column_layer import MultiColumnLayer
from .flattening_layer import FlatteningLayer
--------------------------------------------------------------------------------
/hebel/layers/column.py:
--------------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from . import HiddenLayer 18 | from itertools import chain 19 | 20 | class Column(object): 21 | l1_penalty_weight = True 22 | l2_penalty_weight = True 23 | 24 | def __init__(self, hidden_layers): 25 | assert all([isinstance(hl, HiddenLayer) for hl in hidden_layers]) 26 | self.hidden_layers = hidden_layers 27 | 28 | @property 29 | def n_parameters(self): 30 | return sum(hl.n_parameters for hl in self.hidden_layers) 31 | 32 | @property 33 | def n_units(self): 34 | return self.hidden_layers[-1].n_units 35 | 36 | @property 37 | def n_in(self): 38 | return self.hidden_layers[0].n_in 39 | 40 | @property 41 | def parameters(self): 42 | return list(chain.from_iterable(hl.parameters for hl in self.hidden_layers)) 43 | 44 | @parameters.setter 45 | def parameters(self, new_parameters): 46 | for hl in self.hidden_layers: 47 | hl.parameters = new_parameters[:hl.n_parameters] 48 | new_parameters = new_parameters[hl.n_parameters:] 49 | 50 | def update_parameters(self, values, stream=None): 51 | assert len(values) == self.n_parameters 52 | 53 | for hl in self.hidden_layers: 54 | hl.update_parameters(values[:hl.n_parameters]) 55 | values = values[hl.n_parameters:] 56 | 57 | @property 58 | def l1_penalty(self): 59 | return sum(hl.l1_penalty for hl in self.hidden_layers) 60 | 61 | @property 62 | def l2_penalty(self): 63 | return sum(hl.l2_penalty for hl in self.hidden_layers) 64 | 65 | @property 66 | def lr_multiplier(self): 67 | return tuple(chain.from_iterable((hl.lr_multiplier for hl in self.hidden_layers))) 68 | 69 | @lr_multiplier.setter 70 | def lr_multiplier(self, value): 71 | assert self.n_parameters == len(value) 72 | i = 0 73 | for hl in self.hidden_layers: 74 | hl.lr_multiplier = value[i:i+hl.n_parameters] 75 | i += hl.n_parameters 76 | 
77 | def feed_forward(self, input_data, prediction=False): 78 | cache = [] 79 | activations = [input_data] 80 | a = input_data 81 | for hl in self.hidden_layers: 82 | c = hl.feed_forward(a, prediction) 83 | a = c[0] 84 | activations.append(c[0]) 85 | cache.append(c) 86 | 87 | del activations[-1] 88 | return a, (activations, cache) 89 | 90 | def backprop(self, input_data, df_output, cache=None): 91 | if cache is None: 92 | _, (activations, cache) = self.feed_forward(input_data, False) 93 | else: 94 | _, (activations, cache) = cache 95 | 96 | df_param = [] 97 | df_input = df_output 98 | for hl, a, c in zip(self.hidden_layers[::-1], activations[::-1], cache[::-1]): 99 | df_p, df_input = hl.backprop(a, df_input, c) 100 | df_param.append(df_p) 101 | 102 | df_param.reverse() 103 | df_param = list(chain.from_iterable(df_param)) 104 | 105 | return df_param, df_input -------------------------------------------------------------------------------- /hebel/layers/dummy_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
16 | 17 | from .hidden_layer import HiddenLayer 18 | 19 | 20 | class DummyLayer(HiddenLayer): 21 | """ This class has no hidden units and simply passes through its 22 | input 23 | """ 24 | 25 | lr_multiplier = [] 26 | n_parameters = 0 27 | l1_penalty_weight = 0. 28 | l2_penalty_weight = 0. 29 | dropout = 0. 30 | 31 | def __init__(self, n_in): 32 | self.n_in = n_in 33 | self.n_units = n_in 34 | 35 | @property 36 | def parameters(self): 37 | return [] 38 | 39 | @parameters.setter 40 | def parameters(self, value): 41 | pass 42 | 43 | def update_parameters(self, values, stream=None): 44 | pass 45 | 46 | @property 47 | def l1_penalty(self): 48 | return 0. 49 | 50 | @property 51 | def l2_penalty(self): 52 | return 0. 53 | 54 | def feed_forward(self, input_data, prediction=False): 55 | if input_data.shape[1] != self.n_in: 56 | raise ValueError('Number of outputs from previous layer (%d) ' 57 | 'does not match number of inputs to this layer (%d)' % 58 | (input_data.shape[1], self.n_in)) 59 | return (input_data,) 60 | 61 | def backprop(self, input_data, df_output, cache=None): 62 | return tuple(), df_output 63 | -------------------------------------------------------------------------------- /hebel/layers/flattening_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from . import HiddenLayer 19 | 20 | class FlatteningLayer(HiddenLayer): 21 | n_parameters = 0 22 | lr_multiplier = [] 23 | 24 | def __init__(self, n_in, n_filters, 25 | l1_penalty_weight=0., l2_penalty_weight=0.): 26 | self.n_in = n_in 27 | self.n_filters = n_filters 28 | self.n_units = n_in * n_filters 29 | 30 | self.l1_penalty_weight = 0. 31 | self.l2_penalty_weight = 0. 32 | 33 | def feed_forward(self, input_data, prediction=False): 34 | N = input_data.shape[0] 35 | return input_data.reshape((N, self.n_units)), None 36 | 37 | def backprop(self, input_data, df_output, cache=None): 38 | N = input_data.shape[0] 39 | return tuple(), df_output.reshape((N, self.n_in, self.n_filters)) 40 | 41 | @property 42 | def parameters(self): 43 | return [] 44 | 45 | @parameters.setter 46 | def parameters(self, value): 47 | pass 48 | 49 | def update_parameters(self, values, stream=None): 50 | pass 51 | 52 | @property 53 | def l1_penalty(self): 54 | return 0. 55 | 56 | @property 57 | def l2_penalty(self): 58 | return 0. 59 | -------------------------------------------------------------------------------- /hebel/layers/input_dropout.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | import cPickle 19 | from pycuda import gpuarray 20 | from .dummy_layer import DummyLayer 21 | from .. import memory_pool 22 | from ..pycuda_ops.elementwise import sample_dropout_mask, \ 23 | apply_dropout_mask 24 | from ..pycuda_ops.matrix import add_vec_to_mat 25 | from ..pycuda_ops.reductions import matrix_sum_out_axis 26 | 27 | class InputDropout(DummyLayer): 28 | r"""This layer performs dropout on the input data. 29 | 30 | It does not have any learnable parameters of its own. It should be 31 | used as the first layer and will perform dropout with any dropout 32 | probability on the incoming data. 33 | 34 | **Parameters:** 35 | 36 | n_in : integer 37 | Number of input units. 38 | 39 | dropout_probability : float in [0, 1) 40 | Probability of dropping out each input during training. Default is 0.2. 41 | 42 | compute_input_gradients : Bool 43 | Whether to compute the gradients with respect to the input 44 | data. This only necessary if you're training a model where the 45 | input itself is learned. 46 | 47 | """ 48 | 49 | def __init__(self, n_in, dropout_probability=.2, 50 | compute_input_gradients=False): 51 | self.n_in = n_in 52 | self.n_units = n_in 53 | 54 | assert dropout_probability >= 0. and \ 55 | dropout_probability <= 1. 56 | self.dropout_probability = dropout_probability 57 | self.compute_input_gradients = compute_input_gradients 58 | 59 | def feed_forward(self, input_data, prediction=False): 60 | """Propagate forward through the layer 61 | 62 | **Parameters:** 63 | 64 | input_data : ``GPUArray`` 65 | Inpute data to perform dropout on. 66 | 67 | prediction : bool, optional 68 | Whether to use prediction model. 
If true, then the data is 69 | scaled by ``1 - dropout_probability`` uses dropout. 70 | 71 | **Returns:** 72 | 73 | dropout_data : ``GPUArray`` 74 | The data after performing dropout. 75 | """ 76 | 77 | if input_data.shape[1] != self.n_in: 78 | raise ValueError('Number of outputs from previous layer (%d) ' 79 | 'does not match number of inputs to this layer (%d)' % 80 | (input_data.shape[1], self.n_in)) 81 | 82 | if not prediction: 83 | dropout_input = gpuarray.empty_like(input_data) 84 | dropout_mask = sample_dropout_mask(input_data, 85 | self.dropout_probability, target=dropout_input 86 | ) 87 | return dropout_input, dropout_mask 88 | else: 89 | return (input_data * (1 - self.dropout_probability),) 90 | 91 | def backprop(self, input_data, df_output, cache=None): 92 | """ Backpropagate through the hidden layer 93 | 94 | **Parameters:** 95 | 96 | input_data : ``GPUArray`` 97 | Inpute data to perform dropout on. 98 | 99 | df_output : ``GPUArray`` 100 | Gradients with respect to the output of this layer 101 | (received from the layer above). 102 | 103 | cache : list of ``GPUArray`` 104 | Cache obtained from forward pass. If the cache is 105 | provided, then the activations are not recalculated. 106 | 107 | **Returns:** 108 | 109 | gradients : empty tuple 110 | Gradients are empty since this layer has no parameters. 111 | 112 | df_input : ``GPUArray`` 113 | Gradients with respect to the input. 
114 | """ 115 | 116 | if self.compute_input_gradients: 117 | apply_dropout_mask(df_output, dropout_mask) 118 | 119 | return tuple(), df_output 120 | -------------------------------------------------------------------------------- /hebel/layers/linear_regression_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray, cumath 19 | from math import sqrt 20 | from .. import sampler, memory_pool 21 | from .softmax_layer import SoftmaxLayer 22 | from ..pycuda_ops.elementwise import sign, nan_to_zeros 23 | from ..pycuda_ops.reductions import matrix_sum_out_axis 24 | from ..pycuda_ops.matrix import add_vec_to_mat 25 | from ..pycuda_ops import linalg 26 | 27 | 28 | class LinearRegressionLayer(SoftmaxLayer): 29 | r"""Linear regression layer with linear outputs and squared loss error function. 30 | 31 | **Parameters:** 32 | 33 | n_in : integer 34 | Number of input units. 35 | 36 | n_out : integer 37 | Number of output units (classes). 38 | 39 | parameters : array_like of ``GPUArray`` 40 | Parameters used to initialize the layer. 
If this is omitted, 41 | then the weights are initalized randomly using *Bengio's rule* 42 | (uniform distribution with scale :math:`4 \cdot \sqrt{6 / 43 | (\mathtt{n\_in} + \mathtt{n\_out})}`) and the biases are 44 | initialized to zero. If ``parameters`` is given, then is must 45 | be in the form ``[weights, biases]``, where the shape of 46 | weights is ``(n_in, n_out)`` and the shape of ``biases`` is 47 | ``(n_out,)``. Both weights and biases must be ``GPUArray``. 48 | 49 | weights_scale : float, optional 50 | If ``parameters`` is omitted, then this factor is used as 51 | scale for initializing the weights instead of *Bengio's rule*. 52 | 53 | l1_penalty_weight : float, optional 54 | Weight used for L1 regularization of the weights. 55 | 56 | l2_penalty_weight : float, optional 57 | Weight used for L2 regularization of the weights. 58 | 59 | lr_multiplier : float, optional 60 | If this parameter is omitted, then the learning rate for the 61 | layer is scaled by :math:`2 / \sqrt{\mathtt{n\_in}}`. You may 62 | specify a different factor here. 63 | 64 | test_error_fct : {``class_error``, ``kl_error``, ``cross_entropy_error``}, optional 65 | Which error function to use on the test set. Default is 66 | ``class_error`` for classification error. Other choices are 67 | ``kl_error``, the Kullback-Leibler divergence, or 68 | ``cross_entropy_error``. 69 | 70 | **See also:** 71 | 72 | :class:`hebel.models.NeuralNetRegression`, 73 | :class:`hebel.models.NeuralNet`, 74 | :class:`hebel.layers.LogisticLayer` 75 | 76 | """ 77 | 78 | 79 | n_parameters = 2 80 | 81 | def __init__(self, n_in, n_out, 82 | parameters=None, 83 | weights_scale=None, 84 | l1_penalty_weight=0., 85 | l2_penalty_weight=0., 86 | lr_multiplier=None): 87 | 88 | # Initialize weight using Bengio's rule 89 | self.weights_scale = 4 * sqrt(6. 
/ (n_in + n_out)) \ 90 | if weights_scale is None \ 91 | else weights_scale 92 | 93 | if parameters is not None: 94 | self.W, self.b = parameters 95 | else: 96 | self.W = gpuarray.empty((n_in, n_out), dtype=np.float32, 97 | allocator=memory_pool.allocate) 98 | sampler.fill_uniform(self.W) 99 | self.W = self.weights_scale * (self.W -.5) 100 | 101 | self.b = gpuarray.zeros((n_out,), dtype=np.float32, 102 | allocator=memory_pool.allocate) 103 | 104 | self.n_in = n_in 105 | self.n_out = n_out 106 | 107 | self.l1_penalty_weight = l1_penalty_weight 108 | self.l2_penalty_weight = l2_penalty_weight 109 | 110 | self.lr_multiplier = 2 * [1. / np.sqrt(n_in, dtype=np.float32)] \ 111 | if lr_multiplier is None else lr_multiplier 112 | 113 | def feed_forward(self, input_data, prediction=False): 114 | """Propagate forward through the layer. 115 | 116 | **Parameters:** 117 | 118 | input_data : ``GPUArray`` 119 | Inpute data to compute activations for. 120 | 121 | prediction : bool, optional 122 | Whether to use prediction model. Only relevant when using 123 | dropout. If true, then weights are multiplied by 124 | 1 - dropout if the layer uses dropout. 125 | 126 | **Returns:** 127 | 128 | activations : ``GPUArray`` 129 | The activations of the output units. 130 | """ 131 | 132 | if input_data.shape[1] != self.W.shape[0]: 133 | raise ValueError('Number of outputs from previous layer (%d) ' 134 | 'does not match number of inputs to this layer (%d)' % 135 | (input_data.shape[1], self.W.shape[0])) 136 | 137 | activations = linalg.dot(input_data, self.W) 138 | activations = add_vec_to_mat(activations, self.b, inplace=True) 139 | 140 | return activations 141 | 142 | def test_error(self, input_data, targets, average=True, 143 | cache=None, prediction=True): 144 | """Compute the test error function given some data and targets. 
class MultiColumnLayer(HiddenLayer):
    """Hidden layer composed of several parallel columns.

    Each column (a :class:`Column` or a single :class:`HiddenLayer`)
    processes its own slice of the input and writes its activations into
    its own slice of the output. The layer's parameter list is the
    concatenation of the columns' parameter lists. Columns may share
    weights through master/slave hidden layers; ``backprop`` folds each
    slave layer's gradients into the corresponding master's gradients.

    **Parameters:**

    columns : list of :class:`Column` or :class:`HiddenLayer`
        The parallel columns making up this layer.

    input_as_list : bool, optional
        If ``True``, the input to ``feed_forward``/``backprop`` is a
        list with one ``GPUArray`` per column; if ``False`` (default),
        it is a single ``GPUArray`` that is split by column along the
        feature axis.
    """

    l1_penalty_weight = True
    l2_penalty_weight = True

    def __init__(self, columns, input_as_list=False):
        assert all([isinstance(c, (Column, HiddenLayer)) for c in columns])
        self.columns = columns
        self.input_as_list = input_as_list
        self._setup_weight_sharing()

    def _setup_weight_sharing(self):
        # Build two index structures over the flattened parameter list:
        # * master_param_idx: positions of parameters owned by master layers
        # * shared_idx: (slave_position, master_position) pairs used by
        #   backprop to add a slave's gradient onto its master's gradient
        i = 0
        shared_idx = []
        master_layers = [hl for column in self.columns
                         for hl in column.hidden_layers if hl.is_master_layer]
        master_param_idx = []
        for column in self.columns:
            for hl in column.hidden_layers:
                if not hl.is_master_layer:
                    ml = hl.master_layer
                    ml_idx = master_layers.index(ml)
                    # Offset of the master layer's first parameter within
                    # the list of master parameters
                    idx_start = sum(l.n_parameters
                                    for l in master_layers[:ml_idx])
                    shared_idx.extend(
                        zip(range(i, i + ml.n_parameters),
                            range(idx_start, idx_start + ml.n_parameters)))
                    i += ml.n_parameters
                else:
                    master_param_idx.extend(range(i, i + hl.n_parameters))
                    i += hl.n_parameters
        self.master_param_idx = master_param_idx
        self.shared_idx = shared_idx

    @property
    def n_in(self):
        return sum(c.n_in for c in self.columns)

    @property
    def n_units(self):
        return sum(c.n_units for c in self.columns)

    @property
    def n_parameters(self):
        return sum(c.n_parameters for c in self.columns)

    @property
    def parameters(self):
        return tuple(chain.from_iterable(c.parameters for c in self.columns))

    @parameters.setter
    def parameters(self, value):
        assert len(value) == self.n_parameters

        i = 0
        for c in self.columns:
            c.parameters = value[i:i + c.n_parameters]
            i += c.n_parameters

    def update_parameters(self, values, stream=None):
        assert len(values) == self.n_parameters

        i = 0
        for c in self.columns:
            c.update_parameters(values[i:i + c.n_parameters])
            i += c.n_parameters

    @property
    def l1_penalty(self):
        return sum(c.l1_penalty for c in self.columns if c.l1_penalty_weight)

    @property
    def l2_penalty(self):
        return sum(c.l2_penalty for c in self.columns if c.l2_penalty_weight)

    # Bug fix: lr_multiplier was defined twice in this class. The first
    # definition (a tuple built with itertools.chain, no setter) was dead
    # code, silently shadowed by this later definition; it has been removed.
    @property
    def lr_multiplier(self):
        return [lr for column in self.columns
                for lr in column.lr_multiplier]

    @lr_multiplier.setter
    def lr_multiplier(self, value):
        assert len(value) == self.n_parameters

        i = 0
        for column in self.columns:
            column.lr_multiplier = value[i:i + column.n_parameters]
            i += column.n_parameters

    def feed_forward(self, input_data, prediction=False):
        """Run all columns and concatenate their outputs.

        Returns ``(output, cache)`` where ``cache`` is a list of
        ``(input_column, column_cache)`` pairs, one per column.
        """
        if self.input_as_list:
            return self._feed_forward_list(input_data, prediction)
        else:
            return self._feed_forward_array(input_data, prediction)

    def _feed_forward_list(self, input_data, prediction=False):
        # One input array per column; write each column's activations
        # into its slice of the output.
        output = gpuarray.empty((input_data[0].shape[0], self.n_units),
                                np.float32, allocator=memory_pool.allocate)
        cache = []
        i_out = 0
        for column, input_column in zip(self.columns, input_data):
            c = column.feed_forward(input_column, prediction)
            cache.append((input_column, c))
            insert_columns(c[0], output, i_out)
            i_out += column.n_units

        return output, cache

    def _feed_forward_array(self, input_data, prediction=False):
        # Single input array; slice out each column's share of the features.
        output = gpuarray.empty((input_data.shape[0], self.n_units),
                                np.float32, allocator=memory_pool.allocate)
        cache = []
        i_in = 0
        i_out = 0
        for column in self.columns:
            input_column = extract_columns(input_data, i_in, i_in + column.n_in)
            c = column.feed_forward(input_column, prediction)
            cache.append((input_column, c))
            insert_columns(c[0], output, i_out)
            i_in += column.n_in
            i_out += column.n_units

        return output, cache

    def backprop(self, input_data, df_output, cache=None):
        """Backpropagate through all columns.

        Returns ``(df_params_master, df_input)``. Only gradients of
        master parameters are returned; slave gradients are added onto
        their masters using the index maps from _setup_weight_sharing.
        """
        if cache is None:
            _, cache = self.feed_forward(input_data, False)
        else:
            # feed_forward returns (output, cache); here we need the
            # per-column cache list only.
            cache = cache[1]

        df_params = []
        df_input = []
        i = 0
        for column, cache_column in zip(self.columns, cache):
            df_output_column = extract_columns(df_output, i, i + column.n_units)
            df_params_column, df_input_column = column.backprop(
                cache_column[0], df_output_column, cache_column[1])
            df_params.extend(df_params_column)
            df_input.append(df_input_column)
            i += column.n_units

        # Keep master gradients and accumulate the slaves' gradients
        # onto them.
        df_params_master = [df_params[idx] for idx in self.master_param_idx]
        for slave_idx, master_idx in self.shared_idx:
            df_params_master[master_idx] += df_params[slave_idx]

        del df_params

        if not self.input_as_list:
            # Reassemble the per-column input gradients into one array.
            df_input_list = df_input
            df_input = gpuarray.empty(input_data.shape, np.float32,
                                      allocator=memory_pool.allocate)

            i = 0
            for dfi, column in zip(df_input_list, self.columns):
                insert_columns(dfi, df_input, i)
                i += column.n_in

        return df_params_master, df_input
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from .logistic_regression import LogisticRegression 18 | from .multitask_neural_net import MultitaskNeuralNet 19 | from .neural_net import NeuralNet 20 | from .neural_net_regression import NeuralNetRegression 21 | from .model import Model 22 | -------------------------------------------------------------------------------- /hebel/models/logistic_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
class Model(object):
    """Abstract base class defining the interface of a Hebel model.

    Concrete models must override every method below; each stub simply
    raises ``NotImplementedError``.
    """

    def __init__(self):
        raise NotImplementedError

    @property
    def parameters(self):
        """The model's parameters (abstract)."""
        raise NotImplementedError

    @parameters.setter
    def parameters(self, value):
        raise NotImplementedError

    def update_parameters(self, value):
        """Apply an update to the model's parameters (abstract)."""
        raise NotImplementedError

    def evaluate(self, input_data, targets,
                 return_cache=False, prediction=True):
        """Evaluate the loss function without computing gradients (abstract)."""
        raise NotImplementedError

    def training_pass(self, input_data, targets):
        """Perform a full forward and backward pass through the model (abstract)."""
        raise NotImplementedError

    def test_error(self, input_data, targets, average=True, cache=None):
        """Evaluate performance on a test set (abstract)."""
        raise NotImplementedError

    def feed_forward(self, input_data, return_cache=False, prediction=True):
        """Get predictions from the model (abstract)."""
        raise NotImplementedError
class MultitaskNeuralNet(NeuralNet):
    """A neural net trained on several tasks simultaneously.

    Identical to :class:`NeuralNet` except that the output layer is a
    :class:`MultitaskTopLayer`; all other behavior is inherited.
    """
    TopLayerClass = MultitaskTopLayer
class NeuralNetRegression(NeuralNet):
    """A neural network for regression using the squared error loss
    function.

    This class exists for convenience. The same results can be
    achieved by creating a :class:`hebel.models.NeuralNet` instance
    and passing a :class:`hebel.layers.LinearRegressionLayer` instance
    as the ``top_layer`` argument.

    **Parameters:**

    layers : array_like
        An array of either integers or instances of
        :class:`hebel.layers.HiddenLayer` objects. If integers are
        given, they represent the number of hidden units in each layer
        and new ``HiddenLayer`` objects will be created. If
        ``HiddenLayer`` instances are given, the user must make sure
        that each ``HiddenLayer`` has ``n_in`` set to the preceding
        layer's ``n_units``. If ``HiddenLayer`` instances are passed,
        then ``activation_function``, ``dropout``, ``n_in``,
        ``l1_penalty_weight``, and ``l2_penalty_weight`` are ignored.

    top_layer : :class:`hebel.layers.TopLayer` instance, optional
        If ``top_layer`` is given, then it is used for the output
        layer, otherwise, a ``LinearRegressionLayer`` instance is created.

    activation_function : {'sigmoid', 'tanh', 'relu', or 'linear'}, optional
        The activation function to be used in the hidden layers.

    dropout : float in [0, 1)
        Probability of dropping out each hidden unit during training. Default is 0.

    n_in : integer, optional
        The dimensionality of the input. Must be given, if the first
        hidden layer is not passed as a
        :class:`hebel.layers.HiddenLayer` instance.

    n_out : integer, optional
        The number of classes to predict from. Must be given, if a
        :class:`hebel.layers.HiddenLayer` instance is not given in
        ``top_layer``.

    l1_penalty_weight : float, optional
        Weight for L1 regularization

    l2_penalty_weight : float, optional
        Weight for L2 regularization

    kwargs : optional
        Any additional arguments are passed on to ``top_layer``

    **See also:**

    :class:`hebel.models.NeuralNet`,
    :class:`hebel.models.MultitaskNeuralNet`,
    :class:`hebel.layers.LinearRegressionLayer`

    """
    TopLayerClass = LinearRegressionLayer
21 | 22 | """ 23 | 24 | import numpy as np 25 | import time, cPickle, os, sys 26 | from datetime import datetime 27 | 28 | class ProgressMonitor(object): 29 | def __init__(self, experiment_name=None, save_model_path=None, 30 | save_interval=None, output_to_log=False, 31 | model=None, make_subdir=True): 32 | 33 | self.experiment_name = experiment_name 34 | self.save_model_path = save_model_path 35 | self.save_interval = save_interval 36 | self.output_to_log = output_to_log 37 | self.model = model 38 | 39 | self.train_error = [] 40 | self.validation_error = [] 41 | self.avg_epoch_t = None 42 | self._time = datetime.now().strftime('%Y-%m-%dT%H-%M-%S') 43 | 44 | self.epochs = 0 45 | 46 | self.makedir(make_subdir) 47 | 48 | def print_(self, obj): 49 | if self.log is not None: 50 | self.log.write(str(obj) + '\n') 51 | print obj 52 | sys.stdout.flush() 53 | 54 | @property 55 | def yaml_config(self): 56 | return self._yaml_config 57 | 58 | @yaml_config.setter 59 | def yaml_config(self, yaml_config): 60 | if yaml_config is not None: 61 | self._yaml_config = yaml_config 62 | yaml_path = os.path.join(self.save_path, 'yaml_config.yml') 63 | f = open(yaml_path, 'w') 64 | f.write(self._yaml_config) 65 | self._yaml_config = yaml_config 66 | 67 | @property 68 | def test_error(self): 69 | return self._test_error 70 | 71 | @test_error.setter 72 | def test_error(self, test_error): 73 | self._test_error = test_error 74 | self.print_("Test error: %.4f" % test_error) 75 | f = open(os.path.join(self.save_path, "test_error"), 'w') 76 | f.write('%.5f\n' % test_error) 77 | 78 | def makedir(self, make_subdir=True): 79 | if make_subdir: 80 | experiment_dir_name = '_'.join(( 81 | self.experiment_name, 82 | datetime.now().strftime('%Y-%m-%dT%H-%M-%S'))) 83 | 84 | path = os.path.join(self.save_model_path, 85 | experiment_dir_name) 86 | else: 87 | path = self.save_model_path 88 | if not os.path.exists(path): 89 | os.makedirs(path) 90 | self.save_path = path 91 | 92 | if self.output_to_log: 93 | 
self.log = open(os.path.join(self.save_path, 'output.log'), 'w', 1) 94 | # sys.stdout = self.log 95 | # sys.stderr = self.log 96 | 97 | def start_training(self): 98 | self.start_time = datetime.now() 99 | 100 | def report(self, epoch, train_error, validation_error=None, 101 | new_best=None, epoch_t=None): 102 | # Print logs 103 | self.train_error.append((epoch, train_error)) 104 | if validation_error is not None: 105 | self.validation_error.append((epoch, validation_error)) 106 | self.print_error(epoch, train_error, validation_error, new_best) 107 | 108 | if epoch_t is not None: 109 | self.avg_epoch_t = ((epoch - 1) * \ 110 | self.avg_epoch_t + epoch_t) / epoch \ 111 | if self.avg_epoch_t is not None else epoch_t 112 | 113 | # Pickle model 114 | if self.save_interval is not None: 115 | if not epoch % self.save_interval: 116 | filename = 'model_%s_epoch%04d.pkl' % ( 117 | self.experiment_name, 118 | epoch) 119 | path = os.path.join(self.save_path, filename) 120 | cPickle.dump(self.model, open(path, 'wb')) 121 | elif new_best is not None and new_best: 122 | filename = 'model_%s_current_best.pkl' % self.experiment_name 123 | path = os.path.join(self.save_path, filename) 124 | cPickle.dump(self.model, open(path, 'wb')) 125 | 126 | def print_error(self, epoch, train_error, validation_error=None, new_best=None): 127 | if validation_error is not None: 128 | report_str = 'Epoch %d, Validation error: %.5g, Train Loss: %.3f' % \ 129 | (epoch, validation_error, train_error) 130 | if new_best is not None and new_best: 131 | report_str = '* ' + report_str 132 | else: 133 | report_str = 'Epoch %d, Train Loss: %.3f' % \ 134 | (epoch, train_error) 135 | self.print_(report_str) 136 | 137 | def avg_weight(self): 138 | self.print_("\nAvg weights:") 139 | 140 | i = 0 141 | for param in self.model.parameters: 142 | if len(param.shape) != 2: continue 143 | param_cpu = np.abs(param.get()) 144 | mean_weight = param_cpu.mean() 145 | std_weight = param_cpu.std() 146 | self.print_('Layer %d: 
%.4f [%.4f]' % (i, mean_weight, std_weight)) 147 | 148 | i += 1 149 | 150 | def finish_training(self): 151 | # Print logs 152 | end_time = datetime.now() 153 | self.train_time = end_time - self.start_time 154 | self.print_("Runtime: %dm %ds" % (self.train_time.total_seconds() // 60, 155 | self.train_time.total_seconds() % 60)) 156 | self.print_("Avg. time per epoch %.2fs" % self.avg_epoch_t) 157 | 158 | # Pickle model 159 | filename = 'model_%s_final.pkl' % self.experiment_name 160 | path = os.path.join(self.save_path, filename) 161 | self.print_("Saving model to %s" % path) 162 | cPickle.dump(self.model, open(path, 'wb')) 163 | if self.save_interval is None: 164 | os.remove(os.path.join( 165 | self.save_path, 'model_%s_current_best.pkl' % self.experiment_name)) 166 | 167 | def __del__(self): 168 | if self.output_to_log: 169 | self.log.close() 170 | 171 | 172 | class SimpleProgressMonitor(object): 173 | def __init__(self, model=None): 174 | self.model = model 175 | 176 | self.train_error = [] 177 | self.validation_error = [] 178 | self.avg_epoch_t = None 179 | self._time = datetime.now().strftime('%Y-%m-%dT%H-%M-%S') 180 | 181 | def start_training(self): 182 | self.start_time = datetime.now() 183 | 184 | def report(self, epoch, train_error, validation_error=None, 185 | new_best=None, epoch_t=None): 186 | self.train_error.append((epoch, train_error)) 187 | if validation_error is not None: 188 | self.validation_error.append((epoch, validation_error)) 189 | 190 | # Print logs 191 | self.print_error(epoch, train_error, validation_error, new_best) 192 | 193 | if epoch_t is not None and epoch > 0: 194 | self.avg_epoch_t = ((epoch - 1) * \ 195 | self.avg_epoch_t + epoch_t) / epoch \ 196 | if self.avg_epoch_t is not None else epoch_t 197 | sys.stdout.flush() 198 | 199 | def print_error(self, epoch, train_error, validation_error=None, new_best=None): 200 | if validation_error is not None: 201 | report_str = 'Epoch %d, Validation error: %.5g, Train Loss: %.3f' % \ 202 | 
(epoch, validation_error, train_error) 203 | if new_best is not None and new_best: 204 | report_str = '* ' + report_str 205 | print report_str 206 | else: 207 | print 'Epoch %d, Train Loss: %.3f' % \ 208 | (epoch, train_error) 209 | 210 | def avg_weight(self): 211 | print "\nAvg weights:" 212 | 213 | i = 0 214 | for param in self.model.parameters: 215 | if len(param.shape) != 2: continue 216 | param_cpu = np.abs(param.get()) 217 | mean_weight = param_cpu.mean() 218 | std_weight = param_cpu.std() 219 | print 'Layer %d: %.4f [%.4f]' % (i, mean_weight, std_weight) 220 | i += 1 221 | sys.stdout.flush() 222 | 223 | def finish_training(self): 224 | # Print logs 225 | end_time = datetime.now() 226 | self.train_time = end_time - self.start_time 227 | print "Runtime: %dm %ds" % (self.train_time.total_seconds() // 60, 228 | self.train_time.total_seconds() % 60) 229 | print "Avg. time per epoch %.2fs" % self.avg_epoch_t 230 | sys.stdout.flush() 231 | 232 | 233 | class DummyProgressMonitor(object): 234 | def __init__(self, model=None): 235 | self.model = model 236 | 237 | def start_training(self): 238 | pass 239 | 240 | def report(self, epoch, train_error, validation_error=None, 241 | new_best=None, epoch_t=None): 242 | pass 243 | 244 | def finish_training(self): 245 | pass 246 | -------------------------------------------------------------------------------- /hebel/optimizers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
class EarlyStoppingModule(object):
    """Tracks the best validation loss seen so far and keeps a pickled
    snapshot of the model that achieved it."""

    def __init__(self, model, verbose):
        self.model = model
        self.best_validation_loss = np.inf
        self.verbose = verbose

    def update(self, epoch, validation_loss):
        """Record *validation_loss*; snapshot the model if it is a new best.

        Returns ``True`` when a new best was found, ``False`` otherwise.
        """
        if validation_loss < self.best_validation_loss:
            self.best_validation_loss = validation_loss
            # Drop the previous snapshot first so its memory can be
            # reclaimed before pickling the new one.
            try:
                del self.best_model
            except AttributeError:
                pass

            # Pickling may allocate; on MemoryError release the memory
            # pool's held blocks and retry once.
            try:
                self.best_model = cPickle.dumps(self.model)
            except MemoryError:
                memory_pool.free_held()
                self.best_model = cPickle.dumps(self.model)

            self.best_epoch = epoch
            return True
        return False

    def finish(self):
        """Replace ``self.model`` with the best pickled snapshot, if any."""
        try:
            self.model = cPickle.loads(self.best_model)
        except AttributeError:
            # Training has not yet reached the first validation epoch, so there is no self.best_model
            return
        if self.verbose:
            print "Optimization complete. " \
                "Best validation error of %.5g obtained in self.epoch %d" % \
                (self.best_validation_loss, self.best_epoch)


class SGD(object):
    """Mini-batch stochastic gradient descent driver.

    Iterates over ``train_data``, applying gradient updates through
    ``parameter_updater``, periodically evaluating on
    ``validation_data`` and keeping the best model via early stopping.
    """

    @property
    def best_validation_loss(self):
        # Delegates to the early-stopping module; only valid after run()
        # has created it.
        return self.early_stopping_module.best_validation_loss

    # NOTE(review): the default learning_rate_schedule is evaluated once
    # at def time, so the same scheduler object is shared by every SGD
    # instance that relies on the default -- confirm constant_scheduler
    # tolerates being consumed by multiple instances.
    def __init__(self,
                 model, parameter_updater,
                 train_data,
                 validation_data=None,
                 progress_monitor=None,
                 learning_rate_schedule=constant_scheduler(.1),
                 momentum_schedule=None,
                 early_stopping=True,
                 verbose=True):

        """ Stochastic gradient descent

        **Parameters:**

        model : Model
            The model to train.

        parameter_updater : ParameterUpdater class
            Update rule class (e.g. momentum); instantiated with the model.

        train_data / validation_data : data providers
            Mini-batch iterables; validation_data may be None.

        progress_monitor : monitor instance or None
            Defaults to SimpleProgressMonitor (verbose) or
            DummyProgressMonitor (quiet).

        learning_rate_schedule / momentum_schedule : iterators
            Yield one learning parameter value per epoch.

        early_stopping : bool
            Whether to keep and restore the best model seen on the
            validation set.
        """

        ### Initialization

        self.model = model

        ### Training data
        self.train_data = train_data

        ### Validation data
        self.validation_data = validation_data

        ### Data size
        self.N_train = self.train_data.N

        if validation_data is not None:
            self.N_validation = self.validation_data.N

        ### Learning rate schedule
        self.learning_parameter_iterators = [learning_rate_schedule]

        ### Momentum, rmsprop, etc

        self.parameter_updater = parameter_updater(self.model)

        if momentum_schedule is not None:
            self.learning_parameter_iterators.append(momentum_schedule)

        if progress_monitor is None:
            if verbose:
                self.progress_monitor = SimpleProgressMonitor(model=self.model)
            else:
                self.progress_monitor = DummyProgressMonitor()
        else:
            self.progress_monitor = progress_monitor

        # Attach the model to a monitor that was created without one.
        if self.progress_monitor.model is None:
            self.progress_monitor.model = self.model

        self.early_stopping = early_stopping
        self.verbose = verbose
        self.epoch = 0

    def run(self, iterations=200, validation_interval=5,
            yaml_config=None,
            task_id=None):
        """Run the training loop for *iterations* epochs.

        Validates every *validation_interval* epochs; a KeyboardInterrupt
        stops training cleanly and exits after saving.
        """

        self.early_stopping_module = EarlyStoppingModule(self.model, self.verbose) \
            if self.early_stopping else None

        keyboard_interrupt = False

        self.progress_monitor.start_training()

        self.progress_monitor.task_id = task_id
        self.progress_monitor.yaml_config = yaml_config

        # Main loop. Starting from self.epoch + 1 lets run() be called
        # repeatedly to continue training.
        for self.epoch in range(self.epoch + 1, self.epoch + iterations + 1):
            # Advance every schedule by one epoch.
            learning_parameters = map(lambda lp: lp.next(),
                                      self.learning_parameter_iterators)
            if keyboard_interrupt: break

            try:
                t = time.time()

                # Train on mini-batches
                train_loss = 0.

                for batch_idx, (batch_data, batch_targets) in \
                    enumerate(self.train_data):
                    batch_size = self.train_data.batch_size

                    self.parameter_updater.pre_gradient_update()

                    batch_loss, gradients = \
                        self.model.training_pass(batch_data, batch_targets)
                    train_loss += batch_loss
                    self.parameter_updater\
                        .post_gradient_update(gradients, batch_size,
                                              learning_parameters)

                # Evaluate on validation data
                if self.validation_data is not None and \
                   not self.epoch % validation_interval:
                    validation_loss_rate = self.model.test_error(
                        self.validation_data)

                    new_best = self.early_stopping_module.update(
                        self.epoch, validation_loss_rate) \
                        if self.early_stopping_module is not None else None

                    epoch_t = time.time() - t

                    self.progress_monitor.report(self.epoch, train_loss,
                                                 validation_loss_rate,
                                                 new_best,
                                                 epoch_t=epoch_t)
                else:
                    epoch_t = time.time() - t
                    self.progress_monitor.report(self.epoch, train_loss,
                                                 epoch_t=epoch_t)

            except KeyboardInterrupt:
                print "Keyboard interrupt. Stopping training and cleaning up."
                keyboard_interrupt = True

        # Restore the best model seen on the validation set.
        if self.early_stopping_module is not None:
            self.early_stopping_module.finish()

        self.progress_monitor.finish_training()

        if keyboard_interrupt:
            sys.exit()

    def norm_v_norm(self):
        # NOTE(review): self.max_vec_norm is never assigned in this
        # class; calling this method raises AttributeError unless the
        # attribute is set externally -- confirm intended usage.
        if self.max_vec_norm:
            for w in self.model.parameters:
                if len(w.shape) == 2:
                    vector_normalize(w, self.max_vec_norm)
19 | 20 | """ 21 | 22 | from pycuda import gpuarray 23 | from itertools import izip 24 | 25 | 26 | class ParameterUpdater(object): 27 | def __init__(self, model): 28 | self.model = model 29 | 30 | def pre_gradient_update(self, stream=None): 31 | pass 32 | 33 | def post_gradient_update(self, gradients, stream=None): 34 | pass 35 | 36 | 37 | class SimpleSGDUpdate(ParameterUpdater): 38 | def post_gradient_update(self, gradients, batch_size, 39 | learning_parameters, 40 | stream=None): 41 | learning_rate = learning_parameters[0] 42 | 43 | multiplier = [-lr_mult * learning_rate / batch_size for lr_mult in 44 | self.model.lr_multiplier] 45 | update = zip(gradients, multiplier) 46 | self.model.update_parameters(update) 47 | 48 | 49 | class MomentumUpdate(ParameterUpdater): 50 | def __init__(self, model): 51 | self.model = model 52 | self.velocity = [gpuarray.zeros_like(p) 53 | for p in self.model.parameters] 54 | 55 | def post_gradient_update(self, gradients, batch_size, 56 | learning_parameters, stream=None): 57 | learning_rate, momentum = learning_parameters 58 | 59 | updates = [] 60 | for gparam, vparam, lr_multiplier in \ 61 | izip(gradients, self.velocity, self.model.lr_multiplier): 62 | vparam._axpbyz(momentum, 63 | gparam, -learning_rate * lr_multiplier / batch_size, 64 | vparam, stream=stream) 65 | updates.append((vparam, 1.)) 66 | self.model.update_parameters(updates) 67 | 68 | 69 | class NesterovMomentumUpdate(MomentumUpdate): 70 | def pre_gradient_update(self): 71 | """ First step of Nesterov momentum method: 72 | take step in direction of accumulated gradient 73 | """ 74 | 75 | updates = zip(self.velocity, self.model.n_parameters * [1.]) 76 | self.model.update_parameters(updates) 77 | 78 | def post_gradient_update(self, gradients, batch_size, 79 | learning_parameters, stream=None): 80 | """ Second step of Nesterov momentum method: 81 | take step in direction of new gradient and update velocity 82 | """ 83 | 84 | learning_rate, momentum = learning_parameters 85 
| 86 | updates = [] 87 | for param, gparam, vparam, lr_multiplier in \ 88 | izip(self.model.parameters, gradients, 89 | self.velocity, self.model.lr_multiplier): 90 | 91 | updates.append( 92 | (gparam, -learning_rate * lr_multiplier / batch_size)) 93 | # param -= learning_rate*lr_multiplier/batch_size*gparam 94 | # param._axpbyz(1., gparam, -learning_rate*lr_multiplier/batch_size, 95 | # param, stream=stream) 96 | # vparam = momentum*vparam \ 97 | # - learning_rate*lr_multiplier/batch_size*gparam 98 | vparam._axpbyz(momentum, gparam, -learning_rate*lr_multiplier/batch_size, 99 | vparam, stream=stream) 100 | self.model.update_parameters(updates) 101 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | eps = np.finfo(np.float32).eps 19 | 20 | def init(): 21 | from . import elementwise 22 | from . import matrix 23 | from . import reductions 24 | from . import softmax 25 | from . 
import linalg 26 | 27 | elementwise.init() 28 | matrix.init() 29 | reductions.init() 30 | # softmax.init() 31 | linalg.init() -------------------------------------------------------------------------------- /hebel/pycuda_ops/cuda.py: -------------------------------------------------------------------------------- 1 | # This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | #!/usr/bin/env python 30 | 31 | """ 32 | Python interface to CUDA functions. 33 | """ 34 | 35 | from cudart import * 36 | from cudadrv import * 37 | 38 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/cudadrv.py: -------------------------------------------------------------------------------- 1 | # This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. 
THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | #!/usr/bin/env python 30 | 31 | """ 32 | Python interface to CUDA driver functions. 33 | """ 34 | 35 | import sys, ctypes 36 | from ctypes.util import find_library 37 | 38 | # Load CUDA driver library: 39 | _libcuda_shortname = 'nvcuda' if sys.platform == 'win32' else 'cuda' 40 | _libcuda_name = find_library(_libcuda_shortname) # on Windows, this is the full path, not just the name 41 | if _libcuda_name is None: 42 | raise OSError('CUDA driver library not found') 43 | _libcuda = ctypes.cdll.LoadLibrary(_libcuda_name) 44 | 45 | # Exceptions corresponding to various CUDA driver errors: 46 | 47 | class CUDA_ERROR(Exception): 48 | """CUDA error.""" 49 | pass 50 | 51 | class CUDA_ERROR_INVALID_VALUE(CUDA_ERROR): 52 | pass 53 | 54 | class CUDA_ERROR_OUT_OF_MEMORY(CUDA_ERROR): 55 | pass 56 | 57 | class CUDA_ERROR_NOT_INITIALIZED(CUDA_ERROR): 58 | pass 59 | 60 | class CUDA_ERROR_DEINITIALIZED(CUDA_ERROR): 61 | pass 62 | 63 | class CUDA_ERROR_PROFILER_DISABLED(CUDA_ERROR): 64 | pass 65 | 66 | class CUDA_ERROR_PROFILER_NOT_INITIALIZED(CUDA_ERROR): 67 | pass 68 | 69 | class CUDA_ERROR_PROFILER_ALREADY_STARTED(CUDA_ERROR): 70 | pass 71 | 72 | 
class CUDA_ERROR_PROFILER_ALREADY_STOPPED(CUDA_ERROR): 73 | pass 74 | 75 | class CUDA_ERROR_NO_DEVICE(CUDA_ERROR): 76 | pass 77 | 78 | class CUDA_ERROR_INVALID_DEVICE(CUDA_ERROR): 79 | pass 80 | 81 | class CUDA_ERROR_INVALID_IMAGE(CUDA_ERROR): 82 | pass 83 | 84 | class CUDA_ERROR_INVALID_CONTEXT(CUDA_ERROR): 85 | pass 86 | 87 | class CUDA_ERROR_CONTEXT_ALREADY_CURRENT(CUDA_ERROR): 88 | pass 89 | 90 | class CUDA_ERROR_MAP_FAILED(CUDA_ERROR): 91 | pass 92 | 93 | class CUDA_ERROR_UNMAP_FAILED(CUDA_ERROR): 94 | pass 95 | 96 | class CUDA_ERROR_ARRAY_IS_MAPPED(CUDA_ERROR): 97 | pass 98 | 99 | class CUDA_ERROR_ALREADY_MAPPED(CUDA_ERROR): 100 | pass 101 | 102 | class CUDA_ERROR_NO_BINARY_FOR_GPU(CUDA_ERROR): 103 | pass 104 | 105 | class CUDA_ERROR_ALREADY_ACQUIRED(CUDA_ERROR): 106 | pass 107 | 108 | class CUDA_ERROR_NOT_MAPPED(CUDA_ERROR): 109 | pass 110 | 111 | class CUDA_ERROR_NOT_MAPPED_AS_ARRAY(CUDA_ERROR): 112 | pass 113 | 114 | class CUDA_ERROR_NOT_MAPPED_AS_POINTER(CUDA_ERROR): 115 | pass 116 | 117 | class CUDA_ERROR_ECC_UNCORRECTABLE(CUDA_ERROR): 118 | pass 119 | 120 | class CUDA_ERROR_UNSUPPORTED_LIMIT(CUDA_ERROR): 121 | pass 122 | 123 | class CUDA_ERROR_CONTEXT_ALREADY_IN_USE(CUDA_ERROR): 124 | pass 125 | 126 | class CUDA_ERROR_INVALID_SOURCE(CUDA_ERROR): 127 | pass 128 | 129 | class CUDA_ERROR_FILE_NOT_FOUND(CUDA_ERROR): 130 | pass 131 | 132 | class CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND(CUDA_ERROR): 133 | pass 134 | 135 | class CUDA_ERROR_SHARED_OBJECT_INIT_FAILED(CUDA_ERROR): 136 | pass 137 | 138 | class CUDA_ERROR_OPERATING_SYSTEM(CUDA_ERROR): 139 | pass 140 | 141 | class CUDA_ERROR_INVALID_HANDLE(CUDA_ERROR): 142 | pass 143 | 144 | class CUDA_ERROR_NOT_FOUND(CUDA_ERROR): 145 | pass 146 | 147 | class CUDA_ERROR_NOT_READY(CUDA_ERROR): 148 | pass 149 | 150 | 151 | CUDA_EXCEPTIONS = { 152 | 1: CUDA_ERROR_INVALID_VALUE, 153 | 2: CUDA_ERROR_OUT_OF_MEMORY, 154 | 3: CUDA_ERROR_NOT_INITIALIZED, 155 | 4: CUDA_ERROR_DEINITIALIZED, 156 | 5: 
CUDA_ERROR_PROFILER_DISABLED, 157 | 6: CUDA_ERROR_PROFILER_NOT_INITIALIZED, 158 | 7: CUDA_ERROR_PROFILER_ALREADY_STARTED, 159 | 8: CUDA_ERROR_PROFILER_ALREADY_STOPPED, 160 | 100: CUDA_ERROR_NO_DEVICE, 161 | 101: CUDA_ERROR_INVALID_DEVICE, 162 | 200: CUDA_ERROR_INVALID_IMAGE, 163 | 201: CUDA_ERROR_INVALID_CONTEXT, 164 | 202: CUDA_ERROR_CONTEXT_ALREADY_CURRENT, 165 | 205: CUDA_ERROR_MAP_FAILED, 166 | 206: CUDA_ERROR_UNMAP_FAILED, 167 | 207: CUDA_ERROR_ARRAY_IS_MAPPED, 168 | 208: CUDA_ERROR_ALREADY_MAPPED, 169 | 209: CUDA_ERROR_NO_BINARY_FOR_GPU, 170 | 210: CUDA_ERROR_ALREADY_ACQUIRED, 171 | 211: CUDA_ERROR_NOT_MAPPED, 172 | 212: CUDA_ERROR_NOT_MAPPED_AS_ARRAY, 173 | 213: CUDA_ERROR_NOT_MAPPED_AS_POINTER, 174 | 214: CUDA_ERROR_ECC_UNCORRECTABLE, 175 | 215: CUDA_ERROR_UNSUPPORTED_LIMIT, 176 | 216: CUDA_ERROR_CONTEXT_ALREADY_IN_USE, 177 | 300: CUDA_ERROR_INVALID_SOURCE, 178 | 301: CUDA_ERROR_FILE_NOT_FOUND, 179 | 302: CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, 180 | 303: CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, 181 | 304: CUDA_ERROR_OPERATING_SYSTEM, 182 | 400: CUDA_ERROR_INVALID_HANDLE, 183 | 500: CUDA_ERROR_NOT_FOUND, 184 | 600: CUDA_ERROR_NOT_READY, 185 | } 186 | 187 | def cuCheckStatus(status): 188 | """ 189 | Raise CUDA exception. 190 | 191 | Raise an exception corresponding to the specified CUDA driver 192 | error code. 193 | 194 | Parameters 195 | ---------- 196 | status : int 197 | CUDA driver error code. 
198 | 199 | See Also 200 | -------- 201 | CUDA_EXCEPTIONS 202 | 203 | """ 204 | 205 | if status != 0: 206 | try: 207 | raise CUDA_EXCEPTIONS[status] 208 | except KeyError: 209 | raise CUDA_ERROR 210 | 211 | 212 | CU_POINTER_ATTRIBUTE_CONTEXT = 1 213 | CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2 214 | CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3 215 | CU_POINTER_ATTRIBUTE_HOST_POINTER = 4 216 | 217 | _libcuda.cuPointerGetAttribute.restype = int 218 | _libcuda.cuPointerGetAttribute.argtypes = [ctypes.c_void_p, 219 | ctypes.c_int, 220 | ctypes.c_uint] 221 | def cuPointerGetAttribute(attribute, ptr): 222 | data = ctypes.c_void_p() 223 | status = _libcuda.cuPointerGetAttribute(data, attribute, ptr) 224 | cuCheckStatus(status) 225 | return data 226 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/elementwise.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray 19 | from pycuda.elementwise import ElementwiseKernel 20 | from .. 
import sampler, memory_pool 21 | from .matrix import extract_columns, insert_columns 22 | 23 | class Kernel(object): 24 | """ Defers creation of the ElementwiseKernels until the first 25 | runtime and automatically selects kernels for double and float. 26 | """ 27 | 28 | def __init__(self, name, signature_float, code_float, 29 | signature_double, code_double): 30 | self.name = name 31 | self.kernel_float = ElementwiseKernel(signature_float, code_float, name) 32 | self.kernel_double = ElementwiseKernel(signature_double, code_double, name) 33 | 34 | def __call__(self, *args, **kwargs): 35 | if args[0].dtype == np.float32: 36 | self.kernel_float(*args, **kwargs) 37 | elif args[0].dtype == np.float64: 38 | self.kernel_double(*args, **kwargs) 39 | else: 40 | raise ValueError("Unknown datatype, must be np.float32 or np.float64") 41 | 42 | def get_kernel(self, dtype): 43 | if dtype == np.float32 or dtype == 'float': 44 | return self.kernel_float 45 | elif dtype == np.float64 or dtype == 'double': 46 | return self.kernel_double 47 | else: 48 | raise ValueError("Unknown datatype, must be np.float32 or np.float64") 49 | 50 | all_kernels = None 51 | def init(): 52 | from pycuda import elementwise 53 | 54 | global all_kernels 55 | 56 | all_kernels_code = { 57 | 'sign': { 58 | 'float': ("float *mat, float *target", 59 | "target[i] = (mat[i] > 0.) - (mat[i] < 0);"), 60 | 'double': ("double *mat, double *target", 61 | "target[i] = (mat[i] > 0.) - (mat[i] < 0);") 62 | }, 63 | 64 | 'sigmoid': { 65 | 'float': ("float *mat", 66 | "mat[i] = 1. / (1. + __expf(-mat[i]))",), 67 | 'double': ("double *mat", 68 | "mat[i] = 1. / (1. 
+ exp(-mat[i]))") 69 | }, 70 | 71 | 'df_sigmoid': { 72 | 'float': ("float *mat, float *target", 73 | """const float f = mat[i]; 74 | target[i] = f * (1 - f); 75 | """), 76 | 'double': ("double *mat, double *target", 77 | """const double f = mat[i]; 78 | target[i] = f * (1 - f); 79 | """) 80 | }, 81 | 82 | 'tanh_inplace': { 83 | 'float': ("float *mat", 84 | "mat[i] = tanhf(mat[i]);"), 85 | 'double': ("double *mat", 86 | "mat[i] = tanh(mat[i]);") 87 | }, 88 | 89 | 'df_tanh': { 90 | 'float': ("float *mat, float *target", 91 | """float f = mat[i]; 92 | target[i] = 1 - pow(f, 2);"""), 93 | 'double': ("double *mat, double *target", 94 | """double f = mat[i]; 95 | target[i] = 1 - pow(f, 2);""") 96 | }, 97 | 98 | 'relu': { 99 | 'float': ("float *mat", 100 | "if (mat[i] < 0.) mat[i] = 0.",), 101 | 'double': ("double *mat", 102 | "if (mat[i] < 0.) mat[i] = 0.") 103 | }, 104 | 105 | 'df_relu': { 106 | 'float': ("float *mat, float *target", 107 | "if (mat[i] <= 0.)\n target[i] = 0.;\nelse\n target[i] = 1.;"), 108 | 'double': ("double *mat, double *target", 109 | "if (mat[i] <= 0.)\n target[i] = 0.;\nelse\n target[i] = 1.;") 110 | }, 111 | 112 | 'sample_dropout_mask': { 113 | 'float': ("float *mat, float *target, char *dropout_mask, " 114 | "float *dropout_prob_array, float dropout_probability", 115 | """if (dropout_prob_array[i] <= dropout_probability) { 116 | dropout_mask[i] = 0.; 117 | target[i] = 0.; 118 | } else { 119 | dropout_mask[i] = 1.; 120 | if (target != mat) 121 | target[i] = mat[i]; 122 | } 123 | """), 124 | 'double': ("double *mat, double *targets, char *dropout_mask, " 125 | "double *dropout_prob_array, float dropout_probability", 126 | """if (dropout_prob_array[i] <= dropout_probability) { 127 | dropout_mask[i] = 0.; 128 | target[i] = 0.; 129 | } else { 130 | dropout_mask[i] = 1.; 131 | if (target != mat) 132 | target[i] = mat[i]; 133 | } 134 | """) 135 | }, 136 | 137 | 'apply_dropout_mask': { 138 | 'float': ("float *mat, char *mask", 139 | "if (mask[i] == 0.) 
mat[i] = 0;"), 140 | 'double': ("double *mat, char *mask", 141 | "if (mask[i] == 0.) mat[i] = 0;"), 142 | }, 143 | 144 | 'nan_to_zeros': { 145 | 'float': ("float *mat, float *target", 146 | "target[i] = isnan(mat[i]) ? 0. : mat[i];"), 147 | 'double': ("double *mat, double *target", 148 | "target[i] = isnan(mat[i]) ? 0. : mat[i];") 149 | }, 150 | 151 | 'mult_matrix': { 152 | 'float': ("const float *a, const float *b, float *c", 153 | "c[i] = a[i] * b[i];"), 154 | 'double': ("const double *b, const double *b, double *c", 155 | "c[i] = a[i] * b[i];") 156 | 157 | }, 158 | 'substract_matrix': { 159 | 'float': ("const float *a, const float *b, float *c", 160 | "c[i] = a[i] - b[i];"), 161 | 'double': ("const double *a, const double *b, double *c", 162 | "c[i] = a[i] - b[i];") 163 | } 164 | } 165 | 166 | all_kernels = { 167 | name: Kernel(name, 168 | val['float'][0], val['float'][1], 169 | val['double'][0], val['double'][1]) 170 | for name, val in all_kernels_code.iteritems() 171 | } 172 | 173 | def sign(x, target=None): 174 | assert x.flags.c_contiguous 175 | if target is None: 176 | target = gpuarray.GPUArray(x.shape, dtype=x.dtype, allocator=memory_pool.allocate) 177 | assert target.shape == x.shape 178 | assert target.dtype == x.dtype 179 | assert target.flags.c_contiguous 180 | all_kernels['sign'](x, target) 181 | return target 182 | 183 | def sigmoid(x): 184 | assert x.flags.c_contiguous 185 | all_kernels['sigmoid'](x) 186 | 187 | def df_sigmoid(f, target=None): 188 | assert f.flags.c_contiguous 189 | if target is None: 190 | target = gpuarray.empty_like(f) 191 | all_kernels['df_sigmoid'](f, target) 192 | return target 193 | 194 | def tanh(x): 195 | assert x.flags.c_contiguous 196 | all_kernels['tanh_inplace'](x) 197 | 198 | def df_tanh(f, target=None): 199 | assert f.flags.c_contiguous 200 | if target is None: 201 | target = gpuarray.empty_like(f) 202 | all_kernels['df_tanh'](f, target) 203 | return target 204 | 205 | def relu(x): 206 | assert x.flags.c_contiguous 
207 | all_kernels['relu'](x) 208 | 209 | def df_relu(x, target=None): 210 | assert x.flags.c_contiguous 211 | if target is None: 212 | target = gpuarray.empty_like(x) 213 | all_kernels['df_relu'](x, target) 214 | return target 215 | 216 | def linear(x): 217 | pass 218 | 219 | def df_linear(x): 220 | return x 221 | 222 | def sample_dropout_mask(x, dropout_probability=.5, columns=None, stream=None, target=None, 223 | dropout_mask=None, dropout_prob_array=None): 224 | """ Samples a dropout mask and applies it in place""" 225 | 226 | assert x.flags.c_contiguous 227 | 228 | if columns is not None: 229 | assert len(columns) == 2 230 | x_tmp = x 231 | x = extract_columns(x, columns[0], columns[1]) 232 | 233 | shape = x.shape 234 | 235 | if dropout_prob_array is None: 236 | dropout_prob_array = gpuarray.empty(shape, x.dtype, allocator=memory_pool.allocate) 237 | sampler.fill_uniform(dropout_prob_array, stream) 238 | 239 | if dropout_mask is None: 240 | dropout_mask = gpuarray.empty(shape, np.int8, allocator=memory_pool.allocate) 241 | 242 | if target is None: target = x 243 | 244 | all_kernels['sample_dropout_mask']( 245 | x, target, dropout_mask, dropout_prob_array, 246 | np.float32(dropout_probability)) 247 | 248 | if columns is not None: 249 | insert_columns(x, x_tmp, columns[0]) 250 | 251 | return dropout_mask 252 | 253 | def apply_dropout_mask(x, mask, columns=None, stream=None): 254 | assert x.flags.c_contiguous 255 | 256 | if columns is not None: 257 | assert len(columns) == 2 258 | x_tmp = x 259 | x = extract_columns(x, columns[0], columns[1]) 260 | 261 | assert x.shape == mask.shape 262 | shape = x.shape 263 | 264 | all_kernels['apply_dropout_mask'](x, mask) 265 | 266 | if columns is not None: 267 | insert_columns(x, x_tmp, columns[0]) 268 | 269 | def nan_to_zeros(x, target=None): 270 | assert x.flags.c_contiguous 271 | if target is None: 272 | target = gpuarray.empty_like(x) 273 | assert target.flags.c_contiguous 274 | all_kernels['nan_to_zeros'](x, target) 275 | 
return target 276 | 277 | def mult_matrix(a, b, target=None): 278 | assert a.shape == b.shape 279 | if target is None: 280 | target = gpuarray.empty_like(a) 281 | 282 | all_kernels['mult_matrix'](a, b, target) 283 | return target 284 | 285 | def substract_matrix(a, b, target=None): 286 | assert a.shape == b.shape 287 | if target is None: 288 | target = gpuarray.empty_like(a) 289 | 290 | all_kernels['substract_matrix'](a, b, target) 291 | return target 292 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/linalg.py: -------------------------------------------------------------------------------- 1 | # This file is modified from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | from string import lower 30 | import pycuda.gpuarray as gpuarray 31 | import numpy as np 32 | from . import cublas 33 | from .. import memory_pool 34 | 35 | def init(): 36 | global _global_cublas_handle 37 | _global_cublas_handle = cublas.cublasCreate() 38 | 39 | def dot(x_gpu, y_gpu, transa='N', transb='N', handle=None, target=None): 40 | """ 41 | Dot product of two arrays. 42 | 43 | For 1D arrays, this function computes the inner product. For 2D 44 | arrays of shapes `(m, k)` and `(k, n)`, it computes the matrix 45 | product; the result has shape `(m, n)`. 46 | 47 | Parameters 48 | ---------- 49 | x_gpu : pycuda.gpuarray.GPUArray 50 | Input array. 51 | y_gpu : pycuda.gpuarray.GPUArray 52 | Input array. 53 | transa : char 54 | If 'T', compute the product of the transpose of `x_gpu`. 55 | If 'C', compute the product of the Hermitian of `x_gpu`. 56 | transb : char 57 | If 'T', compute the product of the transpose of `y_gpu`. 58 | If 'C', compute the product of the Hermitian of `y_gpu`. 59 | handle : int 60 | CUBLAS context. If no context is specified, the default handle from 61 | `scikits.cuda.misc._global_cublas_handle` is used. 62 | 63 | Returns 64 | ------- 65 | c_gpu : pycuda.gpuarray.GPUArray, float{32,64}, or complex{64,128} 66 | Inner product of `x_gpu` and `y_gpu`. When the inputs are 1D 67 | arrays, the result will be returned as a scalar. 
68 | 69 | Notes 70 | ----- 71 | The input matrices must all contain elements of the same data type. 72 | 73 | Examples 74 | -------- 75 | >>> import pycuda.gpuarray as gpuarray 76 | >>> import pycuda.autoinit 77 | >>> import numpy as np 78 | >>> import linalg 79 | >>> import misc 80 | >>> linalg.init() 81 | >>> a = np.asarray(np.random.rand(4, 2), np.float32) 82 | >>> b = np.asarray(np.random.rand(2, 2), np.float32) 83 | >>> a_gpu = gpuarray.to_gpu(a) 84 | >>> b_gpu = gpuarray.to_gpu(b) 85 | >>> c_gpu = linalg.dot(a_gpu, b_gpu) 86 | >>> np.allclose(np.dot(a, b), c_gpu.get()) 87 | True 88 | >>> d = np.asarray(np.random.rand(5), np.float32) 89 | >>> e = np.asarray(np.random.rand(5), np.float32) 90 | >>> d_gpu = gpuarray.to_gpu(d) 91 | >>> e_gpu = gpuarray.to_gpu(e) 92 | >>> f = linalg.dot(d_gpu, e_gpu) 93 | >>> np.allclose(np.dot(d, e), f) 94 | True 95 | 96 | """ 97 | 98 | if handle is None: 99 | handle = _global_cublas_handle 100 | 101 | if len(x_gpu.shape) == 1 and len(y_gpu.shape) == 1: 102 | 103 | if x_gpu.size != y_gpu.size: 104 | raise ValueError('arrays must be of same length: ' 105 | 'x_gpu.size = %d, y_gpu.size = %d' % 106 | (x_gpu.size, y_gpu.size)) 107 | 108 | # Compute inner product for 1D arrays: 109 | if (x_gpu.dtype == np.complex64 and y_gpu.dtype == np.complex64): 110 | cublas_func = cublas.cublasCdotu 111 | elif (x_gpu.dtype == np.float32 and y_gpu.dtype == np.float32): 112 | cublas_func = cublas.cublasSdot 113 | elif (x_gpu.dtype == np.complex128 and y_gpu.dtype == np.complex128): 114 | cublas_func = cublas.cublasZdotu 115 | elif (x_gpu.dtype == np.float64 and y_gpu.dtype == np.float64): 116 | cublas_func = cublas.cublasDdot 117 | else: 118 | raise ValueError('unsupported combination of input types: ' 119 | 'x_gpu.dtype = %s, y_gpu.dtype = %s' % 120 | (str(x_gpu.dtype), str(y_gpu.dtype))) 121 | 122 | return cublas_func(handle, x_gpu.size, x_gpu.gpudata, 1, 123 | y_gpu.gpudata, 1) 124 | else: 125 | 126 | # Get the shapes of the arguments (accounting 
for the 127 | # possibility that one of them may only have one dimension): 128 | x_shape = x_gpu.shape 129 | y_shape = y_gpu.shape 130 | if len(x_shape) == 1: 131 | x_shape = (1, x_shape[0]) 132 | if len(y_shape) == 1: 133 | y_shape = (1, y_shape[0]) 134 | 135 | # Perform matrix multiplication for 2D arrays: 136 | if (x_gpu.dtype == np.complex64 and y_gpu.dtype == np.complex64): 137 | cublas_func = cublas.cublasCgemm 138 | alpha = np.complex64(1.0) 139 | beta = np.complex64(0.0) 140 | elif (x_gpu.dtype == np.float32 and y_gpu.dtype == np.float32): 141 | cublas_func = cublas.cublasSgemm 142 | alpha = np.float32(1.0) 143 | beta = np.float32(0.0) 144 | elif (x_gpu.dtype == np.complex128 and y_gpu.dtype == np.complex128): 145 | cublas_func = cublas.cublasZgemm 146 | alpha = np.complex128(1.0) 147 | beta = np.complex128(0.0) 148 | elif (x_gpu.dtype == np.float64 and y_gpu.dtype == np.float64): 149 | cublas_func = cublas.cublasDgemm 150 | alpha = np.float64(1.0) 151 | beta = np.float64(0.0) 152 | else: 153 | raise ValueError('unsupported combination of input types: ' 154 | 'x_gpu.dtype = %s, y_gpu.dtype = %s' % 155 | (str(x_gpu.dtype), str(y_gpu.dtype))) 156 | 157 | transa = lower(transa) 158 | transb = lower(transb) 159 | 160 | if transb in ['t', 'c']: 161 | m, k = y_shape 162 | elif transb in ['n']: 163 | k, m = y_shape 164 | else: 165 | raise ValueError('invalid value "%s" for transb' % transb) 166 | 167 | if transa in ['t', 'c']: 168 | l, n = x_shape 169 | elif transa in ['n']: 170 | n, l = x_shape 171 | else: 172 | raise ValueError('invalid value "%s" for transa' % transa) 173 | 174 | if l != k: 175 | raise ValueError('objects are not aligned: x_shape = %s, y_shape = %s' % 176 | (x_shape, y_shape)) 177 | 178 | if transb == 'n': 179 | lda = max(1, m) 180 | else: 181 | lda = max(1, k) 182 | 183 | if transa == 'n': 184 | ldb = max(1, k) 185 | else: 186 | ldb = max(1, n) 187 | 188 | ldc = max(1, m) 189 | 190 | # Note that the desired shape of the output matrix is the 
transpose 191 | # of what CUBLAS assumes: 192 | 193 | if target is None: 194 | target = gpuarray.empty((n, ldc), x_gpu.dtype, allocator=memory_pool.allocate) 195 | 196 | cublas_func(handle, transb, transa, m, n, k, alpha, y_gpu.gpudata, 197 | lda, x_gpu.gpudata, ldb, beta, target.gpudata, ldc) 198 | 199 | return target 200 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/matrix.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from .. 
import memory_pool, sampler 18 | import numpy as np 19 | from pycuda import driver as drv 20 | from pycuda import gpuarray 21 | from ..utils.math import ceil_div 22 | 23 | add_row_vec_kernel = None 24 | add_col_vec_kernel = None 25 | vector_normalize_kernel = None 26 | _compilation_constants = { 27 | 'add_vec_block_size': 16 28 | } 29 | def init(): 30 | from pycuda.compiler import SourceModule 31 | 32 | global add_row_vec_kernel 33 | global add_col_vec_kernel 34 | global vector_normalize_kernel 35 | 36 | code = """ 37 | #include 38 | __global__ void addRowVecToMat(const float *mat, 39 | const float *vec, 40 | float *target, 41 | const unsigned int n, 42 | const unsigned int m, 43 | const int substract) 44 | { 45 | const int tx = threadIdx.x; 46 | const int ty = threadIdx.y; 47 | const int tidx = blockIdx.x * blockDim.x + threadIdx.x; 48 | const int tidy = blockIdx.y * blockDim.y + threadIdx.y; 49 | 50 | __shared__ float shared_vec[%(add_vec_block_size)d]; 51 | 52 | if ((tx == 0) & (tidy < m)) 53 | shared_vec[ty] = vec[tidy]; 54 | __syncthreads(); 55 | 56 | if ((tidy < m) & (tidx < n)) 57 | { 58 | if (substract) 59 | target[tidx*m+tidy] = mat[tidx*m+tidy] - shared_vec[ty]; 60 | else 61 | target[tidx*m+tidy] = mat[tidx*m+tidy] + shared_vec[ty]; 62 | } 63 | } 64 | 65 | __global__ void addColVecToMat(const float *mat, 66 | const float *vec, 67 | float *target, 68 | const unsigned int n, 69 | const unsigned int m, 70 | const int substract) 71 | { 72 | const int tx = threadIdx.x; 73 | const int ty = threadIdx.y; 74 | const int tidx = blockIdx.x * blockDim.x + threadIdx.x; 75 | const int tidy = blockIdx.y * blockDim.y + threadIdx.y; 76 | 77 | __shared__ float shared_vec[%(add_vec_block_size)d]; 78 | 79 | if ((ty == 0) & (tidx < n)) 80 | shared_vec[tx] = vec[tidx]; 81 | __syncthreads(); 82 | 83 | if ((tidy < m) & (tidx < n)) 84 | { 85 | if (substract) 86 | target[tidx*m+tidy] = mat[tidx*m+tidy] - shared_vec[tx]; 87 | else 88 | target[tidx*m+tidy] = mat[tidx*m+tidy] + 
shared_vec[tx]; 89 | } 90 | } 91 | 92 | __global__ void kVectorNormalize(float* mat, 93 | float max_vec_norm, 94 | unsigned int width, 95 | unsigned int height) { 96 | 97 | __shared__ float sum_shared[32]; 98 | __shared__ float vec_norm; 99 | float sum = 0; 100 | 101 | for (unsigned int i = threadIdx.x; i < height; i += 32) 102 | sum += powf(mat[blockIdx.x + i * width], 2); 103 | 104 | sum_shared[threadIdx.x] = sum; 105 | 106 | __syncthreads(); 107 | 108 | if (threadIdx.x == 0) { 109 | sum = 0; 110 | 111 | for (unsigned int i = 0; i < 32; i++) 112 | sum += sum_shared[i]; 113 | 114 | vec_norm = sqrtf(sum); 115 | } 116 | __syncthreads(); 117 | 118 | for (unsigned int i = threadIdx.x; i < height; i += 32) { 119 | if (vec_norm > max_vec_norm) 120 | mat[blockIdx.x + i * width] /= (vec_norm / max_vec_norm); 121 | } 122 | } 123 | """ % _compilation_constants 124 | 125 | mod = SourceModule(code) 126 | add_row_vec_kernel = mod.get_function('addRowVecToMat').prepare('PPPIIi') 127 | add_col_vec_kernel = mod.get_function('addColVecToMat').prepare('PPPIIi') 128 | vector_normalize_kernel = mod.get_function("kVectorNormalize").prepare('PfII') 129 | 130 | def add_vec_to_mat(mat, vec, axis=None, inplace=False, 131 | target=None, substract=False): 132 | """ Add a vector to a matrix 133 | """ 134 | 135 | assert mat.flags.c_contiguous 136 | 137 | if axis is None: 138 | if vec.shape[0] == mat.shape[0]: 139 | axis = 0 140 | elif vec.shape[0] == mat.shape[1]: 141 | axis = 1 142 | else: 143 | raise ValueError('Vector length must be equal ' 144 | 'to one side of the matrix') 145 | 146 | n, m = mat.shape 147 | 148 | block = (_compilation_constants['add_vec_block_size'], 149 | _compilation_constants['add_vec_block_size'], 1) 150 | gridx = ceil_div(n, block[0]) 151 | gridy = ceil_div(m, block[1]) 152 | grid = (gridx, gridy, 1) 153 | 154 | if inplace: 155 | target = mat 156 | elif target is None: 157 | target = gpuarray.empty_like(mat) 158 | 159 | if axis == 0: 160 | assert vec.shape[0] == 
mat.shape[0] 161 | add_col_vec_kernel.prepared_call( 162 | grid, block, 163 | mat.gpudata, 164 | vec.gpudata, 165 | target.gpudata, 166 | np.uint32(n), 167 | np.uint32(m), 168 | np.int32(substract)) 169 | elif axis == 1: 170 | assert vec.shape[0] == mat.shape[1] 171 | add_row_vec_kernel.prepared_call( 172 | grid, block, 173 | mat.gpudata, 174 | vec.gpudata, 175 | target.gpudata, 176 | np.uint32(n), 177 | np.uint32(m), 178 | np.int32(substract)) 179 | return target 180 | 181 | 182 | def vector_normalize(mat, max_vec_norm=1.): 183 | """ Normalize each column vector in mat to length 184 | max_vec_norm if it is longer than max_vec_norm 185 | """ 186 | assert mat.flags.c_contiguous 187 | n, m = mat.shape 188 | 189 | vector_normalize_kernel.prepared_call( 190 | (m, 1, 1), (32, 1, 1), 191 | mat.gpudata, 192 | np.float32(max_vec_norm), 193 | np.int32(m), 194 | np.int32(n)) 195 | 196 | def extract_columns(mat, start=0, stop=None, target=None): 197 | dtype = mat.dtype 198 | itemsize = np.dtype(dtype).itemsize 199 | 200 | input_3d = False 201 | if len(mat.shape) == 2: 202 | N, M = mat.shape 203 | if stop is None: 204 | stop = M 205 | elif len(mat.shape) == 3: 206 | input_3d = True 207 | N, M, Z = mat.shape 208 | if stop is None: 209 | stop = M 210 | start = start * Z 211 | stop = stop * Z 212 | M = M * Z 213 | mat = mat.reshape((N, M)) 214 | else: 215 | raise ValueError("mat must have two or three dimensions") 216 | m = stop - start 217 | 218 | assert mat.flags.c_contiguous 219 | assert start >= 0 and start <= M and stop >= 0 and \ 220 | stop <= M and stop > start 221 | 222 | if target is None: 223 | target = gpuarray.empty((N, m), dtype, allocator=memory_pool.allocate) 224 | 225 | copy = drv.Memcpy2D() 226 | copy.set_src_device(mat.gpudata) 227 | copy.src_x_in_bytes = start * itemsize 228 | copy.set_dst_device(target.gpudata) 229 | copy.src_pitch = M * itemsize 230 | copy.dst_pitch = copy.width_in_bytes = m * itemsize 231 | copy.height = N 232 | copy(aligned=True) 233 | 234 
| if input_3d: 235 | assert not m % Z 236 | target = target.reshape((N, m // Z, Z)) 237 | 238 | return target 239 | 240 | 241 | def insert_columns(src, dst, offset): 242 | dtype = src.dtype 243 | itemsize = np.dtype(dtype).itemsize 244 | if len(src.shape) == 2: 245 | h_src, w_src = src.shape 246 | elif len(src.shape) == 3: 247 | h_src = src.shape[0] 248 | w_src = np.prod(src.shape[1:]) 249 | h_dst, w_dst = dst.shape 250 | 251 | assert dst.dtype == dtype 252 | assert h_src == h_dst 253 | assert w_dst >= offset + w_src 254 | 255 | copy = drv.Memcpy2D() 256 | copy.set_src_device(src.gpudata) 257 | copy.set_dst_device(dst.gpudata) 258 | copy.dst_x_in_bytes = offset * itemsize 259 | copy.src_pitch = copy.width_in_bytes = w_src * itemsize 260 | copy.dst_pitch = w_dst * itemsize 261 | copy.height = h_src 262 | copy(aligned=True) 263 | 264 | def pad_array(mat, left=0, right=0, val=0., new_shape=None, stream=None): 265 | assert mat.flags.c_contiguous 266 | 267 | is_chararray = False 268 | if mat.dtype == '|S1': 269 | is_chararray = True 270 | mat.dtype = np.int8 271 | if type(val) is str: 272 | val = ord(val) 273 | 274 | if len(mat.shape) == 2: 275 | height, width = mat.shape 276 | elif len(mat.shape) > 2: 277 | height = mat.shape[0] 278 | width = np.prod(mat.shape[1:]) 279 | mat = mat.reshape((height, width)) 280 | else: 281 | raise ValueError('Array must be at least two-dimensional.') 282 | 283 | padded_width = width + left + right 284 | 285 | padded_mat = gpuarray.empty((height, padded_width), dtype=mat.dtype, 286 | allocator=memory_pool.allocate).fill(val) 287 | 288 | itemsize = np.dtype(padded_mat.dtype).itemsize 289 | copy = drv.Memcpy2D() 290 | copy.set_src_device(mat.gpudata) 291 | copy.set_dst_device(padded_mat.gpudata) 292 | copy.dst_x_in_bytes = left * itemsize 293 | copy.src_pitch = copy.width_in_bytes = width * itemsize 294 | copy.dst_pitch = padded_width * itemsize 295 | copy.height = height 296 | copy(stream) 297 | 298 | if new_shape is not None: 299 | 
padded_mat = padded_mat.reshape(new_shape) 300 | 301 | if is_chararray: 302 | mat.dtype = np.dtype('|S1') 303 | padded_mat.dtype = np.dtype('|S1') 304 | 305 | return padded_mat 306 | 307 | def rand_array(shape, dtype=np.float32, dist='uniform', stream=None): 308 | mat = gpuarray.empty(shape, dtype, allocator=memory_pool.allocate) 309 | if dist == 'uniform': 310 | sampler.fill_uniform(mat, stream=stream) 311 | elif dist == 'normal': 312 | sampler.fill_normal(mat, stream=stream) 313 | return mat 314 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/reductions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray 19 | from . import linalg 20 | from .. 
import memory_pool 21 | 22 | max_column = None 23 | max_row = None 24 | def init(): 25 | from pycuda.compiler import SourceModule 26 | 27 | global max_column 28 | global max_row 29 | 30 | code = """ 31 | #include "float.h" 32 | 33 | __global__ void kMaxColumnwise(float* mat, 34 | float* target, 35 | unsigned int width, 36 | unsigned int height) { 37 | __shared__ float max_vals[32]; 38 | float cur_max = -FLT_MAX; 39 | float val = 0; 40 | 41 | for (unsigned int i = threadIdx.x; i < height; i += 32) { 42 | val = mat[blockIdx.x + i * width]; 43 | 44 | if (val > cur_max) 45 | cur_max = val; 46 | } 47 | 48 | max_vals[threadIdx.x] = cur_max; 49 | 50 | __syncthreads(); 51 | 52 | if (threadIdx.x == 0) { 53 | cur_max = -FLT_MAX; 54 | 55 | for (unsigned int i = 0; i < 32; i++) 56 | if (max_vals[i] > cur_max) 57 | cur_max = max_vals[i]; 58 | 59 | target[blockIdx.x] = cur_max; 60 | } 61 | // __syncthreads(); 62 | } 63 | 64 | __global__ void kMaxRowwise(float* mat, 65 | float* target, 66 | unsigned int width, 67 | unsigned int height) { 68 | __shared__ float max_vals[32]; 69 | float cur_max = -FLT_MAX; 70 | float val = 0; 71 | 72 | for (unsigned int i = threadIdx.x; i < width; i += 32) { 73 | val = mat[blockIdx.x * width + i]; 74 | 75 | if (val > cur_max) 76 | cur_max = val; 77 | } 78 | 79 | max_vals[threadIdx.x] = cur_max; 80 | 81 | __syncthreads(); 82 | 83 | if (threadIdx.x == 0) { 84 | cur_max = -FLT_MAX; 85 | 86 | for (unsigned int i = 0; i < 32; i++) 87 | if (max_vals[i] > cur_max) 88 | cur_max = max_vals[i]; 89 | 90 | target[blockIdx.x] = cur_max; 91 | } 92 | // __syncthreads(); 93 | } 94 | """ 95 | 96 | mod = SourceModule(code) 97 | max_column = mod.get_function("kMaxColumnwise").prepare('PPII') 98 | max_row = mod.get_function("kMaxRowwise").prepare('PPII') 99 | 100 | 101 | def max_by_axis(mat, axis=0): 102 | assert mat.flags.c_contiguous 103 | assert axis in (0, 1) 104 | 105 | n, m = mat.shape 106 | 107 | if axis == 0: 108 | target = gpuarray.empty(m, dtype=np.float32) 
109 | max_column.prepared_call( 110 | (m, 1, 1), (32, 1, 1), 111 | mat.gpudata, target.gpudata, 112 | np.int32(m), np.int32(n)) 113 | 114 | elif axis == 1: 115 | target = gpuarray.empty(n, dtype=np.float32) 116 | max_row.prepared_call( 117 | (n, 1, 1), (32, 1, 1), 118 | mat.gpudata, target.gpudata, 119 | np.int32(m), np.int32(n)) 120 | 121 | return target 122 | 123 | 124 | def _matrix_sum_out_axis_wrapper(): 125 | one_vector_cache = {} 126 | 127 | def f(mat, axis=0, cache_one_vector=True, target=None): 128 | assert mat.flags.c_contiguous 129 | N, M = mat.shape 130 | 131 | if axis == 0: 132 | vec_shape = (N, 1) 133 | try: 134 | ones = one_vector_cache[vec_shape] 135 | except KeyError: 136 | ones = gpuarray.empty(vec_shape, dtype=mat.dtype, 137 | allocator=memory_pool.allocate).fill(1.) 138 | if cache_one_vector: one_vector_cache[vec_shape] = ones 139 | 140 | if target is None: 141 | target = gpuarray.empty((M,), mat.dtype, allocator=memory_pool.allocate) 142 | 143 | # if len(target.shape) == 1: 144 | # target = target.reshape((target.shape[0], 1)) 145 | # target.shape = (target.shape[0], 1) 146 | assert target.shape == (M,) 147 | linalg.dot(mat, ones, transa='T', target=target) 148 | elif axis == 1: 149 | vec_shape = (M, 1) 150 | try: 151 | ones = one_vector_cache[vec_shape] 152 | except KeyError: 153 | ones = gpuarray.empty((M, 1), dtype=mat.dtype, 154 | allocator=memory_pool.allocate).fill(1.) 
155 | if cache_one_vector: one_vector_cache[vec_shape] = ones 156 | 157 | if target is None: 158 | target = gpuarray.empty((N,), mat.dtype, allocator=memory_pool.allocate) 159 | 160 | # if len(target.shape) == 1: 161 | # target = target.reshape((target.shape[0], 1)) 162 | assert target.shape == (N,) 163 | linalg.dot(mat, ones, target=target) 164 | else: 165 | raise ValueError('axis must be 0 or 1') 166 | 167 | # target.shape = (target.shape[0], 1) 168 | return target 169 | return f 170 | matrix_sum_out_axis = _matrix_sum_out_axis_wrapper() 171 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from . import eps 18 | from .. 
import memory_pool
from .reductions import max_by_axis
from .matrix import add_vec_to_mat
from .reductions import matrix_sum_out_axis
from .elementwise import nan_to_zeros
from pycuda import cumath, gpuarray
import numpy as np


def logsumexp(mat):
    """Row-wise log(sum(exp(mat))), stabilized by subtracting the row max."""
    max_dim = max_by_axis(mat, 1)
    tmp = add_vec_to_mat(mat, max_dim, 0, substract=True)

    tmp = cumath.exp(tmp)

    tmp = matrix_sum_out_axis(tmp, 1)
    tmp = cumath.log(tmp)
    # max + log(sum(exp(mat - max)))
    max_dim += tmp
    return max_dim


def softmax(mat):
    """Row-wise softmax, computed via the numerically stable logsumexp.

    NOTE: the original allocated gpuarray.empty_like(mat) here and
    immediately overwrote the reference -- a dead device allocation,
    removed.
    """
    L = logsumexp(mat)
    tmp = add_vec_to_mat(mat, L, substract=True)
    tmp = cumath.exp(tmp)
    return tmp


def cross_entropy(x, y):
    """Cross-entropy loss -sum(y * log(x + eps)); NaN terms are zeroed."""
    loss = y * cumath.log(x + eps)
    nan_to_zeros(loss, loss)
    loss = -gpuarray.sum(loss)
    return loss


def cross_entropy_logistic(x, y):
    """Binary cross-entropy loss for logistic (sigmoid) outputs."""
    loss = y * cumath.log(x + eps) + (1. - y) * cumath.log(1. - x + eps)
    loss = -gpuarray.sum(loss)
    return loss
# --------------------------------------------------------------------------
# hebel/pycuda_ops/utils.py
# --------------------------------------------------------------------------
# This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda)
# Copyright (c) 2009-2013, Lev Givon. All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:

# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# Neither the name of Lev Givon nor the names of any contributors may
# be used to endorse or promote products derived from this software
# without specific prior written permission. THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

#!/usr/bin/env python

"""
Utility functions.
"""

import ctypes
import re
import subprocess

try:
    import elftools
except ImportError:

    def get_soname(filename):
        """
        Retrieve SONAME of shared library.

        Parameters
        ----------
        filename : str
            Full path to shared library.

        Returns
        -------
        soname : str
            SONAME of shared library ('' if none is found).

        Notes
        -----
        This function uses the `objdump` system command.

        """

        try:
            p = subprocess.Popen(['objdump', '-p', filename],
                                 stdout=subprocess.PIPE)
            out = p.communicate()[0]
        except Exception:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            raise RuntimeError('error executing objdump')
        else:
            if isinstance(out, bytes):
                # Popen.communicate() returns bytes on Python 3; decode
                # before applying a text regex.
                out = out.decode('utf-8', 'replace')
            result = re.search(r'^\s+SONAME\s+(.+)$', out, re.MULTILINE)
            if result:
                return result.group(1)
            else:

                # No SONAME found:
                return ''

else:
    import elftools.elf.elffile as elffile
    import elftools.construct.macros as macros
    import elftools.elf.structs as structs

    def get_soname(filename):
        """
        Retrieve SONAME of shared library.

        Parameters
        ----------
        filename : str
            Full path to shared library.

        Returns
        -------
        soname : str
            SONAME of shared library ('' if none is found).

        Notes
        -----
        This function uses the pyelftools [ELF] package.

        References
        ----------
        .. [ELF] http://pypi.python.org/pypi/pyelftools

        """

        stream = open(filename, 'rb')
        f = elffile.ELFFile(stream)
        dynamic = f.get_section_by_name('.dynamic')
        dynstr = f.get_section_by_name('.dynstr')

        # Handle libraries built for different machine architectures:
        if f.header['e_machine'] == 'EM_X86_64':
            st = structs.Struct('Elf64_Dyn',
                                macros.ULInt64('d_tag'),
                                macros.ULInt64('d_val'))
        elif f.header['e_machine'] == 'EM_386':
            st = structs.Struct('Elf32_Dyn',
                                macros.ULInt32('d_tag'),
                                macros.ULInt32('d_val'))
        else:
            raise RuntimeError('unsupported machine architecture')

        entsize = dynamic['sh_entsize']
        # range/// instead of xrange// -- identical on Python 2 (the
        # values are ints), and also works on Python 3.
        for k in range(dynamic['sh_size'] // entsize):
            result = st.parse(dynamic.data()[k * entsize:(k + 1) * entsize])

            # The following value for the SONAME tag is specified in elf.h:
            if result.d_tag == 14:
                return dynstr.get_string(result.d_val)

        # No SONAME found:
        return ''
# --------------------------------------------------------------------------
# hebel/schedulers.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

""" A bunch of different schedulers to scale learning
parameters. These are used e.g. to slowly reduce the learning rate
during training or scale momentum up and down during the early and
late phases of training.
"""


def constant_scheduler(value):
    """ Yields `value` forever. """
    while True:
        yield value


def exponential_scheduler(init_value, decay):
    """ Decreases exponentially: value_t = init_value * decay ** t """

    value = init_value
    while True:
        yield value
        value *= decay


def linear_scheduler_up(init_value, target_value, duration):
    """ Increases linearly from init_value to target_value over
    `duration` steps and then stays flat """

    value = init_value
    t = 0
    while True:
        yield value
        t += 1
        if t < duration:
            # float() prevents integer truncation when all arguments are
            # ints (consistent with linear_scheduler_up_down below, which
            # already guarded against this).
            value = init_value + t * (target_value - init_value) / \
                float(duration)
        else:
            value = target_value


def linear_scheduler_up_down(init_value, target_value, final_value,
                             duration_up, t_decrease, duration_down):
    """ Increases linearly to target_value, stays at target_value until
    t_decrease and then decreases linearly to final_value over
    duration_down steps
    """

    value = init_value
    t = 0

    while True:
        yield value
        t += 1
        if t < duration_up:
            value = init_value + t * (target_value - init_value) / \
                float(duration_up)
        elif t > t_decrease:
            value = target_value - (t - t_decrease) * \
                (target_value - final_value) / \
                float(duration_down)
        else:
            value = target_value
# --------------------------------------------------------------------------
# hebel/utils/__init__.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# --------------------------------------------------------------------------
# hebel/utils/call_check.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Utility functions for checking passed arguments against call signature
of a function or class constructor.
"""
import functools
import inspect
import types
from .string_utils import match


def check_call_arguments(to_call, kwargs):
    """
    Check the call signature against a dictionary of proposed arguments,
    raising an informative exception in the case of mismatch.

    Parameters
    ----------
    to_call : class or callable
        Function or class to examine (in the case of classes, the
        constructor call signature is analyzed)
    kwargs : dict
        Dictionary mapping parameter names (including positional
        arguments) to proposed values.
    """
    if 'self' in kwargs.keys():
        raise TypeError("Your dictionary includes an entry for 'self', "
                        "which is just asking for trouble")

    orig_to_call = getattr(to_call, '__name__', str(to_call))
    # For classes and callable objects, inspect the constructor / __call__.
    if not isinstance(to_call, types.FunctionType):
        if hasattr(to_call, '__init__'):
            to_call = to_call.__init__
        elif hasattr(to_call, '__call__'):
            to_call = to_call.__call__

    # NOTE(review): inspect.getargspec was removed in Python 3.11; this is
    # fine for the Python 2 target of this codebase.
    args, varargs, keywords, defaults = inspect.getargspec(to_call)

    if any(not isinstance(arg, str) for arg in args):
        raise TypeError('%s uses argument unpacking, which is deprecated and '
                        'unsupported by this pylearn2' % orig_to_call)

    if varargs is not None:
        raise TypeError('%s has a variable length argument list, but '
                        'this is not supported by config resolution' %
                        orig_to_call)

    if keywords is None:
        bad_keywords = [arg_name for arg_name in kwargs.keys()
                        if arg_name not in args]

        if len(bad_keywords) > 0:
            bad = ', '.join(bad_keywords)
            args = [arg for arg in args if arg != 'self']
            if len(args) == 0:
                matched_str = '(It does not support any keywords, actually)'
            else:
                matched = [match(keyword, args) for keyword in bad_keywords]
                matched_str = 'Did you mean %s?' % (', '.join(matched))
            raise TypeError('%s does not support the following '
                            'keywords: %s. %s' %
                            (orig_to_call, bad, matched_str))

    num_defaults = 0 if defaults is None else len(defaults)

    required = args[:len(args) - num_defaults]
    missing = [arg for arg in required if arg not in kwargs]

    if len(missing) > 0:
        # iff the im_self (or __self__) field is present, this is a
        # bound method, which has 'self' listed as an argument, but
        # which should not be supplied by kwargs
        is_bound = hasattr(to_call, 'im_self') or hasattr(to_call, '__self__')
        if len(missing) > 1 or missing[0] != 'self' or not is_bound:
            if 'self' in missing:
                missing.remove('self')
            missing = ', '.join([str(m) for m in missing])
            raise TypeError('%s did not get these expected '
                            'arguments: %s' % (orig_to_call, missing))


def checked_call(to_call, kwargs):
    """
    Attempt calling a function or instantiating a class with a given set of
    arguments, raising a more helpful exception in the case of argument
    mismatch.

    Parameters
    ----------
    to_call : class or callable
        Function or class to examine (in the case of classes, the
        constructor call signature is analyzed)
    kwargs : dict
        Dictionary mapping parameter names (including positional
        arguments) to proposed values.
    """
    try:
        return to_call(**kwargs)
    except TypeError:
        # Re-raise with a diagnostic if the arguments are to blame;
        # otherwise propagate the original TypeError.
        check_call_arguments(to_call, kwargs)
        raise


def sensible_argument_errors(func):
    """Decorator that rewrites TypeErrors caused by bad call arguments
    into more informative messages."""
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            # The original discarded func's return value; pass it through.
            return func(*args, **kwargs)
        except TypeError:
            argnames, varargs, keywords, defaults = inspect.getargspec(func)
            posargs = dict(zip(argnames, args))
            bad_keywords = [keyword for keyword in kwargs
                            if keyword not in argnames]

            if len(bad_keywords) > 0:
                bad = ', '.join(bad_keywords)
                # func.__name__ is identical to Python 2's func_name and
                # also works on Python 3.
                raise TypeError('%s() does not support the following '
                                'keywords: %s' % (str(func.__name__), bad))
            allargsgot = set(list(kwargs.keys()) + list(posargs.keys()))
            # `defaults` is None when the function declares no default
            # values; the original called len(None) here and crashed.
            numrequired = len(argnames) - len(defaults or ())
            diff = list(set(argnames[:numrequired]) - allargsgot)
            if len(diff) > 0:
                raise TypeError('%s() did not get required args: %s' %
                                (str(func.__name__), ', '.join(diff)))
            raise
    return wrapped_func
# --------------------------------------------------------------------------
# hebel/utils/environ.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Utilities for working with environment variables
import os


def putenv(key, value):
    """Set an environment variable visible both to this process and to
    any subprocesses it launches."""
    # this makes the change visible to other parts of the code
    # in this same process
    os.environ[key] = value
    # this makes it available to any subprocesses we launch
    os.putenv(key, value)

# --------------------------------------------------------------------------
# hebel/utils/exc.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

__author__ = "Ian Goodfellow"
"""
Exceptions related to datasets
"""


class EnvironmentVariableError(Exception):
    """ An exception raised when a required environment variable is not defined """

    def __init__(self, *args):
        super(EnvironmentVariableError, self).__init__(*args)


class NoDataPathError(EnvironmentVariableError):
    """
    Exception raised when PYLEARN2_DATA_PATH is required but has not been
    defined.
    """
    def __init__(self):
        # data_path_essay is defined below; it is only looked up at
        # construction time, so the forward reference is fine.
        super(NoDataPathError, self).__init__(data_path_essay)

data_path_essay = """\
You need to define your PYLEARN2_DATA_PATH environment variable. If you are
using a computer at LISA, this should be set to /data/lisa/data.
"""


class NotInstalledError(Exception):
    """
    Exception raised when a dataset appears not to be installed.
    This is different from an individual file missing within a dataset,
    the file not loading correctly, etc.
    This exception is used to make unit tests skip testing of datasets
    that haven't been installed.
    We do want the unit test to run and crash if the dataset is installed
    incorrectly.
    """

# --------------------------------------------------------------------------
# hebel/utils/math.py
# --------------------------------------------------------------------------

def ceil_div(x, y):
    """Integer ceiling division: the smallest integer >= x / y.

    Uses floor division rather than the original int((x + y - 1) / y),
    which went through a float and lost exactness for integers beyond
    2**53.
    """
    return int((x + y - 1) // y)


def div_up(x, y):
    """Round x up to the next multiple of y."""
    return y * ceil_div(x, y)

# --------------------------------------------------------------------------
# hebel/utils/plotting.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from pycuda import gpuarray
import numpy as np
from math import ceil

def show_filters(W, img_dims, columns=10, normalize=True, **kwargs):
    """Display the filters (columns) of weight matrix W as a tiled
    grayscale image.

    W may be a (D, N) gpuarray or ndarray; img_dims is the 2D shape of a
    single filter; kwargs are forwarded to plt.figure.
    """
    import matplotlib.pyplot as plt
    if isinstance(W, gpuarray.GPUArray): W = W.get()

    D, N = W.shape

    if normalize:
        # Scale all filters jointly into [0, 1] for display.
        W = W - W.min() #[np.newaxis,:]
        W = W / W.max() #[np.newaxis,:]

    # NOTE(review): the original computed rows = int(ceil(N / columns))
    # but never used it (and Python 2 integer division made the ceil a
    # no-op anyway); the dead assignment has been removed.

    fig = plt.figure(1, **kwargs)
    plt.subplots_adjust(left=0., right=.51, wspace=.1, hspace=.01)

    # Tile the N filters into a rows-by-columns mosaic.
    filters = np.rollaxis(W.reshape(img_dims + (N,)), 2)
    filters = np.vstack([np.hstack(filters[i:i + columns])
                         for i in range(0, N, columns)])
    plt.axis('off')
    plt.imshow(filters, cmap=plt.cm.gray, interpolation='nearest', figure=fig)
# --------------------------------------------------------------------------
# hebel/utils/string_utils.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """ Utilities for modifying strings""" 18 | 19 | import os 20 | import warnings 21 | import re 22 | import functools 23 | 24 | from .exc import EnvironmentVariableError, NoDataPathError 25 | 26 | def preprocess(string): 27 | """ 28 | Preprocesses a string, by replacing ${VARNAME} with 29 | os.environ['VARNAME'] 30 | 31 | Parameters 32 | ---------- 33 | string: the str object to preprocess 34 | 35 | Returns 36 | ------- 37 | the preprocessed string 38 | """ 39 | 40 | split = string.split('${') 41 | 42 | rval = [split[0]] 43 | 44 | for candidate in split[1:]: 45 | subsplit = candidate.split('}') 46 | 47 | if len(subsplit) < 2: 48 | raise ValueError('Open ${ not followed by } before ' \ 49 | + 'end of string or next ${ in "' \ 50 | + string + '"') 51 | 52 | varname = subsplit[0] 53 | 54 | if varname == 'PYLEARN2_TRAIN_FILE_NAME': 55 | warnings.warn("PYLEARN2_TRAIN_FILE_NAME is deprecated and may be " 56 | "removed from the library on or after Oct 22, 2013. Switch" 57 | " to PYLEARN2_TRAIN_FILE_FULL_STEM") 58 | 59 | try: 60 | val = os.environ[varname] 61 | except KeyError: 62 | if varname == 'PYLEARN2_DATA_PATH': 63 | raise NoDataPathError() 64 | if varname == 'PYLEARN2_VIEWER_COMMAND': 65 | raise EnvironmentVariableError(environment_variable_essay) 66 | 67 | raise ValueError('Unrecognized environment variable "' + varname 68 | + '". 
Did you mean ' + match(varname, os.environ.keys()) 69 | + '?') 70 | 71 | rval.append(val) 72 | 73 | rval.append('}'.join(subsplit[1:])) 74 | 75 | rval = ''.join(rval) 76 | 77 | return rval 78 | 79 | 80 | 81 | 82 | def find_number(s): 83 | """ s is a string 84 | returns None if there are no numbers in the string 85 | otherwise returns the range of characters occupied by the first 86 | number in the string """ 87 | 88 | r = re.search('-?\d+[.e]?\d*',s) 89 | if r is not None: 90 | return r.span(0) 91 | return None 92 | 93 | def tokenize_by_number(s): 94 | """ splits a string into a list of tokens 95 | each is either a string containing no numbers 96 | or a float """ 97 | 98 | r = find_number(s) 99 | 100 | if r == None: 101 | return [ s ] 102 | else: 103 | tokens = [] 104 | if r[0] > 0: 105 | tokens.append(s[0:r[0]]) 106 | tokens.append( float(s[r[0]:r[1]]) ) 107 | if r[1] < len(s): 108 | tokens.extend(tokenize_by_number(s[r[1]:])) 109 | return tokens 110 | assert False #line should be unreached 111 | 112 | 113 | def number_aware_alphabetical_cmp(str1, str2): 114 | """ cmp function for sorting a list of strings by alphabetical order, but with 115 | numbers sorted numerically. 
116 | 117 | i.e., foo1, foo2, foo10, foo11 118 | instead of foo1, foo10 119 | """ 120 | 121 | def flatten_tokens(tokens): 122 | l = [] 123 | for token in tokens: 124 | if isinstance(token, str): 125 | for char in token: 126 | l.append(char) 127 | else: 128 | assert isinstance(token, float) 129 | l.append(token) 130 | return l 131 | 132 | seq1 = flatten_tokens(tokenize_by_number(str1)) 133 | seq2 = flatten_tokens(tokenize_by_number(str2)) 134 | 135 | l = min(len(seq1),len(seq2)) 136 | 137 | i = 0 138 | 139 | while i < l: 140 | if seq1[i] < seq2[i]: 141 | return -1 142 | elif seq1[i] > seq2[i]: 143 | return 1 144 | i += 1 145 | 146 | if len(seq1) < len(seq2): 147 | return -1 148 | elif len(seq1) > len(seq2): 149 | return 1 150 | 151 | return 0 152 | 153 | def match(wrong, candidates): 154 | """ 155 | wrong: a mispelling 156 | candidates: a set of correct words 157 | 158 | returns a guess of which candidate is the right one 159 | 160 | This should be used with a small number of candidates and a high potential 161 | edit distance. 162 | ie, use it to correct a wrong filename in a directory, wrong class name 163 | in a module, etc. Don't use it to correct small typos of freeform natural 164 | language words. 
165 | """ 166 | 167 | assert len(candidates) > 0 168 | 169 | # Current implementation tries all candidates and outputs the one 170 | # with the min score 171 | # Could try to do something smarter 172 | 173 | def score(w1,w2): 174 | # Current implementation returns negative dot product of 175 | # the two words mapped into a feature space by mapping phi 176 | # w -> [ phi(w1), .1 phi(first letter of w), .1 phi(last letter of w) ] 177 | # Could try to do something smarter 178 | 179 | w1 = w1.lower() 180 | w2 = w2.lower() 181 | 182 | def phi(w): 183 | # Current feature mapping is to the vector of counts of 184 | # all letters and two-letter sequences 185 | # Could try to do something smarter 186 | rval = {} 187 | 188 | for i in xrange(len(w)): 189 | l = w[i] 190 | rval[l] = rval.get(l,0.) + 1. 191 | if i < len(w)-1: 192 | b = w[i:i+2] 193 | rval[b] = rval.get(b,0.) + 1. 194 | 195 | return rval 196 | 197 | d1 = phi(w1) 198 | d2 = phi(w2) 199 | 200 | def mul(d1, d2): 201 | rval = 0 202 | 203 | for key in set(d1).union(d2): 204 | rval += d1.get(key,0) * d2.get(key,0) 205 | 206 | return rval 207 | 208 | tot_score = mul(phi(w1),phi(w2)) / float(len(w1)*len(w2)) + \ 209 | 0.1 * mul(phi(w1[0:1]), phi(w2[0:1])) + \ 210 | 0.1 * mul(phi(w1[-1:]), phi(w2[-1:])) 211 | 212 | return tot_score 213 | 214 | scored_candidates = [ (-score(wrong, candidate), candidate) 215 | for candidate in candidates ] 216 | 217 | scored_candidates.sort() 218 | 219 | return scored_candidates[0][1] 220 | 221 | def censor_non_alphanum(s): 222 | """ 223 | Returns s with all non-alphanumeric characters replaced with * 224 | """ 225 | 226 | def censor(ch): 227 | if (ch >= 'A' and ch <= 'z') or (ch >= '0' and ch <= '9'): 228 | return ch 229 | return '*' 230 | 231 | return ''.join([censor(ch) for ch in s]) 232 | 233 | environment_variable_essay = """ 234 | PYLEARN2_VIEWER_COMMAND not defined. 
PLEASE READ THE FOLLOWING MESSAGE CAREFULLY 235 | TO SET UP THIS ENVIRONMENT VARIABLE: 236 | 237 | pylearn2 uses an external program to display images. Because different systems have different 238 | image programs available, pylearn2 requires the user to specify what image viewer program to 239 | use. 240 | 241 | You need to choose an image viewer program that pylearn2 should use. Then tell pylearn2 to use 242 | that image viewer program by defining your PYLEARN2_VIEWER_COMMAND environment variable. 243 | 244 | You need to choose PYLEARN_VIEWER_COMMAND such that running 245 | 246 | ${PYLEARN2_VIEWER_COMMAND} image.png 247 | 248 | in a command prompt on your machine will do the following: 249 | -open an image viewer in a new process. 250 | -not return until you have closed the image. 251 | 252 | Acceptable commands include: 253 | gwenview 254 | eog --new-instance 255 | 256 | This is assuming that you have gwenview or a version of eog that supports --new-instance 257 | installed on your machine. If you don't, install one of those, or figure out a command 258 | that has the above properties that is available from your setup. 259 | 260 | On most linux setups, you can define your environment variable by adding this line to your 261 | ~/.bashrc file: 262 | 263 | export PYLEARN2_VIEWER_COMMAND="eog --new-instance" 264 | 265 | *** YOU MUST INCLUDE THE WORD "export". DO NOT JUST ASSIGN TO THE ENVIRONMENT VARIABLE *** 266 | If you do not include the word "export", the environment variable will be set in your 267 | bash shell, but will not be visible to processes that you launch from it, like the python 268 | interpreter. 269 | 270 | Don't forget that changes from your .bashrc file won't apply until you run 271 | 272 | source ~/.bashrc 273 | 274 | or open a new terminal window. If you're seeing this from an ipython notebook 275 | you'll need to restart the ipython notebook, or maybe modify os.environ from 276 | an ipython cell. 
277 | """ 278 | -------------------------------------------------------------------------------- /hebel/version.py: -------------------------------------------------------------------------------- 1 | version = '0.03-dev' 2 | release = '0.02' -------------------------------------------------------------------------------- /hebel_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
import hebel
hebel.init(0)

import unittest
import random
import numpy as np
from pycuda import gpuarray
from pycuda.curandom import rand as curand
from hebel import sampler, memory_pool
from hebel.models import NeuralNet, NeuralNetRegression
from hebel.optimizers import SGD
from hebel.parameter_updaters import SimpleSGDUpdate, \
    MomentumUpdate, NesterovMomentumUpdate
from hebel.data_providers import MNISTDataProvider, BatchDataProvider
from hebel.monitors import SimpleProgressMonitor
from hebel.schedulers import exponential_scheduler, linear_scheduler_up, \
    constant_scheduler
from hebel.pycuda_ops.matrix import extract_columns, insert_columns
from hebel.pycuda_ops.elementwise import sample_dropout_mask


class TestNeuralNetMNIST(unittest.TestCase):
    """Smoke tests for MNIST training: 20 epochs of SGD with each
    parameter updater must reduce the training error."""

    def setUp(self):
        # Mini-batches of 100 for training; the full test set at once.
        self.train_data = MNISTDataProvider('train', 100)
        self.test_data = MNISTDataProvider('test')
        self.D = self.train_data.D
        self.n_out = 10

    def test_relu(self):
        # Plain SGD updates on a 1000-unit ReLU net with dropout.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[1000], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, SimpleSGDUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        # train_error holds (epoch, error) pairs; error must have dropped.
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer

    def test_momentum(self):
        # Classical momentum, ramped linearly from .5 to .9 over 5 epochs.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[1000], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, MomentumUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        momentum_schedule=linear_scheduler_up(.5, .9, 5),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer

    def test_nesterov_momentum(self):
        # Nesterov momentum on a smaller (100-unit) net.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[100], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, NesterovMomentumUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        momentum_schedule=linear_scheduler_up(.5, .9, 5),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer


class TestColumnSlicing(unittest.TestCase):
    """GPU column extract/insert must agree with NumPy slicing."""

    def test_extract_columns(self):
        for _ in range(20):
            dtype = random.choice((np.float32, np.float64))
            N = np.random.randint(100, 1000)
            M = np.random.randint(100, 1000)
            # Draw a from [0, M - 2] so that randint(a + 1, M) below always
            # has a non-empty range. The original drew a from [0, M - 1];
            # when a == M - 1 the call randint(M, M) raised ValueError
            # (np.random.randint requires low < high).
            a = np.random.randint(0, M - 1)
            b = np.random.randint(a + 1, M)
            m = b - a
            assert m > 0

            X = curand((N, M), dtype)
            Y = extract_columns(X, a, b)

            self.assertTrue(np.all(X.get()[:, a:b] == Y.get()))

    def test_insert_columns(self):
        for _ in range(20):
            dtype = random.choice((np.float32, np.float64))
            N = np.random.randint(100, 1000)
            M = np.random.randint(100, 1000)
            # m in [1, M - 1], so M - m >= 1 and the offset draw is valid.
            m = np.random.randint(1, M)
            offset = np.random.randint(0, M - m)

            X = curand((N, M), dtype)
            Y = curand((N, m), dtype)
            insert_columns(Y, X, offset)

            self.assertTrue(np.all(X.get()[:, offset:offset+m] == Y.get()))


class TestSampleDropoutMask(unittest.TestCase):
    """The sampled dropout mask must zero ~dropout_prob of X in place."""

    # Tolerance on the empirical dropout rate over 10^7 elements.
    TOL = 1e-3

    def test_sample_dropout_mask(self):
        for _ in range(20):
            height = 1000
            width = 10000
            dropout_prob = np.random.rand()
            X = sampler.gen_uniform((height, width), np.float32)
            dropout_mask = sample_dropout_mask(X, dropout_prob)
            # Fraction of zeros in the mask should match dropout_prob.
            dropout_rate = 1. - dropout_mask.get().mean()

            self.assertLess(np.abs(dropout_prob - dropout_rate), self.TOL)
            # X itself must have been zeroed exactly where the mask is 0.
            self.assertTrue(np.all((X.get() != 0.) == dropout_mask.get()))

    def test_sample_dropout_mask_columns(self):
        for _ in range(20):
            height = 10000
            width = 10000
            dropout_prob = np.random.rand()
            X = sampler.gen_uniform((height, width), np.float32)

            # Restrict dropout to a random 1000-column window.
            start = np.random.randint(0, width - 1000)
            end = start + 1000
            columns = (start, end)

            dropout_mask = sample_dropout_mask(X, dropout_prob, columns)
            dropout_rate = 1. - dropout_mask.get().mean()

            self.assertEqual(dropout_mask.shape, (X.shape[0], end - start))
            self.assertLess(np.abs(dropout_prob - dropout_rate),
                            self.TOL)
            # Only the selected columns of X may be zeroed.
            self.assertTrue(np.all((X.get()[:, start:end] != 0.)
                                   == dropout_mask.get()))


class TestNeuralNetRegression(unittest.TestCase):
    """A linear NeuralNetRegression trained with SGD must recover the
    ordinary-least-squares solution of a synthetic linear model."""

    def test_neural_net_regression(self):
        for _ in range(20):
            N = 10000  # Number of data points
            D = 100    # Dimensionality of exogenous data
            P = 50     # Dimensionality of endogenous data

            # Ground-truth weights in [-5, 5) and biases in [-50, 50).
            W_true = 10 * np.random.rand(D, P) - 5
            b_true = 100 * np.random.rand(P) - 50

            X = np.random.randn(N, D)
            Y = np.dot(X, W_true) + b_true[np.newaxis, :] + np.random.randn(N, P)

            # Closed-form least-squares reference (intercept column first).
            W_lstsq = np.linalg.lstsq(np.c_[np.ones((N, 1)), X], Y)[0]
            b_lstsq = W_lstsq[0]
            W_lstsq = W_lstsq[1:]

            data_provider = BatchDataProvider(gpuarray.to_gpu(X.astype(np.float32),
                                                              allocator=memory_pool.allocate),
                                              gpuarray.to_gpu(Y.astype(np.float32),
                                                              allocator=memory_pool.allocate))

            # No hidden layers: the model is a pure linear regression.
            model = NeuralNetRegression([], n_in=D, n_out=P)
            optimizer = SGD(model, SimpleSGDUpdate,
                            data_provider, data_provider,
                            learning_rate_schedule=constant_scheduler(10.),
                            early_stopping=True)
            optimizer.run(100)

self.assertLess(np.abs(W_lstsq - model.top_layer.W.get()).max(), 183 | 1e-5) 184 | 185 | if __name__ == '__main__': 186 | unittest.main() 187 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from hebel.version import version 3 | 4 | try: 5 | from pypandoc import convert 6 | read_md = lambda f: convert(f, 'rst') 7 | except ImportError: 8 | print("warning: pypandoc module not found, could not convert Markdown to RST") 9 | read_md = lambda f: open(f, 'r').read() 10 | 11 | setup( name='Hebel', 12 | version=version, 13 | description='GPU-Accelerated ' 14 | 'Deep Learning Library in Python', 15 | long_description=read_md('README.md'), 16 | keywords='cuda gpu machine-learning deep-learning neural-networks', 17 | classifiers=[ 18 | 'Development Status :: 3 - Alpha', 19 | 'Intended Audience :: Science/Research', 20 | 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', 21 | 'Programming Language :: C', 22 | 'Programming Language :: Python :: 2.7', 23 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 24 | 'Topic :: Scientific/Engineering :: Image Recognition' 25 | ], 26 | url='https://github.com/hannes-brt/hebel', 27 | author='Hannes Bretschneider', 28 | author_email='hannes@psi.utoronto.ca', 29 | license='GPLv2', 30 | packages=['hebel', 31 | 'hebel.models', 32 | 'hebel.layers', 33 | 'hebel.utils', 34 | 'hebel.pycuda_ops'], 35 | install_requires=[ 36 | 'pycuda', 37 | 'numpy', 38 | 'pyyaml', 39 | 'skdata' 40 | ], 41 | test_suite='nose.collector', 42 | tests_require=['nose'], 43 | scripts=['train_model.py'], 44 | include_package_data=True, 45 | zip_safe=False 46 | ) 47 | -------------------------------------------------------------------------------- /train_model.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (C) 2013 Hannes 
Bretschneider 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program; if not, write to the Free Software Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 17 | 18 | from hebel.config import run_from_config 19 | 20 | description = """ Run this script with a yaml configuration file as input. 21 | E.g.: 22 | 23 | python train_model.py examples/mnist_neural_net_deep.yml 24 | 25 | """ 26 | 27 | if __name__ == "__main__": 28 | import argparse 29 | 30 | parser = argparse.ArgumentParser(description=description) 31 | parser.add_argument('config_file') 32 | args = parser.parse_args() 33 | 34 | if not args.config_file.endswith('.yml') and not args.config_file.endswith('.yaml'): 35 | args.config_file = args.config_file + '.yml' 36 | 37 | yaml_src = ''.join(open(args.config_file).readlines()) 38 | 39 | run_from_config(yaml_src) 40 | --------------------------------------------------------------------------------