├── .gitignore ├── CHANGES.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── conf.py ├── data_providers.rst ├── getting_started.rst ├── index.rst ├── initialization.rst ├── installation.rst ├── introduction.rst ├── layers.rst ├── make.bat ├── models.rst ├── monitors.rst ├── optimizers.rst ├── parameter_updaters.rst ├── requirements_sphinx.txt └── schedulers.rst ├── examples ├── mnist_neural_net_deep.yml ├── mnist_neural_net_deep_script.py ├── mnist_neural_net_shallow.yml └── neural_net_regression_example.py ├── hebel ├── __init__.py ├── config.py ├── cross_validation.py ├── data_providers.py ├── layers │ ├── __init__.py │ ├── column.py │ ├── dummy_layer.py │ ├── flattening_layer.py │ ├── hidden_layer.py │ ├── input_dropout.py │ ├── linear_regression_layer.py │ ├── logistic_layer.py │ ├── multi_column_layer.py │ ├── multitask_top_layer.py │ ├── softmax_layer.py │ └── top_layer.py ├── models │ ├── __init__.py │ ├── logistic_regression.py │ ├── model.py │ ├── multitask_neural_net.py │ ├── neural_net.py │ └── neural_net_regression.py ├── monitors.py ├── optimizers.py ├── parameter_updaters.py ├── pycuda_ops │ ├── __init__.py │ ├── cublas.py │ ├── cuda.py │ ├── cudadrv.py │ ├── cudart.py │ ├── elementwise.py │ ├── linalg.py │ ├── matrix.py │ ├── reductions.py │ ├── softmax.py │ └── utils.py ├── schedulers.py ├── utils │ ├── __init__.py │ ├── call_check.py │ ├── environ.py │ ├── exc.py │ ├── math.py │ ├── plotting.py │ ├── serial.py │ └── string_utils.py └── version.py ├── hebel_test.py ├── setup.py └── train_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | 
pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Django stuff: 43 | *.log 44 | *.pot 45 | 46 | # Sphinx documentation 47 | docs/_build/ 48 | 49 | # Example output directory 50 | examples/mnist/ 51 | 52 | # Backup files 53 | *~ 54 | #*# 55 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | Hebel Changelog 2 | =============== 3 | 4 | Version 0.02.1 5 | -------------- 6 | 7 | 05-22-2014 8 | 9 | * Added setup.py 10 | * Added to PyPi 11 | 12 | Version 0.02 13 | ------------ 14 | 15 | 05-08-2014 16 | 17 | * Windows compatibility (Thanks to [@Wainberg](https://github.com/Wainberg)) 18 | * CUDA 4.x is no longer supported, please upgrade to CUDA 5 or CUDA 6 19 | * All initialization is now handled through `hebel.init()`. No need to 20 | initialize PyCUDA separately anymore. 21 | * `LogisticLayer` has been renamed to `SoftmaxLayer`. `LogisticLayer` 22 | now does binary classification while `SoftmaxLayer` is for 23 | multiclass classification. 24 | * Framework for cross-validation. 25 | * When `ProgressMonitor` has `save_interval=None`, then only the 26 | currently best model is serialized. If it is a positive integer, 27 | then regular snapshots of the model are stored with that frequency. 28 | 29 | Version 0.01 30 | ------------ 31 | 32 | 01-01-2014 33 | 34 | * Removed dependency on scikits.cuda (this should make Hebel 35 | compatible with Windows, but I couldn't test that yet) 36 | 37 | * Serious speed-ups by avoiding freeing and reallocating memory for 38 | temporary objects. 
Previously, many temporary gpuarrays were 39 | reallocated in every single minibatch and then discarded, which was 40 | very inefficient. By using persistent objects for temporary objects 41 | across minibatches and some other improvements such as doing more 42 | computations in-place, a roughly 2x speed-up could be realised. 43 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | include CHANGES.md 4 | include examples/*.yml 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hebel 2 | 3 | GPU-Accelerated Deep Learning Library in Python 4 | 5 | Hebel is a library for deep learning with neural networks in Python using GPU acceleration with CUDA through PyCUDA. It implements the most important types of neural network models and offers a variety of different activation functions and training methods such as momentum, Nesterov momentum, dropout, and early stopping. 6 | 7 | I no longer actively develop Hebel. If you are looking for a deep learning framework in Python, I now recommend [Chainer](https://github.com/pfnet/chainer). 8 | 9 | ## Models 10 | 11 | Right now, Hebel implements feed-forward neural networks for classification and regression on one or multiple tasks. Other models such as Autoencoder, Convolutional neural nets, and Restricted Boltzman machines are planned for the future. 12 | 13 | Hebel implements dropout as well as L1 and L2 weight decay for regularization. 14 | 15 | ## Optimization 16 | 17 | Hebel implements stochastic gradient descent (SGD) with regular and Nesterov momentum. 18 | 19 | ## Compatibility 20 | 21 | Currently, Hebel will run on Linux and Windows, and probably Mac OS X (not tested). 
22 | 23 | ## Dependencies 24 | - PyCUDA 25 | - numpy 26 | - PyYAML 27 | - skdata (only for MNIST example) 28 | 29 | ## Installation 30 | 31 | Hebel is on PyPi, so you can install it with 32 | 33 | pip install hebel 34 | 35 | ## Getting started 36 | Study the yaml configuration files in `examples/` and run 37 | 38 | python train_model.py examples/mnist_neural_net_shallow.yml 39 | 40 | The script will create a directory in `examples/mnist` where the models and logs are saved. 41 | 42 | Read the Getting started guide at [hebel.readthedocs.org/en/latest/getting_started.html](http://hebel.readthedocs.org/en/latest/getting_started.html) for more information. 43 | 44 | ## Documentation 45 | [hebel.readthedocs.org](http://hebel.readthedocs.org) 46 | 47 | ## Contact 48 | Maintained by [Hannes Bretschneider](http://github.com/hannes-brt) (hannes@psi.utoronto.ca). 49 | If your are using Hebel, please let me know whether you find it useful and file a Github issue if you find any bugs or have feature requests. 50 | 51 | ## Citing 52 | [![http://dx.doi.org/10.5281/zenodo.10050](https://zenodo.org/badge/doi/10.5281/zenodo.10050.png)](http://dx.doi.org/10.5281/zenodo.10050) 53 | 54 | If you make use of Hebel in your research, please cite it. The BibTeX reference is 55 | 56 | @article{Bretschneider:10050, 57 | author = "Hannes Bretschneider", 58 | title = "{Hebel - GPU-Accelerated Deep Learning Library in Python}", 59 | month = "May", 60 | year = "2014", 61 | doi = "10.5281/zenodo.10050", 62 | url = "https://zenodo.org/record/10050", 63 | } 64 | 65 | ## What's with the name? 66 | _Hebel_ is the German word for _lever_, one of the oldest tools that humans use. 
As Archimedes said it: _"Give me a lever long enough and a fulcrum on which to place it, and I shall move the world."_ 67 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all 
changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Hebel.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Hebel.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 
88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Hebel" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Hebel" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Hebel documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 25 19:20:29 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | # Copyright (C) 2013 Hannes Bretschneider 15 | 16 | # This program is free software; you can redistribute it and/or modify 17 | # it under the terms of the GNU General Public License as published by 18 | # the Free Software Foundation; either version 2 of the License, or 19 | # (at your option) any later version. 20 | 21 | # This program is distributed in the hope that it will be useful, 22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 | # GNU General Public License for more details. 
25 | 26 | # You should have received a copy of the GNU General Public License along 27 | # with this program; if not, write to the Free Software Foundation, Inc., 28 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 29 | 30 | import mock 31 | import sys, os 32 | 33 | MOCK_MODULES = ['hebel.pycuda_ops', 'hebel.pycuda_ops.linalg', 34 | 'hebel.pycuda_ops.cuda', 'hebel.pycuda_ops.cudart', 35 | 'hebel.pycuda_ops.elementwise', 'hebel.pycuda_ops.matrix', 36 | 'hebel.pycuda_ops.reductions', 'hebel.pycuda_ops.softmax', 37 | 'hebel.pycuda_ops.cublas', 'hebel.pycuda_ops.cudadrv', 'skdata', 38 | 'skdata.mnist', 'skdata.mnist.view', 'pycuda', 'pycuda.autoinit', 39 | 'pycuda.compiler', 'pycuda.cumath', 'pycuda.driver', 'pycuda.tools', 40 | 'pycuda.elementwise', 'pycuda.gpuarray', 'numpy'] 41 | 42 | for mod_name in MOCK_MODULES: 43 | sys.modules[mod_name] = mock.Mock() 44 | 45 | sys.path = ['../'] + sys.path 46 | 47 | # If extensions (or modules to document with autodoc) are in another directory, 48 | # add these directories to sys.path here. If the directory is relative to the 49 | # documentation root, use os.path.abspath to make it absolute, like shown here. 50 | #sys.path.insert(0, os.path.abspath('.')) 51 | 52 | # -- General configuration ----------------------------------------------------- 53 | 54 | # If your documentation needs a minimal Sphinx version, state it here. 55 | #needs_sphinx = '1.0' 56 | 57 | # Add any Sphinx extension module names here, as strings. They can be extensions 58 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 59 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.mathjax'] 60 | 61 | # Add any paths that contain templates here, relative to this directory. 62 | templates_path = ['_templates'] 63 | 64 | # The suffix of source filenames. 65 | source_suffix = '.rst' 66 | 67 | # The encoding of source files. 
68 | #source_encoding = 'utf-8-sig' 69 | 70 | # The master toctree document. 71 | master_doc = 'index' 72 | 73 | # General information about the project. 74 | project = u'Hebel' 75 | copyright = u'2013, Hannes Bretschneider' 76 | 77 | # The version info for the project you're documenting, acts as replacement for 78 | # |version| and |release|, also used in various other places throughout the 79 | # built documents. 80 | # 81 | # The short X.Y version. 82 | from .hebel.version import version 83 | # The full version, including alpha/beta/rc tags. 84 | from .hebel.version import release 85 | 86 | # The language for content autogenerated by Sphinx. Refer to documentation 87 | # for a list of supported languages. 88 | #language = None 89 | 90 | # There are two options for replacing |today|: either, you set today to some 91 | # non-false value, then it is used: 92 | #today = '' 93 | # Else, today_fmt is used as the format for a strftime call. 94 | #today_fmt = '%B %d, %Y' 95 | 96 | # List of patterns, relative to source directory, that match files and 97 | # directories to ignore when looking for source files. 98 | exclude_patterns = ['_build'] 99 | 100 | # The reST default role (used for this markup: `text`) to use for all documents. 101 | #default_role = None 102 | 103 | # If true, '()' will be appended to :func: etc. cross-reference text. 104 | #add_function_parentheses = True 105 | 106 | # If true, the current module name will be prepended to all description 107 | # unit titles (such as .. function::). 108 | #add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | #show_authors = False 113 | 114 | # The name of the Pygments (syntax highlighting) style to use. 115 | pygments_style = 'sphinx' 116 | 117 | # A list of ignored prefixes for module index sorting. 
118 | #modindex_common_prefix = [] 119 | 120 | 121 | # -- Options for HTML output --------------------------------------------------- 122 | 123 | # The theme to use for HTML and HTML Help pages. See the documentation for 124 | # a list of builtin themes. 125 | html_theme = 'default' 126 | 127 | # Theme options are theme-specific and customize the look and feel of a theme 128 | # further. For a list of options available for each theme, see the 129 | # documentation. 130 | #html_theme_options = {} 131 | 132 | # Add any paths that contain custom themes here, relative to this directory. 133 | #html_theme_path = [] 134 | 135 | # The name for this set of Sphinx documents. If None, it defaults to 136 | # " v documentation". 137 | #html_title = None 138 | 139 | # A shorter title for the navigation bar. Default is the same as html_title. 140 | #html_short_title = None 141 | 142 | # The name of an image file (relative to this directory) to place at the top 143 | # of the sidebar. 144 | #html_logo = None 145 | 146 | # The name of an image file (within the static path) to use as favicon of the 147 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 148 | # pixels large. 149 | #html_favicon = None 150 | 151 | # Add any paths that contain custom static files (such as style sheets) here, 152 | # relative to this directory. They are copied after the builtin static files, 153 | # so a file named "default.css" will overwrite the builtin "default.css". 154 | html_static_path = ['_static'] 155 | 156 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 157 | # using the given strftime format. 158 | #html_last_updated_fmt = '%b %d, %Y' 159 | 160 | # If true, SmartyPants will be used to convert quotes and dashes to 161 | # typographically correct entities. 162 | #html_use_smartypants = True 163 | 164 | # Custom sidebar templates, maps document names to template names. 
165 | #html_sidebars = {} 166 | 167 | # Additional templates that should be rendered to pages, maps page names to 168 | # template names. 169 | #html_additional_pages = {} 170 | 171 | # If false, no module index is generated. 172 | #html_domain_indices = True 173 | 174 | # If false, no index is generated. 175 | #html_use_index = True 176 | 177 | # If true, the index is split into individual pages for each letter. 178 | #html_split_index = False 179 | 180 | # If true, links to the reST sources are added to the pages. 181 | #html_show_sourcelink = True 182 | 183 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 184 | #html_show_sphinx = True 185 | 186 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 187 | #html_show_copyright = True 188 | 189 | # If true, an OpenSearch description file will be output, and all pages will 190 | # contain a tag referring to it. The value of this option must be the 191 | # base URL from which the finished HTML is served. 192 | #html_use_opensearch = '' 193 | 194 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 195 | #html_file_suffix = None 196 | 197 | # Output file base name for HTML help builder. 198 | htmlhelp_basename = 'Hebeldoc' 199 | 200 | 201 | # -- Options for LaTeX output -------------------------------------------------- 202 | 203 | latex_elements = { 204 | # The paper size ('letterpaper' or 'a4paper'). 205 | #'papersize': 'letterpaper', 206 | 207 | # The font size ('10pt', '11pt' or '12pt'). 208 | #'pointsize': '10pt', 209 | 210 | # Additional stuff for the LaTeX preamble. 211 | #'preamble': '', 212 | } 213 | 214 | # Grouping the document tree into LaTeX files. List of tuples 215 | # (source start file, target name, title, author, documentclass [howto/manual]). 
216 | latex_documents = [ 217 | ('index', 'Hebel.tex', u'Hebel Documentation', 218 | u'Hannes Bretschneider', 'manual'), 219 | ] 220 | 221 | # The name of an image file (relative to this directory) to place at the top of 222 | # the title page. 223 | #latex_logo = None 224 | 225 | # For "manual" documents, if this is true, then toplevel headings are parts, 226 | # not chapters. 227 | #latex_use_parts = False 228 | 229 | # If true, show page references after internal links. 230 | #latex_show_pagerefs = False 231 | 232 | # If true, show URL addresses after external links. 233 | #latex_show_urls = False 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #latex_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #latex_domain_indices = True 240 | 241 | 242 | # -- Options for manual page output -------------------------------------------- 243 | 244 | # One entry per manual page. List of tuples 245 | # (source start file, name, description, authors, manual section). 246 | man_pages = [ 247 | ('index', 'hebel', u'Hebel Documentation', 248 | [u'Hannes Bretschneider'], 1) 249 | ] 250 | 251 | # If true, show URL addresses after external links. 252 | #man_show_urls = False 253 | 254 | 255 | # -- Options for Texinfo output ------------------------------------------------ 256 | 257 | # Grouping the document tree into Texinfo files. List of tuples 258 | # (source start file, target name, title, author, 259 | # dir menu entry, description, category) 260 | texinfo_documents = [ 261 | ('index', 'Hebel', u'Hebel Documentation', 262 | u'Hannes Bretschneider', 'Hebel', 'One line description of project.', 263 | 'Miscellaneous'), 264 | ] 265 | 266 | # Documents to append as an appendix to all manuals. 267 | #texinfo_appendices = [] 268 | 269 | # If false, no module index is generated. 270 | #texinfo_domain_indices = True 271 | 272 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
273 | #texinfo_show_urls = 'footnote' 274 | -------------------------------------------------------------------------------- /docs/data_providers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Data Providers 18 | ============== 19 | 20 | .. automodule:: hebel.data_providers 21 | 22 | Abstract Base Class 23 | ------------------- 24 | 25 | .. autoclass:: hebel.data_providers.DataProvider 26 | :members: 27 | 28 | Minibatch Data Provider 29 | ----------------------- 30 | 31 | .. autoclass:: hebel.data_providers.MiniBatchDataProvider 32 | :members: 33 | 34 | Multi-Task Data Provider 35 | ------------------------ 36 | 37 | .. autoclass:: hebel.data_providers.MultiTaskDataProvider 38 | :members: 39 | 40 | Batch Data Provider 41 | ------------------- 42 | 43 | .. autoclass:: hebel.data_providers.BatchDataProvider 44 | :members: 45 | 46 | Dummy Data Provider 47 | ------------------- 48 | 49 | .. autoclass:: hebel.data_providers.DummyDataProvider 50 | :members: 51 | 52 | MNIST Data Provider 53 | ------------------- 54 | 55 | .. 
autoclass:: hebel.data_providers.MNISTDataProvider 56 | :members: 57 | 58 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Getting Started 18 | *************** 19 | 20 | There are two basic methods how you can run Hebel: 21 | 22 | #. You can write a YAML configuration file that describes your model 23 | architecture, data set, and hyperparameters and run it using the 24 | :file:`train_model.py` script. 25 | #. In your own Python script or program, you can create instances of 26 | models and optimizers programmatically. 27 | 28 | The first makes estimating a model the easiest, as you don't have to 29 | write any actual code. You simply specify all your parameters and data 30 | set in an easy to read YAML configuration file and pass it to the 31 | :file:`train_model.py` script. The script will create a directory for your 32 | results where it will save intermediary models (in pickle-format), the 33 | logs and final results. 
34 | 35 | The second method gives you more control over how exactly the model is 36 | estimated and lets you interact with Hebel from other Python programs. 37 | 38 | 39 | Running models from YAML configuration files 40 | ============================================ 41 | 42 | If you check the example YAML files in ``examples/`` you will see that the configuration file defines three top-level sections: 43 | 44 | #. ``run_conf``: These options are passed to the method 45 | :meth:`hebel.optimizers.SGD.run()`. 46 | #. ``optimizer``: Here you instantiate a :class:`hebel.optimizers.SGD` 47 | object, including the model you want to train and the data to use 48 | for training and validation. 49 | #. ``test_dataset``: This section is optional, but here you can define 50 | test data to evaluate the model on after training. 51 | 52 | Check out :file:`examples/mnist_neural_net_shallow.yml`, which 53 | includes everything to train a one layer neural network on the `MNIST 54 | dataset `_: 55 | 56 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 57 | 58 | You can see that the only option we pass to ``run_conf`` is the number 59 | of iterations to train the model. 60 | 61 | The ``optimizer`` section is more interesting. Hebel uses the special 62 | ``!obj``, ``!import``, and ``!pkl`` directives from `PyLearn 2 63 | `_. The 64 | ``!obj`` directive is used most extensively and can be used to 65 | instantiate any Python class. First the optimizer 66 | :class:`hebel.optimizers.SGD` is instantiated and in the lines below 67 | we are instantiating the model: 68 | 69 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 70 | :lines: 3-17 71 | 72 | We are designing a model with one hidden layer that has 784 input 73 | units (the dimensionality of the MNIST data) and 2000 hidden units. We 74 | are also using `dropout `_ for 75 | regularization. The logistic output layer uses 10 classes (the number 76 | of classes in the MNIST data). 
You can also add different amounts of 77 | L1 or L2 penalization to each layer, which we are not doing here. 78 | 79 | .. _parameter-updaters: 80 | 81 | Next, we define a ``parameter_updater``, which is a rule that defines 82 | how the weights are updated given the gradients: 83 | 84 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 85 | :lines: 18 86 | 87 | There are currently three choices: 88 | 89 | * :class:`hebel.parameter_updaters.SimpleSGDUpdate`, which performs 90 | regular gradient descent 91 | * :class:`hebel.parameter_updaters.MomentumUpdate`, which performs 92 | gradient descent with momentum, and 93 | * :class:`hebel.parameter_updaters.NesterovMomentumUpdate`, which performs 94 | gradient descent with Nesterov momentum. 95 | 96 | The next two sections define the data for the model. All data must be 97 | given as instances of ``DataProvider`` objects: 98 | 99 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 100 | :lines: 19-25 101 | 102 | A ``DataProvider`` is a class that defines an iterator which returns 103 | successive minibatches of the data as well as saves some metadata, 104 | such as the number of data points. There is a special 105 | :class:`hebel.data_providers.MNISTDataProvider` especially for the 106 | MNIST data. We use the standard splits for training and validation 107 | data here. There are several ``DataProviders`` defined in 108 | :mod:`hebel.data_providers`. 109 | 110 | The next few lines define how some of the hyperparameters are changed 111 | over the course of the training: 112 | 113 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 114 | :lines: 26-31 115 | 116 | The module :mod:`hebel.schedulers` defines several schedulers, which 117 | are basically just simple rules how certain parameters should 118 | evolve. 
Here, we define that the learning rate should decay 119 | exponentially with a factor of 0.995 in every epoch and the momentum 120 | should increase from 0.5 to 0.9 during the first 10 epochs and then 121 | stay at this value. 122 | 123 | The last entry argument to :class:`hebel.optimizers.SGD` is 124 | ``progress_monitor``: 125 | 126 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 127 | :lines: 32-38 128 | 129 | A progress monitor is an object that takes care of reporting periodic 130 | progress of our model, saving snapshots of the model at regular 131 | intervals, etc. When you are using the YAML configuration system, 132 | you'll probably want to use :class:`hebel.monitors.ProgressMonitor`, 133 | which will save logs, outputs, and snapshots to disk. In contrast, 134 | :class:`hebel.monitors.SimpleProgressMonitor` will only print progress 135 | to the terminal without saving the model itself. 136 | 137 | Finally, you can define a test data set to be evaluated after the training completes: 138 | 139 | .. literalinclude:: ../examples/mnist_neural_net_shallow.yml 140 | :lines: 40-43 141 | 142 | Here, we are specifying the MNIST test split. 143 | 144 | Once you have your configuration file defined, you can run it such as in:: 145 | 146 | python train_model.py examples/mnist_neural_net_shallow.yml 147 | 148 | The script will create the output directory you specified in 149 | ``save_model_path`` if it doesn't exist yet and start writing the log 150 | into a file called ``output_log``. If you are interested in keeping an 151 | eye on the training process you can check on that file with:: 152 | 153 | tail -f output_log 154 | 155 | Using Hebel in Your Own Code 156 | ============================ 157 | 158 | If you want more control over the training procedure or integrate 159 | Hebel with your own code, then you can use Hebel programmatically. 
160 | 161 | Unlike the simpler one hidden layer model from the previous part, here 162 | we are going to build a more powerful deep neural net with multiple 163 | hidden layers. 164 | 165 | For an example, have a look at :file:`examples/mnist_neural_net_deep_script.py`: 166 | 167 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 168 | 169 | There are three basic tasks you have to do to train a model in Hebel: 170 | 171 | #. Define the data you want to use for training, validation, or 172 | testing using ``DataProvider`` objects, 173 | #. instantiate a ``Model`` object, and 174 | #. instantiate an ``SGD`` object that will train the model using 175 | stochastic gradient descent. 176 | 177 | Defining a Data Set 178 | ------------------- 179 | 180 | In this example we're using the MNIST data set again through the 181 | :class:`hebel.data_providers.MNISTDataProvider` class: 182 | 183 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 184 | :lines: 9-12 185 | 186 | We create three data sets, corresponding to the official training, 187 | validation, and test data splits of MNIST. For the training data set, 188 | we set a batch size of 100 training examples, while the validation and 189 | test data sets are used as complete batches. 190 | 191 | Instantiating a model 192 | --------------------- 193 | 194 | To train a model, you simply need to create an object representing a 195 | model that inherits from the abstract base class 196 | :class:`hebel.models.Model`. 197 | 198 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 199 | :lines: 17-21 200 | 201 | Currently, Hebel implements the following models: 202 | 203 | * :class:`hebel.models.NeuralNet`: A neural net with any number of 204 | hidden layers for classification, using the cross-entropy loss 205 | function and softmax units in the output layer. 206 | 207 | * :class:`hebel.models.LogisticRegression`: Multi-class logistic 208 | regression. 
Like :class:`hebel.models.NeuralNet` but does not have 209 | any hidden layers. 210 | 211 | * :class:`hebel.models.MultitaskNeuralNet`: A neural net trained on 212 | multiple tasks simultaneously. A multi-task neural net can have any 213 | number of hidden layers with weights that are shared between the 214 | tasks and any number of output layers with separate weights for each 215 | task. 216 | 217 | * :class:`hebel.models.NeuralNetRegression`: A neural net with a 218 | linear regression output layer to model continuous variables. 219 | 220 | The :class:`hebel.models.NeuralNet` model we are using here takes as 221 | input the dimensionality of the data, the number of classes, the sizes 222 | of the hidden layers, the activation function to use, and whether to 223 | use dropout for regularization. There are also a few more options such 224 | as for L1 or L2 weight regularization, that we don't use here. 225 | 226 | Here, we are using the simpler form of the constructor rather than the 227 | extended form that we used in the YAML example. Also we are adding a 228 | small amount of dropout (20%) to the input layer. 229 | 230 | Training the model 231 | ------------------ 232 | 233 | To train the model, you first need to create an instance of 234 | :class:`hebel.optimizers.SGD`: 235 | 236 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 237 | :lines: 23-35 238 | 239 | First we are creating a :class:`hebel.monitors.ProgressMonitor` 240 | object, that will save regular snapshots of the model during training 241 | and save the logs and results to disk. 242 | 243 | Next, we are creating the :class:`hebel.optimizers.SGD` object. We 244 | instantiate the optimizer with the model, the parameter update rule, 245 | training data, validation data, and the schedulers for the learning 246 | rate and the momentum parameters. 247 | 248 | Finally, we can start the training by invoking the 249 | :meth:`hebel.optimizers.SGD.run` method. 
Here we train the model for 250 | 100 epochs. However, by default :class:`hebel.optimizers.SGD` uses 251 | early stopping which means that it remembers the parameters that give 252 | the best result on the validation set and will reset the model 253 | parameters to them after the end of training. 254 | 255 | Evaluating on test data 256 | ----------------------- 257 | 258 | After training is complete we can do anything we want with the trained 259 | model, such as using it in some prediction pipeline, pickle it to 260 | disk, etc. Here we are evaluating the performance of the model on the 261 | MNIST test data split: 262 | 263 | .. literalinclude:: ../examples/mnist_neural_net_deep_script.py 264 | :lines: 37-40 265 | 266 | 267 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | 18 | Welcome to Hebel's documentation! 19 | ================================= 20 | 21 | Contents: 22 | 23 | .. 
toctree:: 24 | :maxdepth: 2 25 | 26 | introduction 27 | installation 28 | getting_started 29 | initialization 30 | data_providers 31 | layers 32 | monitors 33 | models 34 | optimizers 35 | parameter_updaters 36 | schedulers 37 | 38 | 39 | 40 | Indices and tables 41 | ================== 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | 47 | -------------------------------------------------------------------------------- /docs/initialization.rst: -------------------------------------------------------------------------------- 1 | Initialization 2 | ************** 3 | 4 | .. automodule:: hebel 5 | 6 | .. autofunction:: hebel.init 7 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | -------------------------------------------------------------------------------- /docs/introduction.rst: -------------------------------------------------------------------------------- 1 | .. 
Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | -------------------------------------------------------------------------------- /docs/layers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Layers 18 | ****** 19 | 20 | Hidden Layer 21 | ============ 22 | 23 | .. autoclass:: hebel.layers.HiddenLayer 24 | :members: 25 | 26 | .. autoclass:: hebel.layers.InputDropout 27 | :members: 28 | 29 | .. 
autoclass:: hebel.layers.DummyLayer 30 | :members: 31 | 32 | Top Layers 33 | ========== 34 | 35 | Abstract Base Class Top Layer 36 | ----------------------------- 37 | 38 | .. autoclass:: hebel.layers.TopLayer 39 | :members: 40 | 41 | Logistic Layer 42 | -------------- 43 | 44 | .. autoclass:: hebel.layers.LogisticLayer 45 | :members: 46 | 47 | Softmax Layer 48 | ------------- 49 | 50 | .. autoclass:: hebel.layers.SoftmaxLayer 51 | :members: 52 | 53 | Linear Regression Layer 54 | ----------------------- 55 | 56 | .. autoclass:: hebel.layers.LinearRegressionLayer 57 | :members: 58 | 59 | Multitask Top Layer 60 | ------------------- 61 | 62 | .. autoclass:: hebel.layers.MultitaskTopLayer 63 | :members: 64 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. 
text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 
93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Hebel.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Hebel.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. 
The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /docs/models.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Models 18 | ****** 19 | 20 | Abstract Base Class Model 21 | ========================= 22 | 23 | .. 
autoclass:: hebel.models.Model 24 | :members: 25 | 26 | Neural Network 27 | ============== 28 | 29 | .. autoclass:: hebel.models.NeuralNet 30 | :members: 31 | 32 | Neural Network Regression 33 | ========================= 34 | 35 | .. autoclass:: hebel.models.NeuralNetRegression 36 | :members: 37 | 38 | Logistic Regression 39 | =================== 40 | 41 | .. autoclass:: hebel.models.LogisticRegression 42 | :members: 43 | 44 | Multi-Task Neural Net 45 | ===================== 46 | 47 | .. autoclass:: hebel.models.MultitaskNeuralNet 48 | :members: 49 | 50 | -------------------------------------------------------------------------------- /docs/monitors.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Monitors 18 | ******** 19 | 20 | Progress Monitor 21 | ================ 22 | 23 | .. autoclass:: hebel.monitors.ProgressMonitor 24 | :members: 25 | :undoc-members: 26 | 27 | Simple Progress Monitor 28 | ======================= 29 | 30 | .. 
autoclass:: hebel.monitors.SimpleProgressMonitor 31 | :members: 32 | :undoc-members: 33 | -------------------------------------------------------------------------------- /docs/optimizers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Optimizers 18 | ********** 19 | 20 | Stochastic Gradient Descent 21 | =========================== 22 | 23 | .. autoclass:: hebel.optimizers.SGD 24 | :members: 25 | :undoc-members: 26 | -------------------------------------------------------------------------------- /docs/parameter_updaters.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Parameter Updaters 18 | ****************** 19 | 20 | Abstract Base Class 21 | =================== 22 | 23 | .. autoclass:: hebel.parameter_updaters.ParameterUpdater 24 | :members: 25 | :undoc-members: 26 | 27 | Simple SGD Update 28 | ================= 29 | 30 | .. autoclass:: hebel.parameter_updaters.SimpleSGDUpdate 31 | :members: 32 | :undoc-members: 33 | 34 | Momentum Update 35 | =============== 36 | 37 | .. autoclass:: hebel.parameter_updaters.MomentumUpdate 38 | :members: 39 | :undoc-members: 40 | 41 | Nesterov Momentum Update 42 | ======================== 43 | 44 | .. autoclass:: hebel.parameter_updaters.NesterovMomentumUpdate 45 | :members: 46 | :undoc-members: 47 | -------------------------------------------------------------------------------- /docs/requirements_sphinx.txt: -------------------------------------------------------------------------------- 1 | mock 2 | -------------------------------------------------------------------------------- /docs/schedulers.rst: -------------------------------------------------------------------------------- 1 | .. Copyright (C) 2013 Hannes Bretschneider 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License along 14 | with this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | Schedulers 18 | ********** 19 | 20 | Constant Scheduler 21 | ================== 22 | 23 | .. autofunction:: hebel.schedulers.constant_scheduler 24 | 25 | Exponential Scheduler 26 | ===================== 27 | 28 | .. autofunction:: hebel.schedulers.exponential_scheduler 29 | 30 | Linear Scheduler Up 31 | =================== 32 | 33 | .. autofunction:: hebel.schedulers.linear_scheduler_up 34 | 35 | Linear Scheduler Up-Down 36 | ======================== 37 | 38 | .. autofunction:: hebel.schedulers.linear_scheduler_up_down 39 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_deep.yml: -------------------------------------------------------------------------------- 1 | run_conf: 2 | iterations: 3000 3 | optimizer: !obj:hebel.optimizers.SGD { 4 | model: !obj:hebel.models.NeuralNet { 5 | layers: [ 6 | !obj:hebel.layers.InputDropout { 7 | n_in: 784, 8 | dropout_probability: .2, 9 | }, 10 | !obj:hebel.layers.HiddenLayer { 11 | n_in: 784, 12 | n_units: 2000, 13 | activation_function: relu, 14 | dropout: yes, 15 | l2_penalty_weight: .0 16 | }, 17 | !obj:hebel.layers.HiddenLayer { 18 | n_in: 2000, 19 | n_units: 2000, 20 | activation_function: relu, 21 | dropout: yes, 22 | l2_penalty_weight: .0 23 | }, 24 | !obj:hebel.layers.HiddenLayer { 25 | n_in: 2000, 26 | n_units: 2000, 27 | activation_function: relu, 28 | dropout: yes, 29 | l2_penalty_weight: .0 30 | }, 31 | !obj:hebel.layers.HiddenLayer { 32 | n_in: 2000, 33 | n_units: 500, 34 | activation_function: relu, 35 | dropout: yes, 36 | l2_penalty_weight: .0 37 | }, 38 | !obj:hebel.layers.HiddenLayer { 39 | n_in: 500, 40 | n_units: 500, 41 | activation_function: relu, 42 | dropout: yes, 43 | l2_penalty_weight: .0 44 | } 45 | ], 46 | 
top_layer: !obj:hebel.layers.SoftmaxLayer { 47 | n_in: 500, 48 | n_out: 10 49 | } 50 | }, 51 | parameter_updater: !import hebel.parameter_updaters.NesterovMomentumUpdate, 52 | train_data: !obj:hebel.data_providers.MNISTDataProvider { 53 | batch_size: 100, 54 | array: train 55 | }, 56 | validation_data: !obj:hebel.data_providers.MNISTDataProvider { 57 | array: val 58 | }, 59 | learning_rate_schedule: !obj:hebel.schedulers.exponential_scheduler { 60 | init_value: 2., decay: .995 61 | }, 62 | momentum_schedule: !obj:hebel.schedulers.linear_scheduler_up { 63 | init_value: .1, target_value: .99, duration: 200 64 | }, 65 | progress_monitor: 66 | !obj:hebel.monitors.ProgressMonitor { 67 | experiment_name: mnist_deep, 68 | save_model_path: examples/mnist, 69 | output_to_log: yes 70 | } 71 | } 72 | test_dataset: 73 | test_data: !obj:hebel.data_providers.MNISTDataProvider { 74 | array: test 75 | } 76 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_deep_script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import hebel 4 | from hebel.models import NeuralNet 5 | from hebel.optimizers import SGD 6 | from hebel.parameter_updaters import MomentumUpdate 7 | from hebel.data_providers import MNISTDataProvider 8 | from hebel.monitors import ProgressMonitor 9 | from hebel.schedulers import exponential_scheduler, linear_scheduler_up 10 | 11 | hebel.init(random_seed=0) 12 | 13 | # Initialize data providers 14 | train_data = MNISTDataProvider('train', batch_size=100) 15 | validation_data = MNISTDataProvider('val') 16 | test_data = MNISTDataProvider('test') 17 | 18 | D = train_data.D # Dimensionality of inputs 19 | K = 10 # Number of classes 20 | 21 | # Create model object 22 | model = NeuralNet(n_in=train_data.D, n_out=K, 23 | layers=[2000, 2000, 2000, 500], 24 | activation_function='relu', 25 | dropout=True, input_dropout=0.2) 26 | 27 | # Create optimizer 
object 28 | progress_monitor = ProgressMonitor( 29 | experiment_name='mnist', 30 | save_model_path='examples/mnist', 31 | save_interval=5, 32 | output_to_log=True) 33 | 34 | optimizer = SGD(model, MomentumUpdate, train_data, validation_data, progress_monitor, 35 | learning_rate_schedule=exponential_scheduler(5., .995), 36 | momentum_schedule=linear_scheduler_up(.1, .9, 100)) 37 | 38 | # Run model 39 | optimizer.run(50) 40 | 41 | # Evaulate error on test set 42 | test_error = model.test_error(test_data) 43 | print "Error on test set: %.3f" % test_error 44 | -------------------------------------------------------------------------------- /examples/mnist_neural_net_shallow.yml: -------------------------------------------------------------------------------- 1 | run_conf: 2 | iterations: 50 3 | optimizer: !obj:hebel.optimizers.SGD { 4 | model: !obj:hebel.models.NeuralNet { 5 | layers: [ 6 | !obj:hebel.layers.HiddenLayer { 7 | n_in: 784, 8 | n_units: 2000, 9 | dropout: yes, 10 | l2_penalty_weight: .0 11 | } 12 | ], 13 | top_layer: !obj:hebel.layers.SoftmaxLayer { 14 | n_in: 2000, 15 | n_out: 10 16 | } 17 | }, 18 | parameter_updater: !import hebel.parameter_updaters.MomentumUpdate, 19 | train_data: !obj:hebel.data_providers.MNISTDataProvider { 20 | batch_size: 100, 21 | array: train 22 | }, 23 | validation_data: !obj:hebel.data_providers.MNISTDataProvider { 24 | array: val 25 | }, 26 | learning_rate_schedule: !obj:hebel.schedulers.exponential_scheduler { 27 | init_value: 30., decay: .995 28 | }, 29 | momentum_schedule: !obj:hebel.schedulers.linear_scheduler_up { 30 | init_value: .5, target_value: .9, duration: 10 31 | }, 32 | progress_monitor: 33 | !obj:hebel.monitors.ProgressMonitor { 34 | experiment_name: mnist_shallow, 35 | save_model_path: examples/mnist, 36 | output_to_log: yes 37 | } 38 | } 39 | test_dataset: 40 | test_data: !obj:hebel.data_providers.MNISTDataProvider { 41 | array: test 42 | } 43 | 
-------------------------------------------------------------------------------- /examples/neural_net_regression_example.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """Example of neural net with a linear regression output layer, using 18 | the Boston dataset. 
19 | 20 | """ 21 | 22 | def main(): 23 | import numpy as np 24 | import pycuda.autoinit 25 | from pycuda import gpuarray 26 | from skdata import toy 27 | from hebel import memory_pool 28 | from hebel.data_providers import BatchDataProvider 29 | from hebel.models import NeuralNetRegression 30 | from hebel.optimizers import SGD 31 | from hebel.parameter_updaters import SimpleSGDUpdate 32 | from hebel.monitors import SimpleProgressMonitor 33 | from hebel.schedulers import exponential_scheduler 34 | 35 | # Get data 36 | data_cpu, targets_cpu = toy.Boston().regression_task() 37 | data = gpuarray.to_gpu(data_cpu.astype(np.float32), allocator=memory_pool.allocate) 38 | targets = gpuarray.to_gpu(targets_cpu.astype(np.float32), allocator=memory_pool.allocate) 39 | data_provider = BatchDataProvider(data, targets) 40 | 41 | # Create model object 42 | model = NeuralNetRegression(n_in=data_cpu.shape[1], n_out=targets_cpu.shape[1], 43 | layers=[100], activation_function='relu') 44 | 45 | # Create optimizer object 46 | optimizer = SGD(model, SimpleSGDUpdate, data_provider, data_provider, 47 | learning_rate_schedule=exponential_scheduler(.1, .9999), 48 | early_stopping=True) 49 | optimizer.run(3000) 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /hebel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """Before Hebel can be used, it must be initialized using the function 18 | :func:`hebel.init`. 19 | 20 | """ 21 | 22 | import numpy as np 23 | import pycuda.driver as cuda 24 | cuda.init() 25 | from pycuda.tools import make_default_context, DeviceMemoryPool 26 | 27 | import os as _os 28 | neural_nets_root = _os.path.split( 29 | _os.path.abspath(_os.path.dirname(__file__)))[0] 30 | 31 | is_initialized = False 32 | 33 | class _Sampler(object): 34 | _sampler = None 35 | 36 | def __getattribute__(self, name): 37 | if name in ('seed', 'set_seed'): 38 | return object.__getattribute__(self, name) 39 | 40 | sampler = object.__getattribute__(self, '_sampler') 41 | if sampler is None: 42 | from pycuda import curandom, gpuarray 43 | seed_func = curandom.seed_getter_uniform if self.seed is None \ 44 | else lambda N: gpuarray.to_gpu( 45 | np.array(N * [self.seed], dtype=np.int32), 46 | allocator=memory_pool.allocate) 47 | sampler = curandom.XORWOWRandomNumberGenerator(seed_func) 48 | self._sampler = sampler 49 | return sampler.__getattribute__(name) 50 | 51 | def set_seed(self, seed): 52 | self.seed = seed 53 | self._sampler = None 54 | sampler = _Sampler() 55 | 56 | class _Context(object): 57 | _context = None 58 | 59 | def init_context(self, device_id=None): 60 | if device_id is None: 61 | context = make_default_context() 62 | self._context = context 63 | else: 64 | context = cuda.Device(device_id).make_context() 65 | self._context = context 66 | 67 | def __getattribute__(self, name): 68 | if name in 'init_context': 69 | return object.__getattribute__(self, name) 70 | 71 | if object.__getattribute__(self, '_context') is None: 72 | raise RuntimeError("Context hasn't been 
initialized yet") 73 | 74 | return object.__getattribute__(self, '_context').__getattribute__(name) 75 | 76 | context = _Context() 77 | 78 | 79 | class _MemoryPool(object): 80 | _memory_pool = None 81 | 82 | def init(self): 83 | self._memory_pool = DeviceMemoryPool() 84 | 85 | def __getattribute__(self, name): 86 | if name == 'init': 87 | return object.__getattribute__(self, name) 88 | 89 | if object.__getattribute__(self, '_memory_pool') is None: 90 | raise RuntimeError("Memory Pool hasn't been initialized yet") 91 | 92 | return object.__getattribute__(self, '_memory_pool').__getattribute__(name) 93 | memory_pool = _MemoryPool() 94 | 95 | 96 | def init(device_id=None, random_seed=None): 97 | """Initialize Hebel. 98 | 99 | This function creates a CUDA context, CUBLAS context and 100 | initializes and seeds the pseudo-random number generator. 101 | 102 | **Parameters:** 103 | 104 | device_id : integer, optional 105 | The ID of the GPU device to use. If this is omitted, PyCUDA's 106 | default context is used, which by default uses the fastest 107 | available device on the system. Alternatively, you can put the 108 | device id in the environment variable ``CUDA_DEVICE`` or into 109 | the file ``.cuda-device`` in the user's home directory. 110 | 111 | random_seed : integer, optional 112 | The seed to use for the pseudo-random number generator. If 113 | this is omitted, the seed is taken from the environment 114 | variable ``RANDOM_SEED`` and if that is not defined, a random 115 | integer is used as a seed. 
116 | """ 117 | 118 | if device_id is None: 119 | random_seed = _os.environ.get('CUDA_DEVICE') 120 | 121 | if random_seed is None: 122 | random_seed = _os.environ.get('RANDOM_SEED') 123 | 124 | global is_initialized 125 | if not is_initialized: 126 | is_initialized = True 127 | 128 | global context 129 | context.init_context(device_id) 130 | 131 | from pycuda import gpuarray, driver, curandom 132 | 133 | # Initialize memory pool 134 | global memory_pool 135 | memory_pool.init() 136 | 137 | # Initialize PRG 138 | global sampler 139 | sampler.set_seed(random_seed) 140 | 141 | # Initialize pycuda_ops 142 | from hebel import pycuda_ops 143 | pycuda_ops.init() 144 | 145 | 146 | def _finish_up(): 147 | global is_initialized 148 | if is_initialized: 149 | global context 150 | context.pop() 151 | context = None 152 | 153 | from pycuda.tools import clear_context_caches 154 | clear_context_caches() 155 | is_initialized = False 156 | 157 | import atexit 158 | atexit.register(_finish_up) 159 | -------------------------------------------------------------------------------- /hebel/cross_validation.py: -------------------------------------------------------------------------------- 1 | from .utils.math import ceil_div 2 | import numpy as np 3 | import os 4 | from hebel.optimizers import SGD 5 | from hebel import memory_pool 6 | 7 | class CrossValidation(object): 8 | def __init__(self, config, data): 9 | 10 | self.n_folds = config['n_folds'] 11 | self.n_data = config['n_data'] 12 | self.validation_share = config['validation_share'] 13 | 14 | self.fold_size = ceil_div(self.n_data, self.n_folds) 15 | self.N_train_validate = self.n_data - self.fold_size 16 | self.N_train = int(np.ceil((1. 
- self.validation_share) * self.N_train_validate)) 17 | 18 | self.models_cv = [] 19 | self.progress_monitors_cv = [] 20 | self.fold_idx = [] 21 | 22 | self.fold_stats = [] 23 | 24 | self.train_error = { 25 | 'training_error': [], 26 | 'validation_error': [] 27 | } 28 | 29 | self.predictions = None 30 | self.config = config 31 | self.data = data 32 | 33 | np.random.seed(config.get('numpy_seed')) 34 | 35 | def run_fold(self, k): 36 | memory_pool.free_held() 37 | fold_range = (k*self.fold_size, min((k+1)*self.fold_size, self.n_data)) 38 | test_idx = np.arange(fold_range[0], fold_range[1], dtype=np.int32) 39 | 40 | train_validate_idx = np.random.permutation( 41 | np.r_[np.arange(0, fold_range[0], dtype=np.int32), 42 | np.arange(fold_range[1], self.n_data, dtype=np.int32)]) 43 | train_idx = train_validate_idx[:self.N_train] 44 | validate_idx = train_validate_idx[self.N_train:] 45 | 46 | self.fold_idx.append({ 47 | 'test_idx': test_idx, 48 | 'train_idx': train_idx, 49 | 'validate_idx': validate_idx 50 | }) 51 | 52 | dp_train = self.make_data_provider(train_idx, 53 | self.config.get('batch_size_train')) 54 | dp_validate = self.make_data_provider(validate_idx, 55 | self.config.get('batch_size_validate')) 56 | dp_test = self.make_data_provider(test_idx, 57 | self.config.get('batch_size_test')) 58 | 59 | model = self.make_model() 60 | model.calibrate_learning_rate(dp_train) 61 | self.models_cv.append(model) 62 | 63 | progress_monitor = self.make_progress_monitor(k) 64 | self.progress_monitors_cv.append(progress_monitor) 65 | 66 | learning_rate_schedule = self.config['learning_rate_fct'](**self.config['learning_rate_params']) 67 | 68 | momentum_schedule = self.config['momentum_schedule_fct'](**self.config['momentum_schedule_params']) \ 69 | if 'momentum_schedule_fct' in self.config else None 70 | 71 | optimizer = SGD(model, self.config['parameter_updater'], dp_train, dp_validate, 72 | progress_monitor, 73 | learning_rate_schedule=learning_rate_schedule, 74 | 
momentum_schedule=momentum_schedule, 75 | early_stopping=self.config.get('early_stopping', True)) 76 | 77 | optimizer.run(self.config['epochs'], 78 | validation_interval=self.config.get('validation_interval', 5), 79 | yaml_config=self.config['yaml_config']) 80 | 81 | stats = self.get_stats(dp_train, dp_test, model) 82 | self.fold_stats.append(stats) 83 | 84 | predictions_fold = model.feed_forward(dp_test.data).get() 85 | self.predictions = np.r_[self.predictions, predictions_fold] \ 86 | if self.predictions is not None else predictions_fold 87 | 88 | self.make_figures(model, progress_monitor, k) 89 | 90 | self.train_error['training_error'].append(progress_monitor.train_error) 91 | self.train_error['validation_error'].append(progress_monitor.validation_error) 92 | 93 | 94 | del optimizer, dp_train, dp_validate, dp_test 95 | 96 | def run(self): 97 | for k in range(self.n_folds): 98 | self.run_fold(k) 99 | 100 | def make_data_provider(self, idx, batch_size): 101 | raise NotImplementedError 102 | 103 | def make_model(self): 104 | raise NotImplementedError 105 | 106 | def make_progress_monitor(self, fold): 107 | raise NotImplementedError 108 | 109 | def get_stats_func(self, dp_train, dp_test, model): 110 | return {} 111 | 112 | def make_figures(self, model, progress_monitor, fold): 113 | pass 114 | 115 | def post_run(self): 116 | pass 117 | -------------------------------------------------------------------------------- /hebel/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Re-export the public layer classes at package level.
# NOTE: import order matters here -- column.py does ``from . import
# HiddenLayer``, so hidden_layer must already be imported when .column
# (and .multi_column_layer / .flattening_layer) are loaded.
from .dummy_layer import DummyLayer
from .hidden_layer import HiddenLayer
from .softmax_layer import SoftmaxLayer
from .logistic_layer import LogisticLayer
from .multitask_top_layer import MultitaskTopLayer
from .top_layer import TopLayer
from .linear_regression_layer import LinearRegressionLayer
from .input_dropout import InputDropout
from .column import Column
from .multi_column_layer import MultiColumnLayer
from .flattening_layer import FlatteningLayer
--------------------------------------------------------------------------------
/hebel/layers/column.py:
--------------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from . import HiddenLayer 18 | from itertools import chain 19 | 20 | class Column(object): 21 | l1_penalty_weight = True 22 | l2_penalty_weight = True 23 | 24 | def __init__(self, hidden_layers): 25 | assert all([isinstance(hl, HiddenLayer) for hl in hidden_layers]) 26 | self.hidden_layers = hidden_layers 27 | 28 | @property 29 | def n_parameters(self): 30 | return sum(hl.n_parameters for hl in self.hidden_layers) 31 | 32 | @property 33 | def n_units(self): 34 | return self.hidden_layers[-1].n_units 35 | 36 | @property 37 | def n_in(self): 38 | return self.hidden_layers[0].n_in 39 | 40 | @property 41 | def parameters(self): 42 | return list(chain.from_iterable(hl.parameters for hl in self.hidden_layers)) 43 | 44 | @parameters.setter 45 | def parameters(self, new_parameters): 46 | for hl in self.hidden_layers: 47 | hl.parameters = new_parameters[:hl.n_parameters] 48 | new_parameters = new_parameters[hl.n_parameters:] 49 | 50 | def update_parameters(self, values, stream=None): 51 | assert len(values) == self.n_parameters 52 | 53 | for hl in self.hidden_layers: 54 | hl.update_parameters(values[:hl.n_parameters]) 55 | values = values[hl.n_parameters:] 56 | 57 | @property 58 | def l1_penalty(self): 59 | return sum(hl.l1_penalty for hl in self.hidden_layers) 60 | 61 | @property 62 | def l2_penalty(self): 63 | return sum(hl.l2_penalty for hl in self.hidden_layers) 64 | 65 | @property 66 | def lr_multiplier(self): 67 | return tuple(chain.from_iterable((hl.lr_multiplier for hl in self.hidden_layers))) 68 | 69 | @lr_multiplier.setter 70 | def lr_multiplier(self, value): 71 | assert self.n_parameters == len(value) 72 | i = 0 73 | for hl in self.hidden_layers: 74 | hl.lr_multiplier = value[i:i+hl.n_parameters] 75 | i += hl.n_parameters 76 | 
77 | def feed_forward(self, input_data, prediction=False): 78 | cache = [] 79 | activations = [input_data] 80 | a = input_data 81 | for hl in self.hidden_layers: 82 | c = hl.feed_forward(a, prediction) 83 | a = c[0] 84 | activations.append(c[0]) 85 | cache.append(c) 86 | 87 | del activations[-1] 88 | return a, (activations, cache) 89 | 90 | def backprop(self, input_data, df_output, cache=None): 91 | if cache is None: 92 | _, (activations, cache) = self.feed_forward(input_data, False) 93 | else: 94 | _, (activations, cache) = cache 95 | 96 | df_param = [] 97 | df_input = df_output 98 | for hl, a, c in zip(self.hidden_layers[::-1], activations[::-1], cache[::-1]): 99 | df_p, df_input = hl.backprop(a, df_input, c) 100 | df_param.append(df_p) 101 | 102 | df_param.reverse() 103 | df_param = list(chain.from_iterable(df_param)) 104 | 105 | return df_param, df_input -------------------------------------------------------------------------------- /hebel/layers/dummy_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
16 | 17 | from .hidden_layer import HiddenLayer 18 | 19 | 20 | class DummyLayer(HiddenLayer): 21 | """ This class has no hidden units and simply passes through its 22 | input 23 | """ 24 | 25 | lr_multiplier = [] 26 | n_parameters = 0 27 | l1_penalty_weight = 0. 28 | l2_penalty_weight = 0. 29 | dropout = 0. 30 | 31 | def __init__(self, n_in): 32 | self.n_in = n_in 33 | self.n_units = n_in 34 | 35 | @property 36 | def parameters(self): 37 | return [] 38 | 39 | @parameters.setter 40 | def parameters(self, value): 41 | pass 42 | 43 | def update_parameters(self, values, stream=None): 44 | pass 45 | 46 | @property 47 | def l1_penalty(self): 48 | return 0. 49 | 50 | @property 51 | def l2_penalty(self): 52 | return 0. 53 | 54 | def feed_forward(self, input_data, prediction=False): 55 | if input_data.shape[1] != self.n_in: 56 | raise ValueError('Number of outputs from previous layer (%d) ' 57 | 'does not match number of inputs to this layer (%d)' % 58 | (input_data.shape[1], self.n_in)) 59 | return (input_data,) 60 | 61 | def backprop(self, input_data, df_output, cache=None): 62 | return tuple(), df_output 63 | -------------------------------------------------------------------------------- /hebel/layers/flattening_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from . import HiddenLayer 19 | 20 | class FlatteningLayer(HiddenLayer): 21 | n_parameters = 0 22 | lr_multiplier = [] 23 | 24 | def __init__(self, n_in, n_filters, 25 | l1_penalty_weight=0., l2_penalty_weight=0.): 26 | self.n_in = n_in 27 | self.n_filters = n_filters 28 | self.n_units = n_in * n_filters 29 | 30 | self.l1_penalty_weight = 0. 31 | self.l2_penalty_weight = 0. 32 | 33 | def feed_forward(self, input_data, prediction=False): 34 | N = input_data.shape[0] 35 | return input_data.reshape((N, self.n_units)), None 36 | 37 | def backprop(self, input_data, df_output, cache=None): 38 | N = input_data.shape[0] 39 | return tuple(), df_output.reshape((N, self.n_in, self.n_filters)) 40 | 41 | @property 42 | def parameters(self): 43 | return [] 44 | 45 | @parameters.setter 46 | def parameters(self, value): 47 | pass 48 | 49 | def update_parameters(self, values, stream=None): 50 | pass 51 | 52 | @property 53 | def l1_penalty(self): 54 | return 0. 55 | 56 | @property 57 | def l2_penalty(self): 58 | return 0. 59 | -------------------------------------------------------------------------------- /hebel/layers/input_dropout.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | import cPickle 19 | from pycuda import gpuarray 20 | from .dummy_layer import DummyLayer 21 | from .. import memory_pool 22 | from ..pycuda_ops.elementwise import sample_dropout_mask, \ 23 | apply_dropout_mask 24 | from ..pycuda_ops.matrix import add_vec_to_mat 25 | from ..pycuda_ops.reductions import matrix_sum_out_axis 26 | 27 | class InputDropout(DummyLayer): 28 | r"""This layer performs dropout on the input data. 29 | 30 | It does not have any learnable parameters of its own. It should be 31 | used as the first layer and will perform dropout with any dropout 32 | probability on the incoming data. 33 | 34 | **Parameters:** 35 | 36 | n_in : integer 37 | Number of input units. 38 | 39 | dropout_probability : float in [0, 1) 40 | Probability of dropping out each input during training. Default is 0.2. 41 | 42 | compute_input_gradients : Bool 43 | Whether to compute the gradients with respect to the input 44 | data. This only necessary if you're training a model where the 45 | input itself is learned. 46 | 47 | """ 48 | 49 | def __init__(self, n_in, dropout_probability=.2, 50 | compute_input_gradients=False): 51 | self.n_in = n_in 52 | self.n_units = n_in 53 | 54 | assert dropout_probability >= 0. and \ 55 | dropout_probability <= 1. 56 | self.dropout_probability = dropout_probability 57 | self.compute_input_gradients = compute_input_gradients 58 | 59 | def feed_forward(self, input_data, prediction=False): 60 | """Propagate forward through the layer 61 | 62 | **Parameters:** 63 | 64 | input_data : ``GPUArray`` 65 | Inpute data to perform dropout on. 66 | 67 | prediction : bool, optional 68 | Whether to use prediction model. 
If true, then the data is 69 | scaled by ``1 - dropout_probability`` uses dropout. 70 | 71 | **Returns:** 72 | 73 | dropout_data : ``GPUArray`` 74 | The data after performing dropout. 75 | """ 76 | 77 | if input_data.shape[1] != self.n_in: 78 | raise ValueError('Number of outputs from previous layer (%d) ' 79 | 'does not match number of inputs to this layer (%d)' % 80 | (input_data.shape[1], self.n_in)) 81 | 82 | if not prediction: 83 | dropout_input = gpuarray.empty_like(input_data) 84 | dropout_mask = sample_dropout_mask(input_data, 85 | self.dropout_probability, target=dropout_input 86 | ) 87 | return dropout_input, dropout_mask 88 | else: 89 | return (input_data * (1 - self.dropout_probability),) 90 | 91 | def backprop(self, input_data, df_output, cache=None): 92 | """ Backpropagate through the hidden layer 93 | 94 | **Parameters:** 95 | 96 | input_data : ``GPUArray`` 97 | Inpute data to perform dropout on. 98 | 99 | df_output : ``GPUArray`` 100 | Gradients with respect to the output of this layer 101 | (received from the layer above). 102 | 103 | cache : list of ``GPUArray`` 104 | Cache obtained from forward pass. If the cache is 105 | provided, then the activations are not recalculated. 106 | 107 | **Returns:** 108 | 109 | gradients : empty tuple 110 | Gradients are empty since this layer has no parameters. 111 | 112 | df_input : ``GPUArray`` 113 | Gradients with respect to the input. 
114 | """ 115 | 116 | if self.compute_input_gradients: 117 | apply_dropout_mask(df_output, dropout_mask) 118 | 119 | return tuple(), df_output 120 | -------------------------------------------------------------------------------- /hebel/layers/linear_regression_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray, cumath 19 | from math import sqrt 20 | from .. import sampler, memory_pool 21 | from .softmax_layer import SoftmaxLayer 22 | from ..pycuda_ops.elementwise import sign, nan_to_zeros 23 | from ..pycuda_ops.reductions import matrix_sum_out_axis 24 | from ..pycuda_ops.matrix import add_vec_to_mat 25 | from ..pycuda_ops import linalg 26 | 27 | 28 | class LinearRegressionLayer(SoftmaxLayer): 29 | r"""Linear regression layer with linear outputs and squared loss error function. 30 | 31 | **Parameters:** 32 | 33 | n_in : integer 34 | Number of input units. 35 | 36 | n_out : integer 37 | Number of output units (classes). 38 | 39 | parameters : array_like of ``GPUArray`` 40 | Parameters used to initialize the layer. 
If this is omitted, 41 | then the weights are initalized randomly using *Bengio's rule* 42 | (uniform distribution with scale :math:`4 \cdot \sqrt{6 / 43 | (\mathtt{n\_in} + \mathtt{n\_out})}`) and the biases are 44 | initialized to zero. If ``parameters`` is given, then is must 45 | be in the form ``[weights, biases]``, where the shape of 46 | weights is ``(n_in, n_out)`` and the shape of ``biases`` is 47 | ``(n_out,)``. Both weights and biases must be ``GPUArray``. 48 | 49 | weights_scale : float, optional 50 | If ``parameters`` is omitted, then this factor is used as 51 | scale for initializing the weights instead of *Bengio's rule*. 52 | 53 | l1_penalty_weight : float, optional 54 | Weight used for L1 regularization of the weights. 55 | 56 | l2_penalty_weight : float, optional 57 | Weight used for L2 regularization of the weights. 58 | 59 | lr_multiplier : float, optional 60 | If this parameter is omitted, then the learning rate for the 61 | layer is scaled by :math:`2 / \sqrt{\mathtt{n\_in}}`. You may 62 | specify a different factor here. 63 | 64 | test_error_fct : {``class_error``, ``kl_error``, ``cross_entropy_error``}, optional 65 | Which error function to use on the test set. Default is 66 | ``class_error`` for classification error. Other choices are 67 | ``kl_error``, the Kullback-Leibler divergence, or 68 | ``cross_entropy_error``. 69 | 70 | **See also:** 71 | 72 | :class:`hebel.models.NeuralNetRegression`, 73 | :class:`hebel.models.NeuralNet`, 74 | :class:`hebel.layers.LogisticLayer` 75 | 76 | """ 77 | 78 | 79 | n_parameters = 2 80 | 81 | def __init__(self, n_in, n_out, 82 | parameters=None, 83 | weights_scale=None, 84 | l1_penalty_weight=0., 85 | l2_penalty_weight=0., 86 | lr_multiplier=None): 87 | 88 | # Initialize weight using Bengio's rule 89 | self.weights_scale = 4 * sqrt(6. 
/ (n_in + n_out)) \ 90 | if weights_scale is None \ 91 | else weights_scale 92 | 93 | if parameters is not None: 94 | self.W, self.b = parameters 95 | else: 96 | self.W = gpuarray.empty((n_in, n_out), dtype=np.float32, 97 | allocator=memory_pool.allocate) 98 | sampler.fill_uniform(self.W) 99 | self.W = self.weights_scale * (self.W -.5) 100 | 101 | self.b = gpuarray.zeros((n_out,), dtype=np.float32, 102 | allocator=memory_pool.allocate) 103 | 104 | self.n_in = n_in 105 | self.n_out = n_out 106 | 107 | self.l1_penalty_weight = l1_penalty_weight 108 | self.l2_penalty_weight = l2_penalty_weight 109 | 110 | self.lr_multiplier = 2 * [1. / np.sqrt(n_in, dtype=np.float32)] \ 111 | if lr_multiplier is None else lr_multiplier 112 | 113 | def feed_forward(self, input_data, prediction=False): 114 | """Propagate forward through the layer. 115 | 116 | **Parameters:** 117 | 118 | input_data : ``GPUArray`` 119 | Inpute data to compute activations for. 120 | 121 | prediction : bool, optional 122 | Whether to use prediction model. Only relevant when using 123 | dropout. If true, then weights are multiplied by 124 | 1 - dropout if the layer uses dropout. 125 | 126 | **Returns:** 127 | 128 | activations : ``GPUArray`` 129 | The activations of the output units. 130 | """ 131 | 132 | if input_data.shape[1] != self.W.shape[0]: 133 | raise ValueError('Number of outputs from previous layer (%d) ' 134 | 'does not match number of inputs to this layer (%d)' % 135 | (input_data.shape[1], self.W.shape[0])) 136 | 137 | activations = linalg.dot(input_data, self.W) 138 | activations = add_vec_to_mat(activations, self.b, inplace=True) 139 | 140 | return activations 141 | 142 | def test_error(self, input_data, targets, average=True, 143 | cache=None, prediction=True): 144 | """Compute the test error function given some data and targets. 
class MultiColumnLayer(HiddenLayer):
    """Hidden layer composed of several parallel columns.

    Each column (a :class:`Column` or a single :class:`HiddenLayer`)
    processes its own slice of the input and writes its activations into
    its own slice of the output. The layer's parameter list is the
    concatenation of the columns' parameter lists. Columns may share
    weights through master/slave hidden layers; ``backprop`` folds each
    slave layer's gradients into the corresponding master's gradients.

    **Parameters:**

    columns : list of :class:`Column` or :class:`HiddenLayer`
        The parallel columns making up this layer.

    input_as_list : bool, optional
        If ``True``, the input to ``feed_forward``/``backprop`` is a
        list with one ``GPUArray`` per column; if ``False`` (default),
        it is a single ``GPUArray`` that is split by column along the
        feature axis.
    """

    l1_penalty_weight = True
    l2_penalty_weight = True

    def __init__(self, columns, input_as_list=False):
        assert all([isinstance(c, (Column, HiddenLayer)) for c in columns])
        self.columns = columns
        self.input_as_list = input_as_list
        self._setup_weight_sharing()

    def _setup_weight_sharing(self):
        # Build two index structures over the flattened parameter list:
        # * master_param_idx: positions of parameters owned by master layers
        # * shared_idx: (slave_position, master_position) pairs used by
        #   backprop to add a slave's gradient onto its master's gradient
        i = 0
        shared_idx = []
        master_layers = [hl for column in self.columns
                         for hl in column.hidden_layers if hl.is_master_layer]
        master_param_idx = []
        for column in self.columns:
            for hl in column.hidden_layers:
                if not hl.is_master_layer:
                    ml = hl.master_layer
                    ml_idx = master_layers.index(ml)
                    # Offset of the master layer's first parameter within
                    # the list of master parameters
                    idx_start = sum(l.n_parameters
                                    for l in master_layers[:ml_idx])
                    shared_idx.extend(
                        zip(range(i, i + ml.n_parameters),
                            range(idx_start, idx_start + ml.n_parameters)))
                    i += ml.n_parameters
                else:
                    master_param_idx.extend(range(i, i + hl.n_parameters))
                    i += hl.n_parameters
        self.master_param_idx = master_param_idx
        self.shared_idx = shared_idx

    @property
    def n_in(self):
        return sum(c.n_in for c in self.columns)

    @property
    def n_units(self):
        return sum(c.n_units for c in self.columns)

    @property
    def n_parameters(self):
        return sum(c.n_parameters for c in self.columns)

    @property
    def parameters(self):
        return tuple(chain.from_iterable(c.parameters for c in self.columns))

    @parameters.setter
    def parameters(self, value):
        assert len(value) == self.n_parameters

        i = 0
        for c in self.columns:
            c.parameters = value[i:i + c.n_parameters]
            i += c.n_parameters

    def update_parameters(self, values, stream=None):
        assert len(values) == self.n_parameters

        i = 0
        for c in self.columns:
            c.update_parameters(values[i:i + c.n_parameters])
            i += c.n_parameters

    @property
    def l1_penalty(self):
        return sum(c.l1_penalty for c in self.columns if c.l1_penalty_weight)

    @property
    def l2_penalty(self):
        return sum(c.l2_penalty for c in self.columns if c.l2_penalty_weight)

    # Bug fix: lr_multiplier was defined twice in this class. The first
    # definition (a tuple built with itertools.chain, no setter) was dead
    # code, silently shadowed by this later definition; it has been removed.
    @property
    def lr_multiplier(self):
        return [lr for column in self.columns
                for lr in column.lr_multiplier]

    @lr_multiplier.setter
    def lr_multiplier(self, value):
        assert len(value) == self.n_parameters

        i = 0
        for column in self.columns:
            column.lr_multiplier = value[i:i + column.n_parameters]
            i += column.n_parameters

    def feed_forward(self, input_data, prediction=False):
        """Run all columns and concatenate their outputs.

        Returns ``(output, cache)`` where ``cache`` is a list of
        ``(input_column, column_cache)`` pairs, one per column.
        """
        if self.input_as_list:
            return self._feed_forward_list(input_data, prediction)
        else:
            return self._feed_forward_array(input_data, prediction)

    def _feed_forward_list(self, input_data, prediction=False):
        # One input array per column; write each column's activations
        # into its slice of the output.
        output = gpuarray.empty((input_data[0].shape[0], self.n_units),
                                np.float32, allocator=memory_pool.allocate)
        cache = []
        i_out = 0
        for column, input_column in zip(self.columns, input_data):
            c = column.feed_forward(input_column, prediction)
            cache.append((input_column, c))
            insert_columns(c[0], output, i_out)
            i_out += column.n_units

        return output, cache

    def _feed_forward_array(self, input_data, prediction=False):
        # Single input array; slice out each column's share of the features.
        output = gpuarray.empty((input_data.shape[0], self.n_units),
                                np.float32, allocator=memory_pool.allocate)
        cache = []
        i_in = 0
        i_out = 0
        for column in self.columns:
            input_column = extract_columns(input_data, i_in, i_in + column.n_in)
            c = column.feed_forward(input_column, prediction)
            cache.append((input_column, c))
            insert_columns(c[0], output, i_out)
            i_in += column.n_in
            i_out += column.n_units

        return output, cache

    def backprop(self, input_data, df_output, cache=None):
        """Backpropagate through all columns.

        Returns ``(df_params_master, df_input)``. Only gradients of
        master parameters are returned; slave gradients are added onto
        their masters using the index maps from _setup_weight_sharing.
        """
        if cache is None:
            _, cache = self.feed_forward(input_data, False)
        else:
            # feed_forward returns (output, cache); here we need the
            # per-column cache list only.
            cache = cache[1]

        df_params = []
        df_input = []
        i = 0
        for column, cache_column in zip(self.columns, cache):
            df_output_column = extract_columns(df_output, i, i + column.n_units)
            df_params_column, df_input_column = column.backprop(
                cache_column[0], df_output_column, cache_column[1])
            df_params.extend(df_params_column)
            df_input.append(df_input_column)
            i += column.n_units

        # Keep master gradients and accumulate the slaves' gradients
        # onto them.
        df_params_master = [df_params[idx] for idx in self.master_param_idx]
        for slave_idx, master_idx in self.shared_idx:
            df_params_master[master_idx] += df_params[slave_idx]

        del df_params

        if not self.input_as_list:
            # Reassemble the per-column input gradients into one array.
            df_input_list = df_input
            df_input = gpuarray.empty(input_data.shape, np.float32,
                                      allocator=memory_pool.allocate)

            i = 0
            for dfi, column in zip(df_input_list, self.columns):
                insert_columns(dfi, df_input, i)
                i += column.n_in

        return df_params_master, df_input
12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from .logistic_regression import LogisticRegression 18 | from .multitask_neural_net import MultitaskNeuralNet 19 | from .neural_net import NeuralNet 20 | from .neural_net_regression import NeuralNetRegression 21 | from .model import Model 22 | -------------------------------------------------------------------------------- /hebel/models/logistic_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
class Model(object):
    """Abstract base class defining the interface of a Hebel model.

    Concrete models must override every method below; each stub simply
    raises ``NotImplementedError``.
    """

    def __init__(self):
        raise NotImplementedError

    @property
    def parameters(self):
        """The model's parameters (abstract)."""
        raise NotImplementedError

    @parameters.setter
    def parameters(self, value):
        raise NotImplementedError

    def update_parameters(self, value):
        """Apply an update to the model's parameters (abstract)."""
        raise NotImplementedError

    def evaluate(self, input_data, targets,
                 return_cache=False, prediction=True):
        """Evaluate the loss function without computing gradients (abstract)."""
        raise NotImplementedError

    def training_pass(self, input_data, targets):
        """Perform a full forward and backward pass through the model (abstract)."""
        raise NotImplementedError

    def test_error(self, input_data, targets, average=True, cache=None):
        """Evaluate performance on a test set (abstract)."""
        raise NotImplementedError

    def feed_forward(self, input_data, return_cache=False, prediction=True):
        """Get predictions from the model (abstract)."""
        raise NotImplementedError
class MultitaskNeuralNet(NeuralNet):
    """A neural net trained on several tasks simultaneously.

    Identical to :class:`NeuralNet` except that the output layer is a
    :class:`MultitaskTopLayer`; all other behavior is inherited.
    """
    TopLayerClass = MultitaskTopLayer
class NeuralNetRegression(NeuralNet):
    """A neural network for regression using the squared error loss
    function.

    This class exists for convenience. The same results can be
    achieved by creating a :class:`hebel.models.NeuralNet` instance
    and passing a :class:`hebel.layers.LinearRegressionLayer` instance
    as the ``top_layer`` argument.

    **Parameters:**

    layers : array_like
        An array of either integers or instances of
        :class:`hebel.layers.HiddenLayer` objects. If integers are
        given, they represent the number of hidden units in each layer
        and new ``HiddenLayer`` objects will be created. If
        ``HiddenLayer`` instances are given, the user must make sure
        that each ``HiddenLayer`` has ``n_in`` set to the preceding
        layer's ``n_units``. If ``HiddenLayer`` instances are passed,
        then ``activation_function``, ``dropout``, ``n_in``,
        ``l1_penalty_weight``, and ``l2_penalty_weight`` are ignored.

    top_layer : :class:`hebel.layers.TopLayer` instance, optional
        If ``top_layer`` is given, then it is used for the output
        layer, otherwise, a ``LinearRegressionLayer`` instance is created.

    activation_function : {'sigmoid', 'tanh', 'relu', or 'linear'}, optional
        The activation function to be used in the hidden layers.

    dropout : float in [0, 1)
        Probability of dropping out each hidden unit during training. Default is 0.

    n_in : integer, optional
        The dimensionality of the input. Must be given, if the first
        hidden layer is not passed as a
        :class:`hebel.layers.HiddenLayer` instance.

    n_out : integer, optional
        The number of classes to predict from. Must be given, if a
        :class:`hebel.layers.HiddenLayer` instance is not given in
        ``top_layer``.

    l1_penalty_weight : float, optional
        Weight for L1 regularization

    l2_penalty_weight : float, optional
        Weight for L2 regularization

    kwargs : optional
        Any additional arguments are passed on to ``top_layer``

    **See also:**

    :class:`hebel.models.NeuralNet`,
    :class:`hebel.models.MultitaskNeuralNet`,
    :class:`hebel.layers.LinearRegressionLayer`

    """
    TopLayerClass = LinearRegressionLayer
21 | 22 | """ 23 | 24 | import numpy as np 25 | import time, cPickle, os, sys 26 | from datetime import datetime 27 | 28 | class ProgressMonitor(object): 29 | def __init__(self, experiment_name=None, save_model_path=None, 30 | save_interval=None, output_to_log=False, 31 | model=None, make_subdir=True): 32 | 33 | self.experiment_name = experiment_name 34 | self.save_model_path = save_model_path 35 | self.save_interval = save_interval 36 | self.output_to_log = output_to_log 37 | self.model = model 38 | 39 | self.train_error = [] 40 | self.validation_error = [] 41 | self.avg_epoch_t = None 42 | self._time = datetime.now().strftime('%Y-%m-%dT%H-%M-%S') 43 | 44 | self.epochs = 0 45 | 46 | self.makedir(make_subdir) 47 | 48 | def print_(self, obj): 49 | if self.log is not None: 50 | self.log.write(str(obj) + '\n') 51 | print obj 52 | sys.stdout.flush() 53 | 54 | @property 55 | def yaml_config(self): 56 | return self._yaml_config 57 | 58 | @yaml_config.setter 59 | def yaml_config(self, yaml_config): 60 | if yaml_config is not None: 61 | self._yaml_config = yaml_config 62 | yaml_path = os.path.join(self.save_path, 'yaml_config.yml') 63 | f = open(yaml_path, 'w') 64 | f.write(self._yaml_config) 65 | self._yaml_config = yaml_config 66 | 67 | @property 68 | def test_error(self): 69 | return self._test_error 70 | 71 | @test_error.setter 72 | def test_error(self, test_error): 73 | self._test_error = test_error 74 | self.print_("Test error: %.4f" % test_error) 75 | f = open(os.path.join(self.save_path, "test_error"), 'w') 76 | f.write('%.5f\n' % test_error) 77 | 78 | def makedir(self, make_subdir=True): 79 | if make_subdir: 80 | experiment_dir_name = '_'.join(( 81 | self.experiment_name, 82 | datetime.now().strftime('%Y-%m-%dT%H-%M-%S'))) 83 | 84 | path = os.path.join(self.save_model_path, 85 | experiment_dir_name) 86 | else: 87 | path = self.save_model_path 88 | if not os.path.exists(path): 89 | os.makedirs(path) 90 | self.save_path = path 91 | 92 | if self.output_to_log: 93 | 
self.log = open(os.path.join(self.save_path, 'output.log'), 'w', 1) 94 | # sys.stdout = self.log 95 | # sys.stderr = self.log 96 | 97 | def start_training(self): 98 | self.start_time = datetime.now() 99 | 100 | def report(self, epoch, train_error, validation_error=None, 101 | new_best=None, epoch_t=None): 102 | # Print logs 103 | self.train_error.append((epoch, train_error)) 104 | if validation_error is not None: 105 | self.validation_error.append((epoch, validation_error)) 106 | self.print_error(epoch, train_error, validation_error, new_best) 107 | 108 | if epoch_t is not None: 109 | self.avg_epoch_t = ((epoch - 1) * \ 110 | self.avg_epoch_t + epoch_t) / epoch \ 111 | if self.avg_epoch_t is not None else epoch_t 112 | 113 | # Pickle model 114 | if self.save_interval is not None: 115 | if not epoch % self.save_interval: 116 | filename = 'model_%s_epoch%04d.pkl' % ( 117 | self.experiment_name, 118 | epoch) 119 | path = os.path.join(self.save_path, filename) 120 | cPickle.dump(self.model, open(path, 'wb')) 121 | elif new_best is not None and new_best: 122 | filename = 'model_%s_current_best.pkl' % self.experiment_name 123 | path = os.path.join(self.save_path, filename) 124 | cPickle.dump(self.model, open(path, 'wb')) 125 | 126 | def print_error(self, epoch, train_error, validation_error=None, new_best=None): 127 | if validation_error is not None: 128 | report_str = 'Epoch %d, Validation error: %.5g, Train Loss: %.3f' % \ 129 | (epoch, validation_error, train_error) 130 | if new_best is not None and new_best: 131 | report_str = '* ' + report_str 132 | else: 133 | report_str = 'Epoch %d, Train Loss: %.3f' % \ 134 | (epoch, train_error) 135 | self.print_(report_str) 136 | 137 | def avg_weight(self): 138 | self.print_("\nAvg weights:") 139 | 140 | i = 0 141 | for param in self.model.parameters: 142 | if len(param.shape) != 2: continue 143 | param_cpu = np.abs(param.get()) 144 | mean_weight = param_cpu.mean() 145 | std_weight = param_cpu.std() 146 | self.print_('Layer %d: 
%.4f [%.4f]' % (i, mean_weight, std_weight)) 147 | 148 | i += 1 149 | 150 | def finish_training(self): 151 | # Print logs 152 | end_time = datetime.now() 153 | self.train_time = end_time - self.start_time 154 | self.print_("Runtime: %dm %ds" % (self.train_time.total_seconds() // 60, 155 | self.train_time.total_seconds() % 60)) 156 | self.print_("Avg. time per epoch %.2fs" % self.avg_epoch_t) 157 | 158 | # Pickle model 159 | filename = 'model_%s_final.pkl' % self.experiment_name 160 | path = os.path.join(self.save_path, filename) 161 | self.print_("Saving model to %s" % path) 162 | cPickle.dump(self.model, open(path, 'wb')) 163 | if self.save_interval is None: 164 | os.remove(os.path.join( 165 | self.save_path, 'model_%s_current_best.pkl' % self.experiment_name)) 166 | 167 | def __del__(self): 168 | if self.output_to_log: 169 | self.log.close() 170 | 171 | 172 | class SimpleProgressMonitor(object): 173 | def __init__(self, model=None): 174 | self.model = model 175 | 176 | self.train_error = [] 177 | self.validation_error = [] 178 | self.avg_epoch_t = None 179 | self._time = datetime.now().strftime('%Y-%m-%dT%H-%M-%S') 180 | 181 | def start_training(self): 182 | self.start_time = datetime.now() 183 | 184 | def report(self, epoch, train_error, validation_error=None, 185 | new_best=None, epoch_t=None): 186 | self.train_error.append((epoch, train_error)) 187 | if validation_error is not None: 188 | self.validation_error.append((epoch, validation_error)) 189 | 190 | # Print logs 191 | self.print_error(epoch, train_error, validation_error, new_best) 192 | 193 | if epoch_t is not None and epoch > 0: 194 | self.avg_epoch_t = ((epoch - 1) * \ 195 | self.avg_epoch_t + epoch_t) / epoch \ 196 | if self.avg_epoch_t is not None else epoch_t 197 | sys.stdout.flush() 198 | 199 | def print_error(self, epoch, train_error, validation_error=None, new_best=None): 200 | if validation_error is not None: 201 | report_str = 'Epoch %d, Validation error: %.5g, Train Loss: %.3f' % \ 202 | 
(epoch, validation_error, train_error) 203 | if new_best is not None and new_best: 204 | report_str = '* ' + report_str 205 | print report_str 206 | else: 207 | print 'Epoch %d, Train Loss: %.3f' % \ 208 | (epoch, train_error) 209 | 210 | def avg_weight(self): 211 | print "\nAvg weights:" 212 | 213 | i = 0 214 | for param in self.model.parameters: 215 | if len(param.shape) != 2: continue 216 | param_cpu = np.abs(param.get()) 217 | mean_weight = param_cpu.mean() 218 | std_weight = param_cpu.std() 219 | print 'Layer %d: %.4f [%.4f]' % (i, mean_weight, std_weight) 220 | i += 1 221 | sys.stdout.flush() 222 | 223 | def finish_training(self): 224 | # Print logs 225 | end_time = datetime.now() 226 | self.train_time = end_time - self.start_time 227 | print "Runtime: %dm %ds" % (self.train_time.total_seconds() // 60, 228 | self.train_time.total_seconds() % 60) 229 | print "Avg. time per epoch %.2fs" % self.avg_epoch_t 230 | sys.stdout.flush() 231 | 232 | 233 | class DummyProgressMonitor(object): 234 | def __init__(self, model=None): 235 | self.model = model 236 | 237 | def start_training(self): 238 | pass 239 | 240 | def report(self, epoch, train_error, validation_error=None, 241 | new_best=None, epoch_t=None): 242 | pass 243 | 244 | def finish_training(self): 245 | pass 246 | -------------------------------------------------------------------------------- /hebel/optimizers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
class EarlyStoppingModule(object):
    """Tracks the best validation loss seen so far and keeps a pickled
    snapshot of the model that achieved it."""

    def __init__(self, model, verbose):
        self.model = model
        self.best_validation_loss = np.inf
        self.verbose = verbose

    def update(self, epoch, validation_loss):
        """Record *validation_loss*; snapshot the model if it is a new best.

        Returns ``True`` when a new best was found, ``False`` otherwise.
        """
        if validation_loss < self.best_validation_loss:
            self.best_validation_loss = validation_loss
            # Drop the previous snapshot first so its memory can be
            # reclaimed before pickling the new one.
            try:
                del self.best_model
            except AttributeError:
                pass

            # Pickling may allocate; on MemoryError release the memory
            # pool's held blocks and retry once.
            try:
                self.best_model = cPickle.dumps(self.model)
            except MemoryError:
                memory_pool.free_held()
                self.best_model = cPickle.dumps(self.model)

            self.best_epoch = epoch
            return True
        return False

    def finish(self):
        """Replace ``self.model`` with the best pickled snapshot, if any."""
        try:
            self.model = cPickle.loads(self.best_model)
        except AttributeError:
            # Training has not yet reached the first validation epoch, so there is no self.best_model
            return
        if self.verbose:
            print "Optimization complete. " \
                "Best validation error of %.5g obtained in self.epoch %d" % \
                (self.best_validation_loss, self.best_epoch)


class SGD(object):
    """Mini-batch stochastic gradient descent driver.

    Iterates over ``train_data``, applying gradient updates through
    ``parameter_updater``, periodically evaluating on
    ``validation_data`` and keeping the best model via early stopping.
    """

    @property
    def best_validation_loss(self):
        # Delegates to the early-stopping module; only valid after run()
        # has created it.
        return self.early_stopping_module.best_validation_loss

    # NOTE(review): the default learning_rate_schedule is evaluated once
    # at def time, so the same scheduler object is shared by every SGD
    # instance that relies on the default -- confirm constant_scheduler
    # tolerates being consumed by multiple instances.
    def __init__(self,
                 model, parameter_updater,
                 train_data,
                 validation_data=None,
                 progress_monitor=None,
                 learning_rate_schedule=constant_scheduler(.1),
                 momentum_schedule=None,
                 early_stopping=True,
                 verbose=True):

        """ Stochastic gradient descent

        **Parameters:**

        model : Model
            The model to train.

        parameter_updater : ParameterUpdater class
            Update rule class (e.g. momentum); instantiated with the model.

        train_data / validation_data : data providers
            Mini-batch iterables; validation_data may be None.

        progress_monitor : monitor instance or None
            Defaults to SimpleProgressMonitor (verbose) or
            DummyProgressMonitor (quiet).

        learning_rate_schedule / momentum_schedule : iterators
            Yield one learning parameter value per epoch.

        early_stopping : bool
            Whether to keep and restore the best model seen on the
            validation set.
        """

        ### Initialization

        self.model = model

        ### Training data
        self.train_data = train_data

        ### Validation data
        self.validation_data = validation_data

        ### Data size
        self.N_train = self.train_data.N

        if validation_data is not None:
            self.N_validation = self.validation_data.N

        ### Learning rate schedule
        self.learning_parameter_iterators = [learning_rate_schedule]

        ### Momentum, rmsprop, etc

        self.parameter_updater = parameter_updater(self.model)

        if momentum_schedule is not None:
            self.learning_parameter_iterators.append(momentum_schedule)

        if progress_monitor is None:
            if verbose:
                self.progress_monitor = SimpleProgressMonitor(model=self.model)
            else:
                self.progress_monitor = DummyProgressMonitor()
        else:
            self.progress_monitor = progress_monitor

        # Attach the model to a monitor that was created without one.
        if self.progress_monitor.model is None:
            self.progress_monitor.model = self.model

        self.early_stopping = early_stopping
        self.verbose = verbose
        self.epoch = 0

    def run(self, iterations=200, validation_interval=5,
            yaml_config=None,
            task_id=None):
        """Run the training loop for *iterations* epochs.

        Validates every *validation_interval* epochs; a KeyboardInterrupt
        stops training cleanly and exits after saving.
        """

        self.early_stopping_module = EarlyStoppingModule(self.model, self.verbose) \
            if self.early_stopping else None

        keyboard_interrupt = False

        self.progress_monitor.start_training()

        self.progress_monitor.task_id = task_id
        self.progress_monitor.yaml_config = yaml_config

        # Main loop. Starting from self.epoch + 1 lets run() be called
        # repeatedly to continue training.
        for self.epoch in range(self.epoch + 1, self.epoch + iterations + 1):
            # Advance every schedule by one epoch.
            learning_parameters = map(lambda lp: lp.next(),
                                      self.learning_parameter_iterators)
            if keyboard_interrupt: break

            try:
                t = time.time()

                # Train on mini-batches
                train_loss = 0.

                for batch_idx, (batch_data, batch_targets) in \
                    enumerate(self.train_data):
                    batch_size = self.train_data.batch_size

                    self.parameter_updater.pre_gradient_update()

                    batch_loss, gradients = \
                        self.model.training_pass(batch_data, batch_targets)
                    train_loss += batch_loss
                    self.parameter_updater\
                        .post_gradient_update(gradients, batch_size,
                                              learning_parameters)

                # Evaluate on validation data
                if self.validation_data is not None and \
                   not self.epoch % validation_interval:
                    validation_loss_rate = self.model.test_error(
                        self.validation_data)

                    new_best = self.early_stopping_module.update(
                        self.epoch, validation_loss_rate) \
                        if self.early_stopping_module is not None else None

                    epoch_t = time.time() - t

                    self.progress_monitor.report(self.epoch, train_loss,
                                                 validation_loss_rate,
                                                 new_best,
                                                 epoch_t=epoch_t)
                else:
                    epoch_t = time.time() - t
                    self.progress_monitor.report(self.epoch, train_loss,
                                                 epoch_t=epoch_t)

            except KeyboardInterrupt:
                print "Keyboard interrupt. Stopping training and cleaning up."
                keyboard_interrupt = True

        # Restore the best model seen on the validation set.
        if self.early_stopping_module is not None:
            self.early_stopping_module.finish()

        self.progress_monitor.finish_training()

        if keyboard_interrupt:
            sys.exit()

    def norm_v_norm(self):
        # NOTE(review): self.max_vec_norm is never assigned in this
        # class; calling this method raises AttributeError unless the
        # attribute is set externally -- confirm intended usage.
        if self.max_vec_norm:
            for w in self.model.parameters:
                if len(w.shape) == 2:
                    vector_normalize(w, self.max_vec_norm)
19 | 20 | """ 21 | 22 | from pycuda import gpuarray 23 | from itertools import izip 24 | 25 | 26 | class ParameterUpdater(object): 27 | def __init__(self, model): 28 | self.model = model 29 | 30 | def pre_gradient_update(self, stream=None): 31 | pass 32 | 33 | def post_gradient_update(self, gradients, stream=None): 34 | pass 35 | 36 | 37 | class SimpleSGDUpdate(ParameterUpdater): 38 | def post_gradient_update(self, gradients, batch_size, 39 | learning_parameters, 40 | stream=None): 41 | learning_rate = learning_parameters[0] 42 | 43 | multiplier = [-lr_mult * learning_rate / batch_size for lr_mult in 44 | self.model.lr_multiplier] 45 | update = zip(gradients, multiplier) 46 | self.model.update_parameters(update) 47 | 48 | 49 | class MomentumUpdate(ParameterUpdater): 50 | def __init__(self, model): 51 | self.model = model 52 | self.velocity = [gpuarray.zeros_like(p) 53 | for p in self.model.parameters] 54 | 55 | def post_gradient_update(self, gradients, batch_size, 56 | learning_parameters, stream=None): 57 | learning_rate, momentum = learning_parameters 58 | 59 | updates = [] 60 | for gparam, vparam, lr_multiplier in \ 61 | izip(gradients, self.velocity, self.model.lr_multiplier): 62 | vparam._axpbyz(momentum, 63 | gparam, -learning_rate * lr_multiplier / batch_size, 64 | vparam, stream=stream) 65 | updates.append((vparam, 1.)) 66 | self.model.update_parameters(updates) 67 | 68 | 69 | class NesterovMomentumUpdate(MomentumUpdate): 70 | def pre_gradient_update(self): 71 | """ First step of Nesterov momentum method: 72 | take step in direction of accumulated gradient 73 | """ 74 | 75 | updates = zip(self.velocity, self.model.n_parameters * [1.]) 76 | self.model.update_parameters(updates) 77 | 78 | def post_gradient_update(self, gradients, batch_size, 79 | learning_parameters, stream=None): 80 | """ Second step of Nesterov momentum method: 81 | take step in direction of new gradient and update velocity 82 | """ 83 | 84 | learning_rate, momentum = learning_parameters 85 
| 86 | updates = [] 87 | for param, gparam, vparam, lr_multiplier in \ 88 | izip(self.model.parameters, gradients, 89 | self.velocity, self.model.lr_multiplier): 90 | 91 | updates.append( 92 | (gparam, -learning_rate * lr_multiplier / batch_size)) 93 | # param -= learning_rate*lr_multiplier/batch_size*gparam 94 | # param._axpbyz(1., gparam, -learning_rate*lr_multiplier/batch_size, 95 | # param, stream=stream) 96 | # vparam = momentum*vparam \ 97 | # - learning_rate*lr_multiplier/batch_size*gparam 98 | vparam._axpbyz(momentum, gparam, -learning_rate*lr_multiplier/batch_size, 99 | vparam, stream=stream) 100 | self.model.update_parameters(updates) 101 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | eps = np.finfo(np.float32).eps 19 | 20 | def init(): 21 | from . import elementwise 22 | from . import matrix 23 | from . import reductions 24 | from . import softmax 25 | from . 
import linalg 26 | 27 | elementwise.init() 28 | matrix.init() 29 | reductions.init() 30 | # softmax.init() 31 | linalg.init() -------------------------------------------------------------------------------- /hebel/pycuda_ops/cuda.py: -------------------------------------------------------------------------------- 1 | # This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | #!/usr/bin/env python 30 | 31 | """ 32 | Python interface to CUDA functions. 33 | """ 34 | 35 | from cudart import * 36 | from cudadrv import * 37 | 38 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/cudadrv.py: -------------------------------------------------------------------------------- 1 | # This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. 
THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | #!/usr/bin/env python 30 | 31 | """ 32 | Python interface to CUDA driver functions. 33 | """ 34 | 35 | import sys, ctypes 36 | from ctypes.util import find_library 37 | 38 | # Load CUDA driver library: 39 | _libcuda_shortname = 'nvcuda' if sys.platform == 'win32' else 'cuda' 40 | _libcuda_name = find_library(_libcuda_shortname) # on Windows, this is the full path, not just the name 41 | if _libcuda_name is None: 42 | raise OSError('CUDA driver library not found') 43 | _libcuda = ctypes.cdll.LoadLibrary(_libcuda_name) 44 | 45 | # Exceptions corresponding to various CUDA driver errors: 46 | 47 | class CUDA_ERROR(Exception): 48 | """CUDA error.""" 49 | pass 50 | 51 | class CUDA_ERROR_INVALID_VALUE(CUDA_ERROR): 52 | pass 53 | 54 | class CUDA_ERROR_OUT_OF_MEMORY(CUDA_ERROR): 55 | pass 56 | 57 | class CUDA_ERROR_NOT_INITIALIZED(CUDA_ERROR): 58 | pass 59 | 60 | class CUDA_ERROR_DEINITIALIZED(CUDA_ERROR): 61 | pass 62 | 63 | class CUDA_ERROR_PROFILER_DISABLED(CUDA_ERROR): 64 | pass 65 | 66 | class CUDA_ERROR_PROFILER_NOT_INITIALIZED(CUDA_ERROR): 67 | pass 68 | 69 | class CUDA_ERROR_PROFILER_ALREADY_STARTED(CUDA_ERROR): 70 | pass 71 | 72 | 
class CUDA_ERROR_PROFILER_ALREADY_STOPPED(CUDA_ERROR): 73 | pass 74 | 75 | class CUDA_ERROR_NO_DEVICE(CUDA_ERROR): 76 | pass 77 | 78 | class CUDA_ERROR_INVALID_DEVICE(CUDA_ERROR): 79 | pass 80 | 81 | class CUDA_ERROR_INVALID_IMAGE(CUDA_ERROR): 82 | pass 83 | 84 | class CUDA_ERROR_INVALID_CONTEXT(CUDA_ERROR): 85 | pass 86 | 87 | class CUDA_ERROR_CONTEXT_ALREADY_CURRENT(CUDA_ERROR): 88 | pass 89 | 90 | class CUDA_ERROR_MAP_FAILED(CUDA_ERROR): 91 | pass 92 | 93 | class CUDA_ERROR_UNMAP_FAILED(CUDA_ERROR): 94 | pass 95 | 96 | class CUDA_ERROR_ARRAY_IS_MAPPED(CUDA_ERROR): 97 | pass 98 | 99 | class CUDA_ERROR_ALREADY_MAPPED(CUDA_ERROR): 100 | pass 101 | 102 | class CUDA_ERROR_NO_BINARY_FOR_GPU(CUDA_ERROR): 103 | pass 104 | 105 | class CUDA_ERROR_ALREADY_ACQUIRED(CUDA_ERROR): 106 | pass 107 | 108 | class CUDA_ERROR_NOT_MAPPED(CUDA_ERROR): 109 | pass 110 | 111 | class CUDA_ERROR_NOT_MAPPED_AS_ARRAY(CUDA_ERROR): 112 | pass 113 | 114 | class CUDA_ERROR_NOT_MAPPED_AS_POINTER(CUDA_ERROR): 115 | pass 116 | 117 | class CUDA_ERROR_ECC_UNCORRECTABLE(CUDA_ERROR): 118 | pass 119 | 120 | class CUDA_ERROR_UNSUPPORTED_LIMIT(CUDA_ERROR): 121 | pass 122 | 123 | class CUDA_ERROR_CONTEXT_ALREADY_IN_USE(CUDA_ERROR): 124 | pass 125 | 126 | class CUDA_ERROR_INVALID_SOURCE(CUDA_ERROR): 127 | pass 128 | 129 | class CUDA_ERROR_FILE_NOT_FOUND(CUDA_ERROR): 130 | pass 131 | 132 | class CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND(CUDA_ERROR): 133 | pass 134 | 135 | class CUDA_ERROR_SHARED_OBJECT_INIT_FAILED(CUDA_ERROR): 136 | pass 137 | 138 | class CUDA_ERROR_OPERATING_SYSTEM(CUDA_ERROR): 139 | pass 140 | 141 | class CUDA_ERROR_INVALID_HANDLE(CUDA_ERROR): 142 | pass 143 | 144 | class CUDA_ERROR_NOT_FOUND(CUDA_ERROR): 145 | pass 146 | 147 | class CUDA_ERROR_NOT_READY(CUDA_ERROR): 148 | pass 149 | 150 | 151 | CUDA_EXCEPTIONS = { 152 | 1: CUDA_ERROR_INVALID_VALUE, 153 | 2: CUDA_ERROR_OUT_OF_MEMORY, 154 | 3: CUDA_ERROR_NOT_INITIALIZED, 155 | 4: CUDA_ERROR_DEINITIALIZED, 156 | 5: 
CUDA_ERROR_PROFILER_DISABLED, 157 | 6: CUDA_ERROR_PROFILER_NOT_INITIALIZED, 158 | 7: CUDA_ERROR_PROFILER_ALREADY_STARTED, 159 | 8: CUDA_ERROR_PROFILER_ALREADY_STOPPED, 160 | 100: CUDA_ERROR_NO_DEVICE, 161 | 101: CUDA_ERROR_INVALID_DEVICE, 162 | 200: CUDA_ERROR_INVALID_IMAGE, 163 | 201: CUDA_ERROR_INVALID_CONTEXT, 164 | 202: CUDA_ERROR_CONTEXT_ALREADY_CURRENT, 165 | 205: CUDA_ERROR_MAP_FAILED, 166 | 206: CUDA_ERROR_UNMAP_FAILED, 167 | 207: CUDA_ERROR_ARRAY_IS_MAPPED, 168 | 208: CUDA_ERROR_ALREADY_MAPPED, 169 | 209: CUDA_ERROR_NO_BINARY_FOR_GPU, 170 | 210: CUDA_ERROR_ALREADY_ACQUIRED, 171 | 211: CUDA_ERROR_NOT_MAPPED, 172 | 212: CUDA_ERROR_NOT_MAPPED_AS_ARRAY, 173 | 213: CUDA_ERROR_NOT_MAPPED_AS_POINTER, 174 | 214: CUDA_ERROR_ECC_UNCORRECTABLE, 175 | 215: CUDA_ERROR_UNSUPPORTED_LIMIT, 176 | 216: CUDA_ERROR_CONTEXT_ALREADY_IN_USE, 177 | 300: CUDA_ERROR_INVALID_SOURCE, 178 | 301: CUDA_ERROR_FILE_NOT_FOUND, 179 | 302: CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, 180 | 303: CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, 181 | 304: CUDA_ERROR_OPERATING_SYSTEM, 182 | 400: CUDA_ERROR_INVALID_HANDLE, 183 | 500: CUDA_ERROR_NOT_FOUND, 184 | 600: CUDA_ERROR_NOT_READY, 185 | } 186 | 187 | def cuCheckStatus(status): 188 | """ 189 | Raise CUDA exception. 190 | 191 | Raise an exception corresponding to the specified CUDA driver 192 | error code. 193 | 194 | Parameters 195 | ---------- 196 | status : int 197 | CUDA driver error code. 
198 | 199 | See Also 200 | -------- 201 | CUDA_EXCEPTIONS 202 | 203 | """ 204 | 205 | if status != 0: 206 | try: 207 | raise CUDA_EXCEPTIONS[status] 208 | except KeyError: 209 | raise CUDA_ERROR 210 | 211 | 212 | CU_POINTER_ATTRIBUTE_CONTEXT = 1 213 | CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2 214 | CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3 215 | CU_POINTER_ATTRIBUTE_HOST_POINTER = 4 216 | 217 | _libcuda.cuPointerGetAttribute.restype = int 218 | _libcuda.cuPointerGetAttribute.argtypes = [ctypes.c_void_p, 219 | ctypes.c_int, 220 | ctypes.c_uint] 221 | def cuPointerGetAttribute(attribute, ptr): 222 | data = ctypes.c_void_p() 223 | status = _libcuda.cuPointerGetAttribute(data, attribute, ptr) 224 | cuCheckStatus(status) 225 | return data 226 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/elementwise.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray 19 | from pycuda.elementwise import ElementwiseKernel 20 | from .. 
import sampler, memory_pool 21 | from .matrix import extract_columns, insert_columns 22 | 23 | class Kernel(object): 24 | """ Defers creation of the ElementwiseKernels until the first 25 | runtime and automatically selects kernels for double and float. 26 | """ 27 | 28 | def __init__(self, name, signature_float, code_float, 29 | signature_double, code_double): 30 | self.name = name 31 | self.kernel_float = ElementwiseKernel(signature_float, code_float, name) 32 | self.kernel_double = ElementwiseKernel(signature_double, code_double, name) 33 | 34 | def __call__(self, *args, **kwargs): 35 | if args[0].dtype == np.float32: 36 | self.kernel_float(*args, **kwargs) 37 | elif args[0].dtype == np.float64: 38 | self.kernel_double(*args, **kwargs) 39 | else: 40 | raise ValueError("Unknown datatype, must be np.float32 or np.float64") 41 | 42 | def get_kernel(self, dtype): 43 | if dtype == np.float32 or dtype == 'float': 44 | return self.kernel_float 45 | elif dtype == np.float64 or dtype == 'double': 46 | return self.kernel_double 47 | else: 48 | raise ValueError("Unknown datatype, must be np.float32 or np.float64") 49 | 50 | all_kernels = None 51 | def init(): 52 | from pycuda import elementwise 53 | 54 | global all_kernels 55 | 56 | all_kernels_code = { 57 | 'sign': { 58 | 'float': ("float *mat, float *target", 59 | "target[i] = (mat[i] > 0.) - (mat[i] < 0);"), 60 | 'double': ("double *mat, double *target", 61 | "target[i] = (mat[i] > 0.) - (mat[i] < 0);") 62 | }, 63 | 64 | 'sigmoid': { 65 | 'float': ("float *mat", 66 | "mat[i] = 1. / (1. + __expf(-mat[i]))",), 67 | 'double': ("double *mat", 68 | "mat[i] = 1. / (1. 
+ exp(-mat[i]))") 69 | }, 70 | 71 | 'df_sigmoid': { 72 | 'float': ("float *mat, float *target", 73 | """const float f = mat[i]; 74 | target[i] = f * (1 - f); 75 | """), 76 | 'double': ("double *mat, double *target", 77 | """const double f = mat[i]; 78 | target[i] = f * (1 - f); 79 | """) 80 | }, 81 | 82 | 'tanh_inplace': { 83 | 'float': ("float *mat", 84 | "mat[i] = tanhf(mat[i]);"), 85 | 'double': ("double *mat", 86 | "mat[i] = tanh(mat[i]);") 87 | }, 88 | 89 | 'df_tanh': { 90 | 'float': ("float *mat, float *target", 91 | """float f = mat[i]; 92 | target[i] = 1 - pow(f, 2);"""), 93 | 'double': ("double *mat, double *target", 94 | """double f = mat[i]; 95 | target[i] = 1 - pow(f, 2);""") 96 | }, 97 | 98 | 'relu': { 99 | 'float': ("float *mat", 100 | "if (mat[i] < 0.) mat[i] = 0.",), 101 | 'double': ("double *mat", 102 | "if (mat[i] < 0.) mat[i] = 0.") 103 | }, 104 | 105 | 'df_relu': { 106 | 'float': ("float *mat, float *target", 107 | "if (mat[i] <= 0.)\n target[i] = 0.;\nelse\n target[i] = 1.;"), 108 | 'double': ("double *mat, double *target", 109 | "if (mat[i] <= 0.)\n target[i] = 0.;\nelse\n target[i] = 1.;") 110 | }, 111 | 112 | 'sample_dropout_mask': { 113 | 'float': ("float *mat, float *target, char *dropout_mask, " 114 | "float *dropout_prob_array, float dropout_probability", 115 | """if (dropout_prob_array[i] <= dropout_probability) { 116 | dropout_mask[i] = 0.; 117 | target[i] = 0.; 118 | } else { 119 | dropout_mask[i] = 1.; 120 | if (target != mat) 121 | target[i] = mat[i]; 122 | } 123 | """), 124 | 'double': ("double *mat, double *targets, char *dropout_mask, " 125 | "double *dropout_prob_array, float dropout_probability", 126 | """if (dropout_prob_array[i] <= dropout_probability) { 127 | dropout_mask[i] = 0.; 128 | target[i] = 0.; 129 | } else { 130 | dropout_mask[i] = 1.; 131 | if (target != mat) 132 | target[i] = mat[i]; 133 | } 134 | """) 135 | }, 136 | 137 | 'apply_dropout_mask': { 138 | 'float': ("float *mat, char *mask", 139 | "if (mask[i] == 0.) 
mat[i] = 0;"), 140 | 'double': ("double *mat, char *mask", 141 | "if (mask[i] == 0.) mat[i] = 0;"), 142 | }, 143 | 144 | 'nan_to_zeros': { 145 | 'float': ("float *mat, float *target", 146 | "target[i] = isnan(mat[i]) ? 0. : mat[i];"), 147 | 'double': ("double *mat, double *target", 148 | "target[i] = isnan(mat[i]) ? 0. : mat[i];") 149 | }, 150 | 151 | 'mult_matrix': { 152 | 'float': ("const float *a, const float *b, float *c", 153 | "c[i] = a[i] * b[i];"), 154 | 'double': ("const double *b, const double *b, double *c", 155 | "c[i] = a[i] * b[i];") 156 | 157 | }, 158 | 'substract_matrix': { 159 | 'float': ("const float *a, const float *b, float *c", 160 | "c[i] = a[i] - b[i];"), 161 | 'double': ("const double *a, const double *b, double *c", 162 | "c[i] = a[i] - b[i];") 163 | } 164 | } 165 | 166 | all_kernels = { 167 | name: Kernel(name, 168 | val['float'][0], val['float'][1], 169 | val['double'][0], val['double'][1]) 170 | for name, val in all_kernels_code.iteritems() 171 | } 172 | 173 | def sign(x, target=None): 174 | assert x.flags.c_contiguous 175 | if target is None: 176 | target = gpuarray.GPUArray(x.shape, dtype=x.dtype, allocator=memory_pool.allocate) 177 | assert target.shape == x.shape 178 | assert target.dtype == x.dtype 179 | assert target.flags.c_contiguous 180 | all_kernels['sign'](x, target) 181 | return target 182 | 183 | def sigmoid(x): 184 | assert x.flags.c_contiguous 185 | all_kernels['sigmoid'](x) 186 | 187 | def df_sigmoid(f, target=None): 188 | assert f.flags.c_contiguous 189 | if target is None: 190 | target = gpuarray.empty_like(f) 191 | all_kernels['df_sigmoid'](f, target) 192 | return target 193 | 194 | def tanh(x): 195 | assert x.flags.c_contiguous 196 | all_kernels['tanh_inplace'](x) 197 | 198 | def df_tanh(f, target=None): 199 | assert f.flags.c_contiguous 200 | if target is None: 201 | target = gpuarray.empty_like(f) 202 | all_kernels['df_tanh'](f, target) 203 | return target 204 | 205 | def relu(x): 206 | assert x.flags.c_contiguous 
207 | all_kernels['relu'](x) 208 | 209 | def df_relu(x, target=None): 210 | assert x.flags.c_contiguous 211 | if target is None: 212 | target = gpuarray.empty_like(x) 213 | all_kernels['df_relu'](x, target) 214 | return target 215 | 216 | def linear(x): 217 | pass 218 | 219 | def df_linear(x): 220 | return x 221 | 222 | def sample_dropout_mask(x, dropout_probability=.5, columns=None, stream=None, target=None, 223 | dropout_mask=None, dropout_prob_array=None): 224 | """ Samples a dropout mask and applies it in place""" 225 | 226 | assert x.flags.c_contiguous 227 | 228 | if columns is not None: 229 | assert len(columns) == 2 230 | x_tmp = x 231 | x = extract_columns(x, columns[0], columns[1]) 232 | 233 | shape = x.shape 234 | 235 | if dropout_prob_array is None: 236 | dropout_prob_array = gpuarray.empty(shape, x.dtype, allocator=memory_pool.allocate) 237 | sampler.fill_uniform(dropout_prob_array, stream) 238 | 239 | if dropout_mask is None: 240 | dropout_mask = gpuarray.empty(shape, np.int8, allocator=memory_pool.allocate) 241 | 242 | if target is None: target = x 243 | 244 | all_kernels['sample_dropout_mask']( 245 | x, target, dropout_mask, dropout_prob_array, 246 | np.float32(dropout_probability)) 247 | 248 | if columns is not None: 249 | insert_columns(x, x_tmp, columns[0]) 250 | 251 | return dropout_mask 252 | 253 | def apply_dropout_mask(x, mask, columns=None, stream=None): 254 | assert x.flags.c_contiguous 255 | 256 | if columns is not None: 257 | assert len(columns) == 2 258 | x_tmp = x 259 | x = extract_columns(x, columns[0], columns[1]) 260 | 261 | assert x.shape == mask.shape 262 | shape = x.shape 263 | 264 | all_kernels['apply_dropout_mask'](x, mask) 265 | 266 | if columns is not None: 267 | insert_columns(x, x_tmp, columns[0]) 268 | 269 | def nan_to_zeros(x, target=None): 270 | assert x.flags.c_contiguous 271 | if target is None: 272 | target = gpuarray.empty_like(x) 273 | assert target.flags.c_contiguous 274 | all_kernels['nan_to_zeros'](x, target) 275 | 
return target 276 | 277 | def mult_matrix(a, b, target=None): 278 | assert a.shape == b.shape 279 | if target is None: 280 | target = gpuarray.empty_like(a) 281 | 282 | all_kernels['mult_matrix'](a, b, target) 283 | return target 284 | 285 | def substract_matrix(a, b, target=None): 286 | assert a.shape == b.shape 287 | if target is None: 288 | target = gpuarray.empty_like(a) 289 | 290 | all_kernels['substract_matrix'](a, b, target) 291 | return target 292 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/linalg.py: -------------------------------------------------------------------------------- 1 | # This file is modified from scikits.cuda (https://github.com/lebedov/scikits.cuda) 2 | # Copyright (c) 2009-2013, Lev Givon. All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | 8 | # Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # Neither the name of Lev Givon nor the names of any contributors may 14 | # be used to endorse or promote products derived from this software 15 | # without specific prior written permission. THIS SOFTWARE IS 16 | # PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | # SUCH DAMAGE. 28 | 29 | from string import lower 30 | import pycuda.gpuarray as gpuarray 31 | import numpy as np 32 | from . import cublas 33 | from .. import memory_pool 34 | 35 | def init(): 36 | global _global_cublas_handle 37 | _global_cublas_handle = cublas.cublasCreate() 38 | 39 | def dot(x_gpu, y_gpu, transa='N', transb='N', handle=None, target=None): 40 | """ 41 | Dot product of two arrays. 42 | 43 | For 1D arrays, this function computes the inner product. For 2D 44 | arrays of shapes `(m, k)` and `(k, n)`, it computes the matrix 45 | product; the result has shape `(m, n)`. 46 | 47 | Parameters 48 | ---------- 49 | x_gpu : pycuda.gpuarray.GPUArray 50 | Input array. 51 | y_gpu : pycuda.gpuarray.GPUArray 52 | Input array. 53 | transa : char 54 | If 'T', compute the product of the transpose of `x_gpu`. 55 | If 'C', compute the product of the Hermitian of `x_gpu`. 56 | transb : char 57 | If 'T', compute the product of the transpose of `y_gpu`. 58 | If 'C', compute the product of the Hermitian of `y_gpu`. 59 | handle : int 60 | CUBLAS context. If no context is specified, the default handle from 61 | `scikits.cuda.misc._global_cublas_handle` is used. 62 | 63 | Returns 64 | ------- 65 | c_gpu : pycuda.gpuarray.GPUArray, float{32,64}, or complex{64,128} 66 | Inner product of `x_gpu` and `y_gpu`. When the inputs are 1D 67 | arrays, the result will be returned as a scalar. 
68 | 69 | Notes 70 | ----- 71 | The input matrices must all contain elements of the same data type. 72 | 73 | Examples 74 | -------- 75 | >>> import pycuda.gpuarray as gpuarray 76 | >>> import pycuda.autoinit 77 | >>> import numpy as np 78 | >>> import linalg 79 | >>> import misc 80 | >>> linalg.init() 81 | >>> a = np.asarray(np.random.rand(4, 2), np.float32) 82 | >>> b = np.asarray(np.random.rand(2, 2), np.float32) 83 | >>> a_gpu = gpuarray.to_gpu(a) 84 | >>> b_gpu = gpuarray.to_gpu(b) 85 | >>> c_gpu = linalg.dot(a_gpu, b_gpu) 86 | >>> np.allclose(np.dot(a, b), c_gpu.get()) 87 | True 88 | >>> d = np.asarray(np.random.rand(5), np.float32) 89 | >>> e = np.asarray(np.random.rand(5), np.float32) 90 | >>> d_gpu = gpuarray.to_gpu(d) 91 | >>> e_gpu = gpuarray.to_gpu(e) 92 | >>> f = linalg.dot(d_gpu, e_gpu) 93 | >>> np.allclose(np.dot(d, e), f) 94 | True 95 | 96 | """ 97 | 98 | if handle is None: 99 | handle = _global_cublas_handle 100 | 101 | if len(x_gpu.shape) == 1 and len(y_gpu.shape) == 1: 102 | 103 | if x_gpu.size != y_gpu.size: 104 | raise ValueError('arrays must be of same length: ' 105 | 'x_gpu.size = %d, y_gpu.size = %d' % 106 | (x_gpu.size, y_gpu.size)) 107 | 108 | # Compute inner product for 1D arrays: 109 | if (x_gpu.dtype == np.complex64 and y_gpu.dtype == np.complex64): 110 | cublas_func = cublas.cublasCdotu 111 | elif (x_gpu.dtype == np.float32 and y_gpu.dtype == np.float32): 112 | cublas_func = cublas.cublasSdot 113 | elif (x_gpu.dtype == np.complex128 and y_gpu.dtype == np.complex128): 114 | cublas_func = cublas.cublasZdotu 115 | elif (x_gpu.dtype == np.float64 and y_gpu.dtype == np.float64): 116 | cublas_func = cublas.cublasDdot 117 | else: 118 | raise ValueError('unsupported combination of input types: ' 119 | 'x_gpu.dtype = %s, y_gpu.dtype = %s' % 120 | (str(x_gpu.dtype), str(y_gpu.dtype))) 121 | 122 | return cublas_func(handle, x_gpu.size, x_gpu.gpudata, 1, 123 | y_gpu.gpudata, 1) 124 | else: 125 | 126 | # Get the shapes of the arguments (accounting 
for the 127 | # possibility that one of them may only have one dimension): 128 | x_shape = x_gpu.shape 129 | y_shape = y_gpu.shape 130 | if len(x_shape) == 1: 131 | x_shape = (1, x_shape[0]) 132 | if len(y_shape) == 1: 133 | y_shape = (1, y_shape[0]) 134 | 135 | # Perform matrix multiplication for 2D arrays: 136 | if (x_gpu.dtype == np.complex64 and y_gpu.dtype == np.complex64): 137 | cublas_func = cublas.cublasCgemm 138 | alpha = np.complex64(1.0) 139 | beta = np.complex64(0.0) 140 | elif (x_gpu.dtype == np.float32 and y_gpu.dtype == np.float32): 141 | cublas_func = cublas.cublasSgemm 142 | alpha = np.float32(1.0) 143 | beta = np.float32(0.0) 144 | elif (x_gpu.dtype == np.complex128 and y_gpu.dtype == np.complex128): 145 | cublas_func = cublas.cublasZgemm 146 | alpha = np.complex128(1.0) 147 | beta = np.complex128(0.0) 148 | elif (x_gpu.dtype == np.float64 and y_gpu.dtype == np.float64): 149 | cublas_func = cublas.cublasDgemm 150 | alpha = np.float64(1.0) 151 | beta = np.float64(0.0) 152 | else: 153 | raise ValueError('unsupported combination of input types: ' 154 | 'x_gpu.dtype = %s, y_gpu.dtype = %s' % 155 | (str(x_gpu.dtype), str(y_gpu.dtype))) 156 | 157 | transa = lower(transa) 158 | transb = lower(transb) 159 | 160 | if transb in ['t', 'c']: 161 | m, k = y_shape 162 | elif transb in ['n']: 163 | k, m = y_shape 164 | else: 165 | raise ValueError('invalid value "%s" for transb' % transb) 166 | 167 | if transa in ['t', 'c']: 168 | l, n = x_shape 169 | elif transa in ['n']: 170 | n, l = x_shape 171 | else: 172 | raise ValueError('invalid value "%s" for transa' % transa) 173 | 174 | if l != k: 175 | raise ValueError('objects are not aligned: x_shape = %s, y_shape = %s' % 176 | (x_shape, y_shape)) 177 | 178 | if transb == 'n': 179 | lda = max(1, m) 180 | else: 181 | lda = max(1, k) 182 | 183 | if transa == 'n': 184 | ldb = max(1, k) 185 | else: 186 | ldb = max(1, n) 187 | 188 | ldc = max(1, m) 189 | 190 | # Note that the desired shape of the output matrix is the 
transpose 191 | # of what CUBLAS assumes: 192 | 193 | if target is None: 194 | target = gpuarray.empty((n, ldc), x_gpu.dtype, allocator=memory_pool.allocate) 195 | 196 | cublas_func(handle, transb, transa, m, n, k, alpha, y_gpu.gpudata, 197 | lda, x_gpu.gpudata, ldb, beta, target.gpudata, ldc) 198 | 199 | return target 200 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/matrix.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from .. 
import memory_pool, sampler 18 | import numpy as np 19 | from pycuda import driver as drv 20 | from pycuda import gpuarray 21 | from ..utils.math import ceil_div 22 | 23 | add_row_vec_kernel = None 24 | add_col_vec_kernel = None 25 | vector_normalize_kernel = None 26 | _compilation_constants = { 27 | 'add_vec_block_size': 16 28 | } 29 | def init(): 30 | from pycuda.compiler import SourceModule 31 | 32 | global add_row_vec_kernel 33 | global add_col_vec_kernel 34 | global vector_normalize_kernel 35 | 36 | code = """ 37 | #include 38 | __global__ void addRowVecToMat(const float *mat, 39 | const float *vec, 40 | float *target, 41 | const unsigned int n, 42 | const unsigned int m, 43 | const int substract) 44 | { 45 | const int tx = threadIdx.x; 46 | const int ty = threadIdx.y; 47 | const int tidx = blockIdx.x * blockDim.x + threadIdx.x; 48 | const int tidy = blockIdx.y * blockDim.y + threadIdx.y; 49 | 50 | __shared__ float shared_vec[%(add_vec_block_size)d]; 51 | 52 | if ((tx == 0) & (tidy < m)) 53 | shared_vec[ty] = vec[tidy]; 54 | __syncthreads(); 55 | 56 | if ((tidy < m) & (tidx < n)) 57 | { 58 | if (substract) 59 | target[tidx*m+tidy] = mat[tidx*m+tidy] - shared_vec[ty]; 60 | else 61 | target[tidx*m+tidy] = mat[tidx*m+tidy] + shared_vec[ty]; 62 | } 63 | } 64 | 65 | __global__ void addColVecToMat(const float *mat, 66 | const float *vec, 67 | float *target, 68 | const unsigned int n, 69 | const unsigned int m, 70 | const int substract) 71 | { 72 | const int tx = threadIdx.x; 73 | const int ty = threadIdx.y; 74 | const int tidx = blockIdx.x * blockDim.x + threadIdx.x; 75 | const int tidy = blockIdx.y * blockDim.y + threadIdx.y; 76 | 77 | __shared__ float shared_vec[%(add_vec_block_size)d]; 78 | 79 | if ((ty == 0) & (tidx < n)) 80 | shared_vec[tx] = vec[tidx]; 81 | __syncthreads(); 82 | 83 | if ((tidy < m) & (tidx < n)) 84 | { 85 | if (substract) 86 | target[tidx*m+tidy] = mat[tidx*m+tidy] - shared_vec[tx]; 87 | else 88 | target[tidx*m+tidy] = mat[tidx*m+tidy] + 
shared_vec[tx]; 89 | } 90 | } 91 | 92 | __global__ void kVectorNormalize(float* mat, 93 | float max_vec_norm, 94 | unsigned int width, 95 | unsigned int height) { 96 | 97 | __shared__ float sum_shared[32]; 98 | __shared__ float vec_norm; 99 | float sum = 0; 100 | 101 | for (unsigned int i = threadIdx.x; i < height; i += 32) 102 | sum += powf(mat[blockIdx.x + i * width], 2); 103 | 104 | sum_shared[threadIdx.x] = sum; 105 | 106 | __syncthreads(); 107 | 108 | if (threadIdx.x == 0) { 109 | sum = 0; 110 | 111 | for (unsigned int i = 0; i < 32; i++) 112 | sum += sum_shared[i]; 113 | 114 | vec_norm = sqrtf(sum); 115 | } 116 | __syncthreads(); 117 | 118 | for (unsigned int i = threadIdx.x; i < height; i += 32) { 119 | if (vec_norm > max_vec_norm) 120 | mat[blockIdx.x + i * width] /= (vec_norm / max_vec_norm); 121 | } 122 | } 123 | """ % _compilation_constants 124 | 125 | mod = SourceModule(code) 126 | add_row_vec_kernel = mod.get_function('addRowVecToMat').prepare('PPPIIi') 127 | add_col_vec_kernel = mod.get_function('addColVecToMat').prepare('PPPIIi') 128 | vector_normalize_kernel = mod.get_function("kVectorNormalize").prepare('PfII') 129 | 130 | def add_vec_to_mat(mat, vec, axis=None, inplace=False, 131 | target=None, substract=False): 132 | """ Add a vector to a matrix 133 | """ 134 | 135 | assert mat.flags.c_contiguous 136 | 137 | if axis is None: 138 | if vec.shape[0] == mat.shape[0]: 139 | axis = 0 140 | elif vec.shape[0] == mat.shape[1]: 141 | axis = 1 142 | else: 143 | raise ValueError('Vector length must be equal ' 144 | 'to one side of the matrix') 145 | 146 | n, m = mat.shape 147 | 148 | block = (_compilation_constants['add_vec_block_size'], 149 | _compilation_constants['add_vec_block_size'], 1) 150 | gridx = ceil_div(n, block[0]) 151 | gridy = ceil_div(m, block[1]) 152 | grid = (gridx, gridy, 1) 153 | 154 | if inplace: 155 | target = mat 156 | elif target is None: 157 | target = gpuarray.empty_like(mat) 158 | 159 | if axis == 0: 160 | assert vec.shape[0] == 
mat.shape[0] 161 | add_col_vec_kernel.prepared_call( 162 | grid, block, 163 | mat.gpudata, 164 | vec.gpudata, 165 | target.gpudata, 166 | np.uint32(n), 167 | np.uint32(m), 168 | np.int32(substract)) 169 | elif axis == 1: 170 | assert vec.shape[0] == mat.shape[1] 171 | add_row_vec_kernel.prepared_call( 172 | grid, block, 173 | mat.gpudata, 174 | vec.gpudata, 175 | target.gpudata, 176 | np.uint32(n), 177 | np.uint32(m), 178 | np.int32(substract)) 179 | return target 180 | 181 | 182 | def vector_normalize(mat, max_vec_norm=1.): 183 | """ Normalize each column vector in mat to length 184 | max_vec_norm if it is longer than max_vec_norm 185 | """ 186 | assert mat.flags.c_contiguous 187 | n, m = mat.shape 188 | 189 | vector_normalize_kernel.prepared_call( 190 | (m, 1, 1), (32, 1, 1), 191 | mat.gpudata, 192 | np.float32(max_vec_norm), 193 | np.int32(m), 194 | np.int32(n)) 195 | 196 | def extract_columns(mat, start=0, stop=None, target=None): 197 | dtype = mat.dtype 198 | itemsize = np.dtype(dtype).itemsize 199 | 200 | input_3d = False 201 | if len(mat.shape) == 2: 202 | N, M = mat.shape 203 | if stop is None: 204 | stop = M 205 | elif len(mat.shape) == 3: 206 | input_3d = True 207 | N, M, Z = mat.shape 208 | if stop is None: 209 | stop = M 210 | start = start * Z 211 | stop = stop * Z 212 | M = M * Z 213 | mat = mat.reshape((N, M)) 214 | else: 215 | raise ValueError("mat must have two or three dimensions") 216 | m = stop - start 217 | 218 | assert mat.flags.c_contiguous 219 | assert start >= 0 and start <= M and stop >= 0 and \ 220 | stop <= M and stop > start 221 | 222 | if target is None: 223 | target = gpuarray.empty((N, m), dtype, allocator=memory_pool.allocate) 224 | 225 | copy = drv.Memcpy2D() 226 | copy.set_src_device(mat.gpudata) 227 | copy.src_x_in_bytes = start * itemsize 228 | copy.set_dst_device(target.gpudata) 229 | copy.src_pitch = M * itemsize 230 | copy.dst_pitch = copy.width_in_bytes = m * itemsize 231 | copy.height = N 232 | copy(aligned=True) 233 | 234 
| if input_3d: 235 | assert not m % Z 236 | target = target.reshape((N, m // Z, Z)) 237 | 238 | return target 239 | 240 | 241 | def insert_columns(src, dst, offset): 242 | dtype = src.dtype 243 | itemsize = np.dtype(dtype).itemsize 244 | if len(src.shape) == 2: 245 | h_src, w_src = src.shape 246 | elif len(src.shape) == 3: 247 | h_src = src.shape[0] 248 | w_src = np.prod(src.shape[1:]) 249 | h_dst, w_dst = dst.shape 250 | 251 | assert dst.dtype == dtype 252 | assert h_src == h_dst 253 | assert w_dst >= offset + w_src 254 | 255 | copy = drv.Memcpy2D() 256 | copy.set_src_device(src.gpudata) 257 | copy.set_dst_device(dst.gpudata) 258 | copy.dst_x_in_bytes = offset * itemsize 259 | copy.src_pitch = copy.width_in_bytes = w_src * itemsize 260 | copy.dst_pitch = w_dst * itemsize 261 | copy.height = h_src 262 | copy(aligned=True) 263 | 264 | def pad_array(mat, left=0, right=0, val=0., new_shape=None, stream=None): 265 | assert mat.flags.c_contiguous 266 | 267 | is_chararray = False 268 | if mat.dtype == '|S1': 269 | is_chararray = True 270 | mat.dtype = np.int8 271 | if type(val) is str: 272 | val = ord(val) 273 | 274 | if len(mat.shape) == 2: 275 | height, width = mat.shape 276 | elif len(mat.shape) > 2: 277 | height = mat.shape[0] 278 | width = np.prod(mat.shape[1:]) 279 | mat = mat.reshape((height, width)) 280 | else: 281 | raise ValueError('Array must be at least two-dimensional.') 282 | 283 | padded_width = width + left + right 284 | 285 | padded_mat = gpuarray.empty((height, padded_width), dtype=mat.dtype, 286 | allocator=memory_pool.allocate).fill(val) 287 | 288 | itemsize = np.dtype(padded_mat.dtype).itemsize 289 | copy = drv.Memcpy2D() 290 | copy.set_src_device(mat.gpudata) 291 | copy.set_dst_device(padded_mat.gpudata) 292 | copy.dst_x_in_bytes = left * itemsize 293 | copy.src_pitch = copy.width_in_bytes = width * itemsize 294 | copy.dst_pitch = padded_width * itemsize 295 | copy.height = height 296 | copy(stream) 297 | 298 | if new_shape is not None: 299 | 
padded_mat = padded_mat.reshape(new_shape) 300 | 301 | if is_chararray: 302 | mat.dtype = np.dtype('|S1') 303 | padded_mat.dtype = np.dtype('|S1') 304 | 305 | return padded_mat 306 | 307 | def rand_array(shape, dtype=np.float32, dist='uniform', stream=None): 308 | mat = gpuarray.empty(shape, dtype, allocator=memory_pool.allocate) 309 | if dist == 'uniform': 310 | sampler.fill_uniform(mat, stream=stream) 311 | elif dist == 'normal': 312 | sampler.fill_normal(mat, stream=stream) 313 | return mat 314 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/reductions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | import numpy as np 18 | from pycuda import gpuarray 19 | from . import linalg 20 | from .. 
import memory_pool 21 | 22 | max_column = None 23 | max_row = None 24 | def init(): 25 | from pycuda.compiler import SourceModule 26 | 27 | global max_column 28 | global max_row 29 | 30 | code = """ 31 | #include "float.h" 32 | 33 | __global__ void kMaxColumnwise(float* mat, 34 | float* target, 35 | unsigned int width, 36 | unsigned int height) { 37 | __shared__ float max_vals[32]; 38 | float cur_max = -FLT_MAX; 39 | float val = 0; 40 | 41 | for (unsigned int i = threadIdx.x; i < height; i += 32) { 42 | val = mat[blockIdx.x + i * width]; 43 | 44 | if (val > cur_max) 45 | cur_max = val; 46 | } 47 | 48 | max_vals[threadIdx.x] = cur_max; 49 | 50 | __syncthreads(); 51 | 52 | if (threadIdx.x == 0) { 53 | cur_max = -FLT_MAX; 54 | 55 | for (unsigned int i = 0; i < 32; i++) 56 | if (max_vals[i] > cur_max) 57 | cur_max = max_vals[i]; 58 | 59 | target[blockIdx.x] = cur_max; 60 | } 61 | // __syncthreads(); 62 | } 63 | 64 | __global__ void kMaxRowwise(float* mat, 65 | float* target, 66 | unsigned int width, 67 | unsigned int height) { 68 | __shared__ float max_vals[32]; 69 | float cur_max = -FLT_MAX; 70 | float val = 0; 71 | 72 | for (unsigned int i = threadIdx.x; i < width; i += 32) { 73 | val = mat[blockIdx.x * width + i]; 74 | 75 | if (val > cur_max) 76 | cur_max = val; 77 | } 78 | 79 | max_vals[threadIdx.x] = cur_max; 80 | 81 | __syncthreads(); 82 | 83 | if (threadIdx.x == 0) { 84 | cur_max = -FLT_MAX; 85 | 86 | for (unsigned int i = 0; i < 32; i++) 87 | if (max_vals[i] > cur_max) 88 | cur_max = max_vals[i]; 89 | 90 | target[blockIdx.x] = cur_max; 91 | } 92 | // __syncthreads(); 93 | } 94 | """ 95 | 96 | mod = SourceModule(code) 97 | max_column = mod.get_function("kMaxColumnwise").prepare('PPII') 98 | max_row = mod.get_function("kMaxRowwise").prepare('PPII') 99 | 100 | 101 | def max_by_axis(mat, axis=0): 102 | assert mat.flags.c_contiguous 103 | assert axis in (0, 1) 104 | 105 | n, m = mat.shape 106 | 107 | if axis == 0: 108 | target = gpuarray.empty(m, dtype=np.float32) 
109 | max_column.prepared_call( 110 | (m, 1, 1), (32, 1, 1), 111 | mat.gpudata, target.gpudata, 112 | np.int32(m), np.int32(n)) 113 | 114 | elif axis == 1: 115 | target = gpuarray.empty(n, dtype=np.float32) 116 | max_row.prepared_call( 117 | (n, 1, 1), (32, 1, 1), 118 | mat.gpudata, target.gpudata, 119 | np.int32(m), np.int32(n)) 120 | 121 | return target 122 | 123 | 124 | def _matrix_sum_out_axis_wrapper(): 125 | one_vector_cache = {} 126 | 127 | def f(mat, axis=0, cache_one_vector=True, target=None): 128 | assert mat.flags.c_contiguous 129 | N, M = mat.shape 130 | 131 | if axis == 0: 132 | vec_shape = (N, 1) 133 | try: 134 | ones = one_vector_cache[vec_shape] 135 | except KeyError: 136 | ones = gpuarray.empty(vec_shape, dtype=mat.dtype, 137 | allocator=memory_pool.allocate).fill(1.) 138 | if cache_one_vector: one_vector_cache[vec_shape] = ones 139 | 140 | if target is None: 141 | target = gpuarray.empty((M,), mat.dtype, allocator=memory_pool.allocate) 142 | 143 | # if len(target.shape) == 1: 144 | # target = target.reshape((target.shape[0], 1)) 145 | # target.shape = (target.shape[0], 1) 146 | assert target.shape == (M,) 147 | linalg.dot(mat, ones, transa='T', target=target) 148 | elif axis == 1: 149 | vec_shape = (M, 1) 150 | try: 151 | ones = one_vector_cache[vec_shape] 152 | except KeyError: 153 | ones = gpuarray.empty((M, 1), dtype=mat.dtype, 154 | allocator=memory_pool.allocate).fill(1.) 
155 | if cache_one_vector: one_vector_cache[vec_shape] = ones 156 | 157 | if target is None: 158 | target = gpuarray.empty((N,), mat.dtype, allocator=memory_pool.allocate) 159 | 160 | # if len(target.shape) == 1: 161 | # target = target.reshape((target.shape[0], 1)) 162 | assert target.shape == (N,) 163 | linalg.dot(mat, ones, target=target) 164 | else: 165 | raise ValueError('axis must be 0 or 1') 166 | 167 | # target.shape = (target.shape[0], 1) 168 | return target 169 | return f 170 | matrix_sum_out_axis = _matrix_sum_out_axis_wrapper() 171 | -------------------------------------------------------------------------------- /hebel/pycuda_ops/softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | from . import eps 18 | from .. 
import memory_pool
from .reductions import max_by_axis
from .matrix import add_vec_to_mat
from .reductions import matrix_sum_out_axis
from .elementwise import nan_to_zeros
from pycuda import cumath, gpuarray
import numpy as np


def logsumexp(mat):
    """Row-wise log(sum(exp(mat))), stabilized by subtracting the row max."""
    max_dim = max_by_axis(mat, 1)
    tmp = add_vec_to_mat(mat, max_dim, 0, substract=True)

    tmp = cumath.exp(tmp)

    tmp = matrix_sum_out_axis(tmp, 1)
    tmp = cumath.log(tmp)
    # max + log(sum(exp(mat - max)))
    max_dim += tmp
    return max_dim


def softmax(mat):
    """Row-wise softmax, computed via the numerically stable logsumexp.

    NOTE: the original allocated gpuarray.empty_like(mat) here and
    immediately overwrote the reference -- a dead device allocation,
    removed.
    """
    L = logsumexp(mat)
    tmp = add_vec_to_mat(mat, L, substract=True)
    tmp = cumath.exp(tmp)
    return tmp


def cross_entropy(x, y):
    """Cross-entropy loss -sum(y * log(x + eps)); NaN terms are zeroed."""
    loss = y * cumath.log(x + eps)
    nan_to_zeros(loss, loss)
    loss = -gpuarray.sum(loss)
    return loss


def cross_entropy_logistic(x, y):
    """Binary cross-entropy loss for logistic (sigmoid) outputs."""
    loss = y * cumath.log(x + eps) + (1. - y) * cumath.log(1. - x + eps)
    loss = -gpuarray.sum(loss)
    return loss
# --------------------------------------------------------------------------
# hebel/pycuda_ops/utils.py
# --------------------------------------------------------------------------
# This file is taken from scikits.cuda (https://github.com/lebedov/scikits.cuda)
# Copyright (c) 2009-2013, Lev Givon. All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:

# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# Neither the name of Lev Givon nor the names of any contributors may
# be used to endorse or promote products derived from this software
# without specific prior written permission. THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

#!/usr/bin/env python

"""
Utility functions.
"""

import ctypes
import re
import subprocess

try:
    import elftools
except ImportError:

    def get_soname(filename):
        """
        Retrieve SONAME of shared library.

        Parameters
        ----------
        filename : str
            Full path to shared library.

        Returns
        -------
        soname : str
            SONAME of shared library ('' if none is found).

        Notes
        -----
        This function uses the `objdump` system command.

        """

        try:
            p = subprocess.Popen(['objdump', '-p', filename],
                                 stdout=subprocess.PIPE)
            out = p.communicate()[0]
        except Exception:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            raise RuntimeError('error executing objdump')
        else:
            if isinstance(out, bytes):
                # Popen.communicate() returns bytes on Python 3; decode
                # before applying a text regex.
                out = out.decode('utf-8', 'replace')
            result = re.search(r'^\s+SONAME\s+(.+)$', out, re.MULTILINE)
            if result:
                return result.group(1)
            else:

                # No SONAME found:
                return ''

else:
    import elftools.elf.elffile as elffile
    import elftools.construct.macros as macros
    import elftools.elf.structs as structs

    def get_soname(filename):
        """
        Retrieve SONAME of shared library.

        Parameters
        ----------
        filename : str
            Full path to shared library.

        Returns
        -------
        soname : str
            SONAME of shared library ('' if none is found).

        Notes
        -----
        This function uses the pyelftools [ELF] package.

        References
        ----------
        .. [ELF] http://pypi.python.org/pypi/pyelftools

        """

        stream = open(filename, 'rb')
        f = elffile.ELFFile(stream)
        dynamic = f.get_section_by_name('.dynamic')
        dynstr = f.get_section_by_name('.dynstr')

        # Handle libraries built for different machine architectures:
        if f.header['e_machine'] == 'EM_X86_64':
            st = structs.Struct('Elf64_Dyn',
                                macros.ULInt64('d_tag'),
                                macros.ULInt64('d_val'))
        elif f.header['e_machine'] == 'EM_386':
            st = structs.Struct('Elf32_Dyn',
                                macros.ULInt32('d_tag'),
                                macros.ULInt32('d_val'))
        else:
            raise RuntimeError('unsupported machine architecture')

        entsize = dynamic['sh_entsize']
        # range/// instead of xrange// -- identical on Python 2 (the
        # values are ints), and also works on Python 3.
        for k in range(dynamic['sh_size'] // entsize):
            result = st.parse(dynamic.data()[k * entsize:(k + 1) * entsize])

            # The following value for the SONAME tag is specified in elf.h:
            if result.d_tag == 14:
                return dynstr.get_string(result.d_val)

        # No SONAME found:
        return ''
# --------------------------------------------------------------------------
# hebel/schedulers.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

""" A bunch of different schedulers to scale learning
parameters. These are used e.g. to slowly reduce the learning rate
during training or scale momentum up and down during the early and
late phases of training.
"""


def constant_scheduler(value):
    """ Yields `value` forever. """
    while True:
        yield value


def exponential_scheduler(init_value, decay):
    """ Decreases exponentially: value_t = init_value * decay ** t """

    value = init_value
    while True:
        yield value
        value *= decay


def linear_scheduler_up(init_value, target_value, duration):
    """ Increases linearly from init_value to target_value over
    `duration` steps and then stays flat """

    value = init_value
    t = 0
    while True:
        yield value
        t += 1
        if t < duration:
            # float() prevents integer truncation when all arguments are
            # ints (consistent with linear_scheduler_up_down below, which
            # already guarded against this).
            value = init_value + t * (target_value - init_value) / \
                float(duration)
        else:
            value = target_value


def linear_scheduler_up_down(init_value, target_value, final_value,
                             duration_up, t_decrease, duration_down):
    """ Increases linearly to target_value, stays at target_value until
    t_decrease and then decreases linearly to final_value over
    duration_down steps
    """

    value = init_value
    t = 0

    while True:
        yield value
        t += 1
        if t < duration_up:
            value = init_value + t * (target_value - init_value) / \
                float(duration_up)
        elif t > t_decrease:
            value = target_value - (t - t_decrease) * \
                (target_value - final_value) / \
                float(duration_down)
        else:
            value = target_value
# --------------------------------------------------------------------------
# hebel/utils/__init__.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# --------------------------------------------------------------------------
# hebel/utils/call_check.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Utility functions for checking passed arguments against call signature
of a function or class constructor.
"""
import functools
import inspect
import types
from .string_utils import match


def check_call_arguments(to_call, kwargs):
    """
    Check the call signature against a dictionary of proposed arguments,
    raising an informative exception in the case of mismatch.

    Parameters
    ----------
    to_call : class or callable
        Function or class to examine (in the case of classes, the
        constructor call signature is analyzed)
    kwargs : dict
        Dictionary mapping parameter names (including positional
        arguments) to proposed values.
    """
    if 'self' in kwargs.keys():
        raise TypeError("Your dictionary includes an entry for 'self', "
                        "which is just asking for trouble")

    orig_to_call = getattr(to_call, '__name__', str(to_call))
    # For classes and callable objects, inspect the constructor / __call__.
    if not isinstance(to_call, types.FunctionType):
        if hasattr(to_call, '__init__'):
            to_call = to_call.__init__
        elif hasattr(to_call, '__call__'):
            to_call = to_call.__call__

    # NOTE(review): inspect.getargspec was removed in Python 3.11; this is
    # fine for the Python 2 target of this codebase.
    args, varargs, keywords, defaults = inspect.getargspec(to_call)

    if any(not isinstance(arg, str) for arg in args):
        raise TypeError('%s uses argument unpacking, which is deprecated and '
                        'unsupported by this pylearn2' % orig_to_call)

    if varargs is not None:
        raise TypeError('%s has a variable length argument list, but '
                        'this is not supported by config resolution' %
                        orig_to_call)

    if keywords is None:
        bad_keywords = [arg_name for arg_name in kwargs.keys()
                        if arg_name not in args]

        if len(bad_keywords) > 0:
            bad = ', '.join(bad_keywords)
            args = [arg for arg in args if arg != 'self']
            if len(args) == 0:
                matched_str = '(It does not support any keywords, actually)'
            else:
                matched = [match(keyword, args) for keyword in bad_keywords]
                matched_str = 'Did you mean %s?' % (', '.join(matched))
            raise TypeError('%s does not support the following '
                            'keywords: %s. %s' %
                            (orig_to_call, bad, matched_str))

    num_defaults = 0 if defaults is None else len(defaults)

    required = args[:len(args) - num_defaults]
    missing = [arg for arg in required if arg not in kwargs]

    if len(missing) > 0:
        # iff the im_self (or __self__) field is present, this is a
        # bound method, which has 'self' listed as an argument, but
        # which should not be supplied by kwargs
        is_bound = hasattr(to_call, 'im_self') or hasattr(to_call, '__self__')
        if len(missing) > 1 or missing[0] != 'self' or not is_bound:
            if 'self' in missing:
                missing.remove('self')
            missing = ', '.join([str(m) for m in missing])
            raise TypeError('%s did not get these expected '
                            'arguments: %s' % (orig_to_call, missing))


def checked_call(to_call, kwargs):
    """
    Attempt calling a function or instantiating a class with a given set of
    arguments, raising a more helpful exception in the case of argument
    mismatch.

    Parameters
    ----------
    to_call : class or callable
        Function or class to examine (in the case of classes, the
        constructor call signature is analyzed)
    kwargs : dict
        Dictionary mapping parameter names (including positional
        arguments) to proposed values.
    """
    try:
        return to_call(**kwargs)
    except TypeError:
        # Re-raise with a diagnostic if the arguments are to blame;
        # otherwise propagate the original TypeError.
        check_call_arguments(to_call, kwargs)
        raise


def sensible_argument_errors(func):
    """Decorator that rewrites TypeErrors caused by bad call arguments
    into more informative messages."""
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            # The original discarded func's return value; pass it through.
            return func(*args, **kwargs)
        except TypeError:
            argnames, varargs, keywords, defaults = inspect.getargspec(func)
            posargs = dict(zip(argnames, args))
            bad_keywords = [keyword for keyword in kwargs
                            if keyword not in argnames]

            if len(bad_keywords) > 0:
                bad = ', '.join(bad_keywords)
                # func.__name__ is identical to Python 2's func_name and
                # also works on Python 3.
                raise TypeError('%s() does not support the following '
                                'keywords: %s' % (str(func.__name__), bad))
            allargsgot = set(list(kwargs.keys()) + list(posargs.keys()))
            # `defaults` is None when the function declares no default
            # values; the original called len(None) here and crashed.
            numrequired = len(argnames) - len(defaults or ())
            diff = list(set(argnames[:numrequired]) - allargsgot)
            if len(diff) > 0:
                raise TypeError('%s() did not get required args: %s' %
                                (str(func.__name__), ', '.join(diff)))
            raise
    return wrapped_func
# --------------------------------------------------------------------------
# hebel/utils/environ.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Utilities for working with environment variables
import os


def putenv(key, value):
    """Set an environment variable visible both to this process and to
    any subprocesses it launches."""
    # this makes the change visible to other parts of the code
    # in this same process
    os.environ[key] = value
    # this makes it available to any subprocesses we launch
    os.putenv(key, value)

# --------------------------------------------------------------------------
# hebel/utils/exc.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

__author__ = "Ian Goodfellow"
"""
Exceptions related to datasets
"""


class EnvironmentVariableError(Exception):
    """ An exception raised when a required environment variable is not defined """

    def __init__(self, *args):
        super(EnvironmentVariableError, self).__init__(*args)


class NoDataPathError(EnvironmentVariableError):
    """
    Exception raised when PYLEARN2_DATA_PATH is required but has not been
    defined.
    """
    def __init__(self):
        # data_path_essay is defined below; it is only looked up at
        # construction time, so the forward reference is fine.
        super(NoDataPathError, self).__init__(data_path_essay)

data_path_essay = """\
You need to define your PYLEARN2_DATA_PATH environment variable. If you are
using a computer at LISA, this should be set to /data/lisa/data.
"""


class NotInstalledError(Exception):
    """
    Exception raised when a dataset appears not to be installed.
    This is different from an individual file missing within a dataset,
    the file not loading correctly, etc.
    This exception is used to make unit tests skip testing of datasets
    that haven't been installed.
    We do want the unit test to run and crash if the dataset is installed
    incorrectly.
    """

# --------------------------------------------------------------------------
# hebel/utils/math.py
# --------------------------------------------------------------------------

def ceil_div(x, y):
    """Integer ceiling division: the smallest integer >= x / y.

    Uses floor division rather than the original int((x + y - 1) / y),
    which went through a float and lost exactness for integers beyond
    2**53.
    """
    return int((x + y - 1) // y)


def div_up(x, y):
    """Round x up to the next multiple of y."""
    return y * ceil_div(x, y)

# --------------------------------------------------------------------------
# hebel/utils/plotting.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from pycuda import gpuarray
import numpy as np
from math import ceil

def show_filters(W, img_dims, columns=10, normalize=True, **kwargs):
    """Display the filters (columns) of weight matrix W as a tiled
    grayscale image.

    W may be a (D, N) gpuarray or ndarray; img_dims is the 2D shape of a
    single filter; kwargs are forwarded to plt.figure.
    """
    import matplotlib.pyplot as plt
    if isinstance(W, gpuarray.GPUArray): W = W.get()

    D, N = W.shape

    if normalize:
        # Scale all filters jointly into [0, 1] for display.
        W = W - W.min() #[np.newaxis,:]
        W = W / W.max() #[np.newaxis,:]

    # NOTE(review): the original computed rows = int(ceil(N / columns))
    # but never used it (and Python 2 integer division made the ceil a
    # no-op anyway); the dead assignment has been removed.

    fig = plt.figure(1, **kwargs)
    plt.subplots_adjust(left=0., right=.51, wspace=.1, hspace=.01)

    # Tile the N filters into a rows-by-columns mosaic.
    filters = np.rollaxis(W.reshape(img_dims + (N,)), 2)
    filters = np.vstack([np.hstack(filters[i:i + columns])
                         for i in range(0, N, columns)])
    plt.axis('off')
    plt.imshow(filters, cmap=plt.cm.gray, interpolation='nearest', figure=fig)
# --------------------------------------------------------------------------
# hebel/utils/string_utils.py
# --------------------------------------------------------------------------
# Copyright (C) 2013 Hannes Bretschneider

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 | 17 | """ Utilities for modifying strings""" 18 | 19 | import os 20 | import warnings 21 | import re 22 | import functools 23 | 24 | from .exc import EnvironmentVariableError, NoDataPathError 25 | 26 | def preprocess(string): 27 | """ 28 | Preprocesses a string, by replacing ${VARNAME} with 29 | os.environ['VARNAME'] 30 | 31 | Parameters 32 | ---------- 33 | string: the str object to preprocess 34 | 35 | Returns 36 | ------- 37 | the preprocessed string 38 | """ 39 | 40 | split = string.split('${') 41 | 42 | rval = [split[0]] 43 | 44 | for candidate in split[1:]: 45 | subsplit = candidate.split('}') 46 | 47 | if len(subsplit) < 2: 48 | raise ValueError('Open ${ not followed by } before ' \ 49 | + 'end of string or next ${ in "' \ 50 | + string + '"') 51 | 52 | varname = subsplit[0] 53 | 54 | if varname == 'PYLEARN2_TRAIN_FILE_NAME': 55 | warnings.warn("PYLEARN2_TRAIN_FILE_NAME is deprecated and may be " 56 | "removed from the library on or after Oct 22, 2013. Switch" 57 | " to PYLEARN2_TRAIN_FILE_FULL_STEM") 58 | 59 | try: 60 | val = os.environ[varname] 61 | except KeyError: 62 | if varname == 'PYLEARN2_DATA_PATH': 63 | raise NoDataPathError() 64 | if varname == 'PYLEARN2_VIEWER_COMMAND': 65 | raise EnvironmentVariableError(environment_variable_essay) 66 | 67 | raise ValueError('Unrecognized environment variable "' + varname 68 | + '". 
Did you mean ' + match(varname, os.environ.keys()) 69 | + '?') 70 | 71 | rval.append(val) 72 | 73 | rval.append('}'.join(subsplit[1:])) 74 | 75 | rval = ''.join(rval) 76 | 77 | return rval 78 | 79 | 80 | 81 | 82 | def find_number(s): 83 | """ s is a string 84 | returns None if there are no numbers in the string 85 | otherwise returns the range of characters occupied by the first 86 | number in the string """ 87 | 88 | r = re.search('-?\d+[.e]?\d*',s) 89 | if r is not None: 90 | return r.span(0) 91 | return None 92 | 93 | def tokenize_by_number(s): 94 | """ splits a string into a list of tokens 95 | each is either a string containing no numbers 96 | or a float """ 97 | 98 | r = find_number(s) 99 | 100 | if r == None: 101 | return [ s ] 102 | else: 103 | tokens = [] 104 | if r[0] > 0: 105 | tokens.append(s[0:r[0]]) 106 | tokens.append( float(s[r[0]:r[1]]) ) 107 | if r[1] < len(s): 108 | tokens.extend(tokenize_by_number(s[r[1]:])) 109 | return tokens 110 | assert False #line should be unreached 111 | 112 | 113 | def number_aware_alphabetical_cmp(str1, str2): 114 | """ cmp function for sorting a list of strings by alphabetical order, but with 115 | numbers sorted numerically. 
116 | 117 | i.e., foo1, foo2, foo10, foo11 118 | instead of foo1, foo10 119 | """ 120 | 121 | def flatten_tokens(tokens): 122 | l = [] 123 | for token in tokens: 124 | if isinstance(token, str): 125 | for char in token: 126 | l.append(char) 127 | else: 128 | assert isinstance(token, float) 129 | l.append(token) 130 | return l 131 | 132 | seq1 = flatten_tokens(tokenize_by_number(str1)) 133 | seq2 = flatten_tokens(tokenize_by_number(str2)) 134 | 135 | l = min(len(seq1),len(seq2)) 136 | 137 | i = 0 138 | 139 | while i < l: 140 | if seq1[i] < seq2[i]: 141 | return -1 142 | elif seq1[i] > seq2[i]: 143 | return 1 144 | i += 1 145 | 146 | if len(seq1) < len(seq2): 147 | return -1 148 | elif len(seq1) > len(seq2): 149 | return 1 150 | 151 | return 0 152 | 153 | def match(wrong, candidates): 154 | """ 155 | wrong: a mispelling 156 | candidates: a set of correct words 157 | 158 | returns a guess of which candidate is the right one 159 | 160 | This should be used with a small number of candidates and a high potential 161 | edit distance. 162 | ie, use it to correct a wrong filename in a directory, wrong class name 163 | in a module, etc. Don't use it to correct small typos of freeform natural 164 | language words. 
165 | """ 166 | 167 | assert len(candidates) > 0 168 | 169 | # Current implementation tries all candidates and outputs the one 170 | # with the min score 171 | # Could try to do something smarter 172 | 173 | def score(w1,w2): 174 | # Current implementation returns negative dot product of 175 | # the two words mapped into a feature space by mapping phi 176 | # w -> [ phi(w1), .1 phi(first letter of w), .1 phi(last letter of w) ] 177 | # Could try to do something smarter 178 | 179 | w1 = w1.lower() 180 | w2 = w2.lower() 181 | 182 | def phi(w): 183 | # Current feature mapping is to the vector of counts of 184 | # all letters and two-letter sequences 185 | # Could try to do something smarter 186 | rval = {} 187 | 188 | for i in xrange(len(w)): 189 | l = w[i] 190 | rval[l] = rval.get(l,0.) + 1. 191 | if i < len(w)-1: 192 | b = w[i:i+2] 193 | rval[b] = rval.get(b,0.) + 1. 194 | 195 | return rval 196 | 197 | d1 = phi(w1) 198 | d2 = phi(w2) 199 | 200 | def mul(d1, d2): 201 | rval = 0 202 | 203 | for key in set(d1).union(d2): 204 | rval += d1.get(key,0) * d2.get(key,0) 205 | 206 | return rval 207 | 208 | tot_score = mul(phi(w1),phi(w2)) / float(len(w1)*len(w2)) + \ 209 | 0.1 * mul(phi(w1[0:1]), phi(w2[0:1])) + \ 210 | 0.1 * mul(phi(w1[-1:]), phi(w2[-1:])) 211 | 212 | return tot_score 213 | 214 | scored_candidates = [ (-score(wrong, candidate), candidate) 215 | for candidate in candidates ] 216 | 217 | scored_candidates.sort() 218 | 219 | return scored_candidates[0][1] 220 | 221 | def censor_non_alphanum(s): 222 | """ 223 | Returns s with all non-alphanumeric characters replaced with * 224 | """ 225 | 226 | def censor(ch): 227 | if (ch >= 'A' and ch <= 'z') or (ch >= '0' and ch <= '9'): 228 | return ch 229 | return '*' 230 | 231 | return ''.join([censor(ch) for ch in s]) 232 | 233 | environment_variable_essay = """ 234 | PYLEARN2_VIEWER_COMMAND not defined. 
PLEASE READ THE FOLLOWING MESSAGE CAREFULLY 235 | TO SET UP THIS ENVIRONMENT VARIABLE: 236 | 237 | pylearn2 uses an external program to display images. Because different systems have different 238 | image programs available, pylearn2 requires the user to specify what image viewer program to 239 | use. 240 | 241 | You need to choose an image viewer program that pylearn2 should use. Then tell pylearn2 to use 242 | that image viewer program by defining your PYLEARN2_VIEWER_COMMAND environment variable. 243 | 244 | You need to choose PYLEARN_VIEWER_COMMAND such that running 245 | 246 | ${PYLEARN2_VIEWER_COMMAND} image.png 247 | 248 | in a command prompt on your machine will do the following: 249 | -open an image viewer in a new process. 250 | -not return until you have closed the image. 251 | 252 | Acceptable commands include: 253 | gwenview 254 | eog --new-instance 255 | 256 | This is assuming that you have gwenview or a version of eog that supports --new-instance 257 | installed on your machine. If you don't, install one of those, or figure out a command 258 | that has the above properties that is available from your setup. 259 | 260 | On most linux setups, you can define your environment variable by adding this line to your 261 | ~/.bashrc file: 262 | 263 | export PYLEARN2_VIEWER_COMMAND="eog --new-instance" 264 | 265 | *** YOU MUST INCLUDE THE WORD "export". DO NOT JUST ASSIGN TO THE ENVIRONMENT VARIABLE *** 266 | If you do not include the word "export", the environment variable will be set in your 267 | bash shell, but will not be visible to processes that you launch from it, like the python 268 | interpreter. 269 | 270 | Don't forget that changes from your .bashrc file won't apply until you run 271 | 272 | source ~/.bashrc 273 | 274 | or open a new terminal window. If you're seeing this from an ipython notebook 275 | you'll need to restart the ipython notebook, or maybe modify os.environ from 276 | an ipython cell. 
277 | """ 278 | -------------------------------------------------------------------------------- /hebel/version.py: -------------------------------------------------------------------------------- 1 | version = '0.03-dev' 2 | release = '0.02' -------------------------------------------------------------------------------- /hebel_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Hannes Bretschneider 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | 13 | # You should have received a copy of the GNU General Public License along 14 | # with this program; if not, write to the Free Software Foundation, Inc., 15 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
import hebel
hebel.init(0)

import unittest
import random
import numpy as np
from pycuda import gpuarray
from pycuda.curandom import rand as curand
from hebel import sampler, memory_pool
from hebel.models import NeuralNet, NeuralNetRegression
from hebel.optimizers import SGD
from hebel.parameter_updaters import SimpleSGDUpdate, \
    MomentumUpdate, NesterovMomentumUpdate
from hebel.data_providers import MNISTDataProvider, BatchDataProvider
from hebel.monitors import SimpleProgressMonitor
from hebel.schedulers import exponential_scheduler, linear_scheduler_up, \
    constant_scheduler
from hebel.pycuda_ops.matrix import extract_columns, insert_columns
from hebel.pycuda_ops.elementwise import sample_dropout_mask


class TestNeuralNetMNIST(unittest.TestCase):
    """Smoke tests for MNIST training: 20 epochs of SGD with each
    parameter updater must reduce the training error."""

    def setUp(self):
        # Mini-batches of 100 for training; the full test set at once.
        self.train_data = MNISTDataProvider('train', 100)
        self.test_data = MNISTDataProvider('test')
        self.D = self.train_data.D
        self.n_out = 10

    def test_relu(self):
        # Plain SGD updates on a 1000-unit ReLU net with dropout.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[1000], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, SimpleSGDUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        # train_error holds (epoch, error) pairs; error must have dropped.
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer

    def test_momentum(self):
        # Classical momentum, ramped linearly from .5 to .9 over 5 epochs.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[1000], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, MomentumUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        momentum_schedule=linear_scheduler_up(.5, .9, 5),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer

    def test_nesterov_momentum(self):
        # Nesterov momentum on a smaller (100-unit) net.
        model = NeuralNet(n_in=self.D, n_out=self.n_out,
                          layers=[100], activation_function='relu',
                          dropout=True)
        optimizer = SGD(model, NesterovMomentumUpdate, self.train_data,
                        self.test_data,
                        learning_rate_schedule=exponential_scheduler(1., .99),
                        momentum_schedule=linear_scheduler_up(.5, .9, 5),
                        progress_monitor=SimpleProgressMonitor())
        optimizer.run(20)
        self.assertLess(optimizer.progress_monitor.train_error[-1][1],
                        optimizer.progress_monitor.train_error[0][1])
        del model, optimizer


class TestColumnSlicing(unittest.TestCase):
    """GPU column extract/insert must agree with NumPy slicing."""

    def test_extract_columns(self):
        for _ in range(20):
            dtype = random.choice((np.float32, np.float64))
            N = np.random.randint(100, 1000)
            M = np.random.randint(100, 1000)
            # Draw a from [0, M - 2] so that randint(a + 1, M) below always
            # has a non-empty range. The original drew a from [0, M - 1];
            # when a == M - 1 the call randint(M, M) raised ValueError
            # (np.random.randint requires low < high).
            a = np.random.randint(0, M - 1)
            b = np.random.randint(a + 1, M)
            m = b - a
            assert m > 0

            X = curand((N, M), dtype)
            Y = extract_columns(X, a, b)

            self.assertTrue(np.all(X.get()[:, a:b] == Y.get()))

    def test_insert_columns(self):
        for _ in range(20):
            dtype = random.choice((np.float32, np.float64))
            N = np.random.randint(100, 1000)
            M = np.random.randint(100, 1000)
            # m in [1, M - 1], so M - m >= 1 and the offset draw is valid.
            m = np.random.randint(1, M)
            offset = np.random.randint(0, M - m)

            X = curand((N, M), dtype)
            Y = curand((N, m), dtype)
            insert_columns(Y, X, offset)

            self.assertTrue(np.all(X.get()[:, offset:offset+m] == Y.get()))


class TestSampleDropoutMask(unittest.TestCase):
    """The sampled dropout mask must zero ~dropout_prob of X in place."""

    # Tolerance on the empirical dropout rate over 10^7 elements.
    TOL = 1e-3

    def test_sample_dropout_mask(self):
        for _ in range(20):
            height = 1000
            width = 10000
            dropout_prob = np.random.rand()
            X = sampler.gen_uniform((height, width), np.float32)
            dropout_mask = sample_dropout_mask(X, dropout_prob)
            # Fraction of zeros in the mask should match dropout_prob.
            dropout_rate = 1. - dropout_mask.get().mean()

            self.assertLess(np.abs(dropout_prob - dropout_rate), self.TOL)
            # X itself must have been zeroed exactly where the mask is 0.
            self.assertTrue(np.all((X.get() != 0.) == dropout_mask.get()))

    def test_sample_dropout_mask_columns(self):
        for _ in range(20):
            height = 10000
            width = 10000
            dropout_prob = np.random.rand()
            X = sampler.gen_uniform((height, width), np.float32)

            # Restrict dropout to a random 1000-column window.
            start = np.random.randint(0, width - 1000)
            end = start + 1000
            columns = (start, end)

            dropout_mask = sample_dropout_mask(X, dropout_prob, columns)
            dropout_rate = 1. - dropout_mask.get().mean()

            self.assertEqual(dropout_mask.shape, (X.shape[0], end - start))
            self.assertLess(np.abs(dropout_prob - dropout_rate),
                            self.TOL)
            # Only the selected columns of X may be zeroed.
            self.assertTrue(np.all((X.get()[:, start:end] != 0.)
                                   == dropout_mask.get()))


class TestNeuralNetRegression(unittest.TestCase):
    """A linear NeuralNetRegression trained with SGD must recover the
    ordinary-least-squares solution of a synthetic linear model."""

    def test_neural_net_regression(self):
        for _ in range(20):
            N = 10000  # Number of data points
            D = 100    # Dimensionality of exogenous data
            P = 50     # Dimensionality of endogenous data

            # Ground-truth weights in [-5, 5) and biases in [-50, 50).
            W_true = 10 * np.random.rand(D, P) - 5
            b_true = 100 * np.random.rand(P) - 50

            X = np.random.randn(N, D)
            Y = np.dot(X, W_true) + b_true[np.newaxis, :] + np.random.randn(N, P)

            # Closed-form least-squares reference (intercept column first).
            W_lstsq = np.linalg.lstsq(np.c_[np.ones((N, 1)), X], Y)[0]
            b_lstsq = W_lstsq[0]
            W_lstsq = W_lstsq[1:]

            data_provider = BatchDataProvider(gpuarray.to_gpu(X.astype(np.float32),
                                                              allocator=memory_pool.allocate),
                                              gpuarray.to_gpu(Y.astype(np.float32),
                                                              allocator=memory_pool.allocate))

            # No hidden layers: the model is a pure linear regression.
            model = NeuralNetRegression([], n_in=D, n_out=P)
            optimizer = SGD(model, SimpleSGDUpdate,
                            data_provider, data_provider,
                            learning_rate_schedule=constant_scheduler(10.),
                            early_stopping=True)
            optimizer.run(100)

self.assertLess(np.abs(W_lstsq - model.top_layer.W.get()).max(), 183 | 1e-5) 184 | 185 | if __name__ == '__main__': 186 | unittest.main() 187 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from hebel.version import version 3 | 4 | try: 5 | from pypandoc import convert 6 | read_md = lambda f: convert(f, 'rst') 7 | except ImportError: 8 | print("warning: pypandoc module not found, could not convert Markdown to RST") 9 | read_md = lambda f: open(f, 'r').read() 10 | 11 | setup( name='Hebel', 12 | version=version, 13 | description='GPU-Accelerated ' 14 | 'Deep Learning Library in Python', 15 | long_description=read_md('README.md'), 16 | keywords='cuda gpu machine-learning deep-learning neural-networks', 17 | classifiers=[ 18 | 'Development Status :: 3 - Alpha', 19 | 'Intended Audience :: Science/Research', 20 | 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', 21 | 'Programming Language :: C', 22 | 'Programming Language :: Python :: 2.7', 23 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 24 | 'Topic :: Scientific/Engineering :: Image Recognition' 25 | ], 26 | url='https://github.com/hannes-brt/hebel', 27 | author='Hannes Bretschneider', 28 | author_email='hannes@psi.utoronto.ca', 29 | license='GPLv2', 30 | packages=['hebel', 31 | 'hebel.models', 32 | 'hebel.layers', 33 | 'hebel.utils', 34 | 'hebel.pycuda_ops'], 35 | install_requires=[ 36 | 'pycuda', 37 | 'numpy', 38 | 'pyyaml', 39 | 'skdata' 40 | ], 41 | test_suite='nose.collector', 42 | tests_require=['nose'], 43 | scripts=['train_model.py'], 44 | include_package_data=True, 45 | zip_safe=False 46 | ) 47 | -------------------------------------------------------------------------------- /train_model.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (C) 2013 Hannes 
Bretschneider 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program; if not, write to the Free Software Foundation, Inc., 16 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 17 | 18 | from hebel.config import run_from_config 19 | 20 | description = """ Run this script with a yaml configuration file as input. 21 | E.g.: 22 | 23 | python train_model.py examples/mnist_neural_net_deep.yml 24 | 25 | """ 26 | 27 | if __name__ == "__main__": 28 | import argparse 29 | 30 | parser = argparse.ArgumentParser(description=description) 31 | parser.add_argument('config_file') 32 | args = parser.parse_args() 33 | 34 | if not args.config_file.endswith('.yml') and not args.config_file.endswith('.yaml'): 35 | args.config_file = args.config_file + '.yml' 36 | 37 | yaml_src = ''.join(open(args.config_file).readlines()) 38 | 39 | run_from_config(yaml_src) 40 | --------------------------------------------------------------------------------