├── .gitignore ├── .scrutinizer.yml ├── .travis.yml ├── CONTRIBUTING.rst ├── LICENSE ├── README.rst ├── bin └── blocks-continue ├── blocks ├── __init__.py ├── algorithms │ └── __init__.py ├── bricks │ ├── __init__.py │ ├── attention.py │ ├── base.py │ ├── bn.py │ ├── conv.py │ ├── cost.py │ ├── interfaces.py │ ├── lookup.py │ ├── parallel.py │ ├── recurrent │ │ ├── __init__.py │ │ ├── architectures.py │ │ ├── base.py │ │ └── misc.py │ ├── sequence_generators.py │ ├── sequences.py │ ├── simple.py │ └── wrappers.py ├── config.py ├── extensions │ ├── __init__.py │ ├── monitoring.py │ ├── predicates.py │ ├── saveload.py │ ├── stopping.py │ └── training.py ├── filter.py ├── graph │ ├── __init__.py │ ├── annotations.py │ └── bn.py ├── initialization.py ├── log │ ├── __init__.py │ ├── log.py │ └── sqlite.py ├── main_loop.py ├── model.py ├── monitoring │ ├── __init__.py │ ├── aggregation.py │ └── evaluators.py ├── roles.py ├── scripts │ └── __init__.py ├── search.py ├── select.py ├── serialization.py ├── theano_expressions.py ├── utils │ ├── __init__.py │ ├── containers.py │ ├── profile.py │ ├── testing.py │ ├── theano_utils.py │ └── utils.py └── version.py ├── docs ├── _static │ ├── code_quality.png │ ├── mnist.png │ ├── plot_a.png │ ├── plot_cost.png │ ├── sequence_generator_scheme.png │ └── sequence_generator_scheme.svg ├── api │ ├── algorithms.rst │ ├── bricks.rst │ ├── extensions.rst │ ├── filter.rst │ ├── graph.rst │ ├── index.rst │ ├── initialization.rst │ ├── log.rst │ ├── main_loop.rst │ ├── model.rst │ ├── roles.rst │ ├── select.rst │ ├── serialization.rst │ ├── theano_expressions.rst │ └── utils.rst ├── bricks_overview.rst ├── cg.rst ├── conf.py ├── configuration.rst ├── create_your_own_brick.rst ├── development │ ├── docs.rst │ ├── index.rst │ ├── internal_api.rst │ └── pull_request.rst ├── index.rst ├── plotting.rst ├── rnn.rst ├── serialization.rst ├── setup.rst └── tutorial.rst ├── doctests └── __init__.py ├── req-rtd.txt ├── req-scrutinizer.txt ├── req-travis-conda.txt ├── req-travis-docs-pip.txt ├── req-travis-pip.txt ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── algorithms ├── __init__.py └── test_algorithms.py ├── bricks ├── __init__.py ├── test_attention.py ├── test_bn.py ├── test_bricks.py ├── test_conv.py ├── test_cost.py ├── test_interfaces.py ├── test_lookup.py ├── test_recurrent.py ├── test_sequence_generators.py └── test_wrappers.py ├── extensions ├── __init__.py ├── test_extensions.py ├── test_monitoring.py ├── test_progressbar.py ├── test_saveload.py ├── test_stopping.py ├── test_timing.py └── test_training.py ├── graph └── test_bn.py ├── monitoring ├── __init__.py ├── test_aggregation.py ├── test_evaluators.py └── test_monitored_quantity.py ├── test_config.py ├── test_graph.py ├── test_initialization.py ├── test_log.py ├── test_main_loop.py ├── test_model.py ├── test_roles.py ├── test_search.py ├── test_select.py ├── test_serialization.py ├── test_theano_expressions.py ├── test_variable_filter.py └── utils ├── __init__.py ├── test_containers.py ├── test_imports.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these 
files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # Eclipse 57 | .project 58 | .pydevproject 59 | .settings/ 60 | -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | build: 2 | environment: 3 | python: 3.5.0 4 | dependencies: 5 | override: 6 | - pip install -r req-scrutinizer.txt 7 | tests: 8 | override: 9 | - flake8 --filename='blocks-continue' bin 10 | - flake8 blocks doctests tests 11 | - pep257 bin --numpy --ignore=D100,D101,D102,D103 --match='blocks-continue' 12 | - pep257 blocks --numpy --ignore=D100,D101,D102,D103 13 | - pep257 doctests tests --numpy --ignore=D100,D101,D102,D103 --match='.*\.py' 14 | checks: 15 | python: 16 | code_rating: true 17 | duplicate_code: true 18 | format_bad_indentation: 19 | indentation: '4 spaces' 20 | format_mixed_indentation: true 21 | format_line_too_long: 22 | max_length: '79' 23 | imports_relative_import: true 24 | imports_wildcard_import: true 25 | format_bad_whitespace: true 26 | format_multiple_statements: true 27 | basic_invalid_name: 28 | functions: '[a-z_][a-z0-9_]{0,30}$' 29 | variables: '(([a-z_][a-z0-9_]{0,30})|(_?[A-Z]))$' 30 | whitelisted_names: '_,floatX,logger,config' 31 | constants: '(([A-Z_][A-Z0-9_]*)|(__.*__))$' 32 | attributes: '(([a-z_][a-z0-9_]{0,30})|(_?[A-Z]))$' 33 | arguments: '(([a-z_][a-z0-9_]{0,30})|(_?[A-Z]))$' 34 | class_attributes: '([A-Za-z_][A-Za-z0-9_]{0,30}|(__.*__))$' 35 | inline_vars: '[A-Za-z_][A-Za-z0-9_]*$' 36 | classes: '[A-Z_][a-zA-Z0-9]+$' 37 | modules: '(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$' 38 | methods: '[a-z_][a-z0-9_]{0,30}$' 39 | classes_no_self_argument: true 40 | classes_bad_mcs_method_argument: true 41 | classes_bad_classmethod_argument: true 42 | variables_unused_variable: true 43 | variables_unused_import: true 44 | variables_used_before_assignment: true 45 | variables_undefined_variable: true 46 | variables_undefined_loop_variable: true 47 | variables_redefined_outer_name: true 48 | variables_redefined_builtin: true 49 | variables_redefine_in_handler: true 50 | variables_no_name_in_module: true 51 | variables_global_variable_undefined: true 52 | variables_global_variable_not_assigned: true 53 | variables_global_statement: true 54 | typecheck_unexpected_keyword_arg: true 55 | variables_global_at_module_level: true 56 | variables_unused_wildcard_import: true 57 | variables_unused_argument: true 58 | variables_unpacking_non_sequence: true 59 | variables_undefined_all_variable: true 60 | variables_unbalanced_tuple_unpacking: true 61 | variables_invalid_all_object: true 62 | typecheck_too_many_function_args: true 63 | typecheck_redundant_keyword_arg: true 64 | typecheck_not_callable: true 65 | typecheck_no_member: true 66 | typecheck_missing_kwoa: true 67 | typecheck_maybe_no_member: true 68 | typecheck_duplicate_keyword_arg: true 69 | typecheck_assignment_from_none: true 70 | typecheck_assignment_from_no_return: true 71 | string_unused_format_string_key: true 72 | 
string_truncated_format_string: true 73 | string_too_many_format_args: true 74 | string_too_few_format_args: true 75 | string_mixed_format_string: true 76 | string_missing_format_string_key: true 77 | string_format_needs_mapping: true 78 | string_constant_anomalous_unicode_escape_in_string: true 79 | string_constant_anomalous_backslash_in_string: true 80 | string_bad_str_strip_call: true 81 | string_bad_format_string_key: true 82 | string_bad_format_character: true 83 | open_mode_bad_open_mode: true 84 | newstyle_bad_super_call: true 85 | logging_unsupported_format: true 86 | logging_too_many_args: true 87 | logging_too_few_args: true 88 | logging_not_lazy: true 89 | logging_format_truncated: true 90 | imports_reimported: true 91 | imports_import_self: true 92 | imports_deprecated_module: true 93 | imports_cyclic_import: true 94 | format_unnecessary_semicolon: true 95 | format_trailing_whitespace: true 96 | format_superfluous_parens: true 97 | format_old_ne_operator: true 98 | format_missing_final_newline: true 99 | format_lowercase_l_suffix: true 100 | format_backtick: true 101 | exceptions_raising_string: true 102 | exceptions_raising_non_exception: true 103 | exceptions_raising_bad_type: true 104 | exceptions_pointless_except: true 105 | exceptions_notimplemented_raised: true 106 | exceptions_catching_non_exception: true 107 | exceptions_broad_except: true 108 | exceptions_binary_op_exception: true 109 | exceptions_bare_except: true 110 | exceptions_bad_except_order: true 111 | design_interface_not_implemented: true 112 | design_abstract_class_not_used: true 113 | design_abstract_class_little_used: true 114 | classes_valid_slots: true 115 | classes_super_init_not_called: true 116 | classes_signature_differs: true 117 | classes_protected_access: true 118 | classes_non_parent_init_called: true 119 | classes_non_iterator_returned: true 120 | classes_no_method_argument: true 121 | classes_no_init: true 122 | classes_missing_interface_method: true 123 | classes_method_hidden: true 124 | classes_interface_is_not_class: true 125 | classes_bad_staticmethod_argument: true 126 | classes_bad_mcs_classmethod_argument: true 127 | classes_bad_context_manager: true 128 | classes_arguments_differ: true 129 | classes_access_member_before_definition: true 130 | basic_yield_outside_function: true 131 | basic_useless_else_on_loop: true 132 | basic_unreachable: true 133 | basic_unnecessary_pass: true 134 | basic_unnecessary_lambda: true 135 | basic_return_outside_function: true 136 | basic_return_in_init: true 137 | basic_return_arg_in_generator: true 138 | basic_pointless_string_statement: true 139 | basic_pointless_statement: true 140 | basic_old_raise_syntax: true 141 | basic_not_in_loop: true 142 | basic_nonexistent_operator: true 143 | basic_missing_reversed_argument: true 144 | basic_missing_module_attribute: true 145 | basic_lost_exception: true 146 | basic_init_is_generator: true 147 | basic_function_redefined: true 148 | basic_expression_not_assigned: true 149 | basic_exec_used: true 150 | basic_eval_used: true 151 | basic_empty_docstring: true 152 | basic_duplicate_key: true 153 | basic_duplicate_argument_name: true 154 | basic_dangerous_default_value: true 155 | basic_bad_reversed_sequence: true 156 | basic_assert_on_tuple: true 157 | basic_abstract_class_instantiated: true 158 | filter: 159 | paths: 160 | - blocks/* 161 | - bin/* 162 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | sudo: false 2 | cache: 3 | directories: 4 | - $TRAVIS_BUILD_DIR/data 5 | branches: 6 | only: 7 | - master 8 | - stable 9 | language: python 10 | matrix: 11 | include: 12 | - python: 2.7 13 | env: TESTS=blocks FLOATX=float32 14 | - python: 2.7 15 | env: TESTS=blocks FLOATX=float32 DB=sqlite 16 | - python: 3.5 17 | env: TESTS=blocks FLOATX=float64 18 | - python: 2.7 19 | env: TESTS=blocks-examples FLOATX=float32 20 | - python: 2.7 21 | env: TESTS=blocks-examples FLOATX=float32 DB=sqlite 22 | - python: 3.5 23 | env: TESTS=blocks-examples FLOATX=float64 24 | - python: 2.7 25 | env: TESTS=documentation FLOATX=float32 26 | - python: 3.5 27 | env: TESTS=documentation FLOATX=float64 28 | before_install: 29 | - # Setup Python environment with BLAS libraries 30 | - | 31 | if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then 32 | wget -q http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 33 | else 34 | wget -q http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 35 | fi 36 | - chmod +x miniconda.sh 37 | - ./miniconda.sh -b -p $HOME/miniconda 38 | - export PATH=$HOME/miniconda/bin:$PATH 39 | - conda update -q --yes conda 40 | - export FUEL_DATA_PATH=$TRAVIS_BUILD_DIR/data 41 | - export MKL_THREADING_LAYER=GNU # For Theano 42 | install: 43 | # Install all Python dependencies 44 | - conda install -q --yes python=$TRAVIS_PYTHON_VERSION --file req-travis-conda.txt 45 | - | 46 | if [[ $TESTS == 'documentation' ]]; then 47 | conda install -q --yes python=$TRAVIS_PYTHON_VERSION graphviz==2.38.0 48 | fi 49 | - pip install -q -r req-travis-pip.txt 50 | - | 51 | if [[ $TESTS == 'documentation' ]]; then 52 | pip install -q -r req-travis-docs-pip.txt 53 | fi 54 | script: 55 | - pip install -e . -r requirements.txt # Tests setup.py 56 | - curl https://raw.githubusercontent.com/mila-udem/fuel/master/.travis-data.sh | bash -s -- mnist 57 | - # Must export environment variable so that the subprocess is aware of it 58 | - export THEANO_FLAGS=floatX=$FLOATX,optimizer=fast_compile 59 | - export FUEL_FLOATX=$FLOATX 60 | - "if [[ $DB == 'sqlite' ]]; then echo 'log_backend: sqlite' > ~/.blocksrc; fi" 61 | - # Running nose2 within coverage makes imports count towards coverage 62 | - function fail { export FAILED=1; } 63 | - | 64 | if [[ $TESTS == 'blocks' ]]; then 65 | coverage run -p --source=blocks -m nose2.__main__ -v doctests || fail 66 | echo "[global]\ncxx =" > ~/.theanorc 67 | coverage run -p --source=blocks -m nose2.__main__ -v tests || fail 68 | return $FAILED 69 | fi 70 | - | 71 | if [[ $TESTS == 'blocks-examples' ]]; then 72 | cd $TRAVIS_BUILD_DIR 73 | git clone https://github.com/mila-udem/blocks-examples.git 74 | cd blocks-examples 75 | nose2 -v tests 76 | fi 77 | - | 78 | if [[ $TESTS == 'documentation' ]]; then 79 | sphinx-build -vW docs html 80 | fi 81 | after_script: 82 | - | 83 | if [[ $TESTS == 'blocks' ]]; then 84 | coverage combine 85 | coveralls 86 | fi 87 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Filing an issue 2 | =============== 3 | If you are having a problem, then *before* filing an issue, please verify 4 | the following: 5 | 6 | * That you are using a **compatible version of Python** -- this means version 7 | 3.4 or newer for mainline Python. 
Legacy Python support is limited to 2.7 and 8 | will eventually be dropped, and not all features may be available; users are 9 | encouraged to move to Python 3.x as soon as possible. 10 | * That you are using **the latest version of Theano** from the GitHub ``master`` 11 | branch. Blocks is developed concurrently with Theano's bleeding edge development 12 | and many problems with using Blocks can be traced to using the latest stable 13 | version of Theano (or an insufficiently recent GitHub checkout). Please see the 14 | `Blocks installation instructions`_ for more details. 15 | * You are using the latest Blocks (and Fuel_) from the GitHub ``master`` 16 | branch. If you are using ``stable``, then if possible, please check if your 17 | problem persists if you switch to using ``master``. It may still be worth 18 | filing the issue if your problem is fixed in ``master``, if it is a serious 19 | enough problem to warrant backporting a fix to ``stable``. 20 | * That your issue is about the software itself -- either a bug report, feature 21 | request, question on how to accomplish a certain defined operation within 22 | Blocks, etc. -- and not a general machine learning or neural networks question. 23 | 24 | Making a pull request 25 | ===================== 26 | 27 | Blocks development occurs in two separate branches: The ``master`` branch is the 28 | development branch. If you want to contribute a new feature or change the 29 | behavior of Blocks in any way, please make your pull request to this branch. 30 | 31 | The ``stable`` branch contains the latest release of Blocks. If you are fixing a 32 | bug (that is present in the latest release), make a pull request to this branch. 33 | If the bug is present in both the ``master`` and ``stable`` branch, two separate 34 | pull requests are in order. 35 | 36 | Want to contribute? 37 | =================== 38 | 39 | *Great!* We're always happy to help people contribute to Blocks. Here are 40 | few steps to help you get started: 41 | 42 | GitHub crash course 43 | If you're new to GitHub, be sure to check out our `quick reference`_ to the 44 | pull-request workflow, which will show you how to fork Blocks, create a new 45 | branch, and make a pull-request of your changes. 46 | 47 | Writing documentation 48 | If you're writing docstrings, please make sure that they comply with the 49 | `NumPy docstring standard`_. All of our documentation is written in 50 | reStructuredText_. 51 | 52 | Formatting guidelines 53 | We're pretty strict about following `PEP 8`_ guidelines. See `the 54 | documentation`_ for some tips on how to make sure your code is fully 55 | compliant. 56 | 57 | Code guidelines 58 | If you're going to write a lot of code, have a read through our `coding 59 | guidelines`_. 60 | 61 | License 62 | Blocks is licensed under the `MIT license`_, with portions licensed under 63 | the 3-clause BSD license. By contributing you agree to license your 64 | contributions under the MIT license. 65 | 66 | Questions about using Blocks? 67 | ============================= 68 | 69 | Please send your questions to the `Blocks users mailing list`_. You might not 70 | be the first one with this question or problem, so be sure to search both the 71 | mailing list and the GitHub issues to make sure the answer isn't out there 72 | already. 73 | 74 | .. _Blocks users mailing list: https://groups.google.com/forum/#!forum/blocks-users 75 | .. _Blocks installation instructions: https://blocks.readthedocs.org/en/latest/setup.html 76 | .. 
_Fuel: http://fuel.readthedocs.org/ 77 | .. _quick reference: https://blocks.readthedocs.org/en/latest/development/pull_request.html 78 | .. _the documentation: https://blocks.readthedocs.org/en/latest/development/index.html#formatting-guidelines 79 | .. _coding guidelines: https://blocks.readthedocs.org/en/latest/development/index.html#code-guidelines 80 | .. _PEP 8: https://www.python.org/dev/peps/pep-0008/ 81 | .. _NumPy docstring standard: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt 82 | .. _reStructuredText: http://docutils.sourceforge.net/docs/user/rst/quickref.html 83 | .. _MIT license: https://raw.githubusercontent.com/mila-udem/blocks/master/LICENSE 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Université de Montréal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | ---- 24 | 25 | Parts of this library use (modified) code from the Pylearn2 framework, which is 26 | distributed under BSD-3 and available at https://github.com/lisa-lab/pylearn2. 27 | 28 | Copyright (c) 2011--2014, Université de Montréal 29 | All rights reserved. 30 | 31 | Redistribution and use in source and binary forms, with or without 32 | modification, are permitted provided that the following conditions are met: 33 | 34 | 1. Redistributions of source code must retain the above copyright notice, this 35 | list of conditions and the following disclaimer. 36 | 37 | 2. Redistributions in binary form must reproduce the above copyright notice, 38 | this list of conditions and the following disclaimer in the documentation 39 | and/or other materials provided with the distribution. 40 | 41 | 3. Neither the name of the copyright holder nor the names of its contributors 42 | may be used to endorse or promote products derived from this software without 43 | specific prior written permission. 44 | 45 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 46 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 47 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 48 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 49 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 51 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 52 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 53 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 54 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 55 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://img.shields.io/coveralls/mila-udem/blocks.svg 2 | :target: https://coveralls.io/r/mila-udem/blocks 3 | 4 | .. image:: https://travis-ci.org/mila-udem/blocks.svg?branch=master 5 | :target: https://travis-ci.org/mila-udem/blocks 6 | 7 | .. image:: https://readthedocs.org/projects/blocks/badge/?version=latest 8 | :target: https://blocks.readthedocs.org/ 9 | 10 | .. image:: https://img.shields.io/scrutinizer/g/mila-udem/blocks.svg 11 | :target: https://scrutinizer-ci.com/g/mila-udem/blocks/ 12 | 13 | .. image:: https://requires.io/github/mila-udem/blocks/requirements.svg?branch=master 14 | :target: https://requires.io/github/mila-udem/blocks/requirements/?branch=master 15 | 16 | .. image:: https://img.shields.io/badge/license-MIT-blue.svg 17 | :target: https://github.com/mila-udem/blocks/blob/master/LICENSE 18 | 19 | Blocks 20 | ====== 21 | Blocks is a framework that helps you build neural network models on top of 22 | Theano. Currently it supports and provides: 23 | 24 | * Constructing parametrized Theano operations, called "bricks" 25 | * Pattern matching to select variables and bricks in large models 26 | * Algorithms to optimize your model 27 | * Saving and resuming of training 28 | * Monitoring and analyzing values during training progress (on the training set 29 | as well as on test sets) 30 | * Application of graph transformations, such as dropout 31 | 32 | In the future we also hope to support: 33 | 34 | * Dimension, type and axes-checking 35 | 36 | See Also: 37 | * `Fuel`_, the data processing engine developed primarily for Blocks. 38 | * `Blocks-examples`_ for maintained examples of scripts using Blocks. 39 | * `Blocks-extras`_ for semi-maintained additional Blocks components. 40 | 41 | Citing Blocks 42 | If you use Blocks or Fuel in your work, we'd really appreciate it if you could cite the following paper: 43 | 44 | Bart van Merriënboer, Dzmitry Bahdanau, Vincent Dumoulin, Dmitriy Serdyuk, David Warde-Farley, Jan Chorowski, and Yoshua Bengio, "`Blocks and Fuel: Frameworks for deep learning`_," *arXiv preprint arXiv:1506.00619 [cs.LG]*, 2015. 45 | 46 | Documentation 47 | Please see the documentation_ for more information. 48 | 49 | Contributing 50 | If you want to contribute, please make sure to read the `developer guidelines`_. 51 | 52 | .. _documentation: http://blocks.readthedocs.org 53 | .. _developer guidelines: http://blocks.readthedocs.org/en/latest/development/index.html 54 | .. _Blocks and Fuel\: Frameworks for deep learning: http://arxiv.org/abs/1506.00619 55 | .. _Blocks-examples: https://github.com/mila-udem/blocks-examples 56 | .. _Blocks-extras: https://github.com/mila-udem/blocks-extras 57 | .. 
_Fuel: https://github.com/mila-udem/fuel 58 | -------------------------------------------------------------------------------- /bin/blocks-continue: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import logging 3 | from argparse import ArgumentParser 4 | 5 | from blocks.serialization import continue_training 6 | 7 | if __name__ == "__main__": 8 | logging.basicConfig() 9 | 10 | parser = ArgumentParser("Continues your pickled main loop") 11 | parser.add_argument( 12 | "path", help="A path to a file with a pickled main loop") 13 | args = parser.parse_args() 14 | 15 | continue_training(args.path) 16 | -------------------------------------------------------------------------------- /blocks/__init__.py: -------------------------------------------------------------------------------- 1 | """The blocks library for parametrized Theano ops.""" 2 | import blocks.version 3 | __version__ = blocks.version.version 4 | -------------------------------------------------------------------------------- /blocks/bricks/__init__.py: -------------------------------------------------------------------------------- 1 | """Bricks are parameterized Theano operations.""" 2 | from __future__ import absolute_import 3 | 4 | from .base import application, Brick, lazy 5 | from .bn import (BatchNormalization, SpatialBatchNormalization, 6 | BatchNormalizedMLP) 7 | from .interfaces import (Activation, Feedforward, Initializable, LinearLike, 8 | Random) 9 | from .recurrent import (BaseRecurrent, SimpleRecurrent, LSTM, GatedRecurrent, 10 | Bidirectional, RecurrentStack, 11 | RECURRENTSTACK_SEPARATOR, recurrent) 12 | from .simple import (Linear, Bias, Maxout, LinearMaxout, Identity, Tanh, 13 | Logistic, Softplus, Rectifier, LeakyRectifier, 14 | Softmax, NDimensionalSoftmax) 15 | from .sequences import Sequence, FeedforwardSequence, MLP 16 | from .wrappers import WithExtraDims 17 | 18 | __all__ = ('application', 'Brick', 'lazy', 'BatchNormalization', 19 | 'SpatialBatchNormalization', 'BatchNormalizedMLP', 20 | 'Activation', 'Feedforward', 'Initializable', 'LinearLike', 21 | 'Random', 'Linear', 'Bias', 'Maxout', 'LinearMaxout', 'Identity', 22 | 'Tanh', 'Logistic', 'Softplus', 'Rectifier', 'LeakyRectifier', 23 | 'Softmax', 'NDimensionalSoftmax', 'Sequence', 24 | 'FeedforwardSequence', 'MLP', 'WithExtraDims', 25 | 'BaseRecurrent', 'SimpleRecurrent', 'LSTM', 'GatedRecurrent', 26 | 'Bidirectional', 'RecurrentStack', 'RECURRENTSTACK_SEPARATOR', 27 | 'recurrent') 28 | -------------------------------------------------------------------------------- /blocks/bricks/cost.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import theano 4 | from theano import tensor 5 | from six import add_metaclass 6 | 7 | from blocks.bricks.base import application, Brick 8 | 9 | 10 | @add_metaclass(ABCMeta) 11 | class Cost(Brick): 12 | @abstractmethod 13 | @application 14 | def apply(self, *args, **kwargs): 15 | pass 16 | 17 | 18 | @add_metaclass(ABCMeta) 19 | class CostMatrix(Cost): 20 | """Base class for costs which can be calculated element-wise. 21 | 22 | Assumes that the data has format (batch, features). 
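As a minimal illustrative sketch (using the :class:`SquaredError` brick defined later in this module), the element-wise costs are summed over the feature axis and then averaged over the batch axis:

>>> import numpy
>>> from theano import tensor
>>> from blocks.bricks.cost import SquaredError
>>> y, y_hat = tensor.matrix('y'), tensor.matrix('y_hat')
>>> cost = SquaredError().apply(y, y_hat)  # sum over axis 1, then mean
>>> float(cost.eval({y: numpy.zeros((2, 3), dtype=y.dtype),
...                  y_hat: numpy.ones((2, 3), dtype=y_hat.dtype)}))
3.0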
23 | 24 | """ 25 | @application(outputs=["cost"]) 26 | def apply(self, *args, **kwargs): 27 | return self.cost_matrix(*args, **kwargs).sum(axis=1).mean() 28 | 29 | @abstractmethod 30 | @application 31 | def cost_matrix(self, *args, **kwargs): 32 | pass 33 | 34 | 35 | class BinaryCrossEntropy(CostMatrix): 36 | @application 37 | def cost_matrix(self, y, y_hat): 38 | cost = tensor.nnet.binary_crossentropy(y_hat, y) 39 | return cost 40 | 41 | 42 | class AbsoluteError(CostMatrix): 43 | @application 44 | def cost_matrix(self, y, y_hat): 45 | cost = abs(y - y_hat) 46 | return cost 47 | 48 | 49 | class SquaredError(CostMatrix): 50 | @application 51 | def cost_matrix(self, y, y_hat): 52 | cost = tensor.sqr(y - y_hat) 53 | return cost 54 | 55 | 56 | class CategoricalCrossEntropy(Cost): 57 | @application(outputs=["cost"]) 58 | def apply(self, y, y_hat): 59 | cost = tensor.nnet.categorical_crossentropy(y_hat, y).mean() 60 | return cost 61 | 62 | 63 | class MisclassificationRate(Cost): 64 | """Calculates the misclassification rate for a mini-batch. 65 | 66 | Parameters 67 | ---------- 68 | top_k : int, optional 69 | If the ground truth class is within the `top_k` highest 70 | responses for a given example, the model is considered 71 | to have predicted correctly. Default: 1. 72 | 73 | Notes 74 | ----- 75 | Ties for `top_k`-th place are broken pessimistically, i.e. 76 | in the (in practice, rare) case that there is a tie for `top_k`-th 77 | highest output for a given example, it is considered an incorrect 78 | prediction. 79 | 80 | """ 81 | def __init__(self, top_k=1): 82 | self.top_k = top_k 83 | super(MisclassificationRate, self).__init__() 84 | 85 | @application(outputs=["error_rate"]) 86 | def apply(self, y, y_hat): 87 | # Support checkpoints that predate self.top_k 88 | top_k = getattr(self, 'top_k', 1) 89 | if top_k == 1: 90 | mistakes = tensor.neq(y, y_hat.argmax(axis=1)) 91 | else: 92 | row_offsets = theano.tensor.arange(0, y_hat.flatten().shape[0], 93 | y_hat.shape[1]) 94 | truth_score = y_hat.flatten()[row_offsets + y] 95 | # We use greater than _or equals_ here so that the model 96 | # _must_ have its guess in the top k, and cannot extend 97 | # its effective "list of predictions" by tying lots of things 98 | # for k-th place. 99 | higher_scoring = tensor.ge(y_hat, truth_score.dimshuffle(0, 'x')) 100 | # Because we used greater-than-or-equal we have to correct for 101 | # counting the true label. 102 | num_higher = higher_scoring.sum(axis=1) - 1 103 | mistakes = tensor.ge(num_higher, top_k) 104 | return mistakes.mean(dtype=theano.config.floatX) 105 | -------------------------------------------------------------------------------- /blocks/bricks/lookup.py: -------------------------------------------------------------------------------- 1 | """Introduces Lookup brick.""" 2 | from blocks.bricks import Initializable, Feedforward 3 | from blocks.bricks.base import application, lazy 4 | from blocks.roles import WEIGHT, add_role 5 | from blocks.utils import check_theano_variable, shared_floatx_nans 6 | 7 | 8 | class LookupTable(Initializable, Feedforward): 9 | """Encapsulates representations of a range of integers. 10 | 11 | This brick can be used to embed integers, e.g. word indices, 12 | into a vector space. 13 | 14 | Parameters 15 | ---------- 16 | length : int 17 | The size of the lookup table, or in other words, one plus the 18 | maximum index for which a representation is contained. 19 | dim : int 20 | The dimensionality of representations. 
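As a minimal illustrative sketch (the constant initialization below is chosen purely for demonstration), looking up two indices in a table of four 2-dimensional embeddings:

>>> import numpy
>>> from theano import tensor
>>> from blocks.initialization import Constant
>>> lookup = LookupTable(length=4, dim=2, weights_init=Constant(1))
>>> lookup.initialize()
>>> indices = tensor.lvector('indices')
>>> embeddings = lookup.apply(indices)
>>> embeddings.eval({indices: numpy.array([0, 2], dtype='int64')}).shape
(2, 2)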
21 | 22 | Notes 23 | ----- 24 | See :class:`.Initializable` for initialization parameters. 25 | 26 | """ 27 | has_bias = False 28 | 29 | @lazy(allocation=['length', 'dim']) 30 | def __init__(self, length, dim, **kwargs): 31 | super(LookupTable, self).__init__(**kwargs) 32 | self.length = length 33 | self.dim = dim 34 | 35 | @property 36 | def W(self): 37 | return self.parameters[0] 38 | 39 | def _allocate(self): 40 | self.parameters.append(shared_floatx_nans((self.length, self.dim), 41 | name='W')) 42 | add_role(self.parameters[-1], WEIGHT) 43 | 44 | def _initialize(self): 45 | self.weights_init.initialize(self.W, self.rng) 46 | 47 | @application(inputs=['indices'], outputs=['output']) 48 | def apply(self, indices): 49 | """Perform lookup. 50 | 51 | Parameters 52 | ---------- 53 | indices : :class:`~tensor.TensorVariable` 54 | The indices of interest. The dtype must be integer. 55 | 56 | Returns 57 | ------- 58 | output : :class:`~tensor.TensorVariable` 59 | Representations for the indices of the query. Has :math:`k+1` 60 | dimensions, where :math:`k` is the number of dimensions of the 61 | `indices` parameter. The last dimension stands for the 62 | representation element. 63 | 64 | """ 65 | check_theano_variable(indices, None, ("int", "uint")) 66 | output_shape = [indices.shape[i] 67 | for i in range(indices.ndim)] + [self.dim] 68 | return self.W[indices.flatten()].reshape(output_shape) 69 | 70 | def get_dim(self, name): 71 | if name == 'output': 72 | return self.dim 73 | if name == 'indices': 74 | return 0 75 | return super(LookupTable, self).get_dim(name) 76 | 77 | @property 78 | def input_dim(self): 79 | return 0 80 | 81 | @input_dim.setter 82 | def input_dim(self, dim): 83 | if dim != 0: 84 | raise ValueError("LookupTable input must be integer") 85 | 86 | @property 87 | def output_dim(self): 88 | return self.dim 89 | 90 | @output_dim.setter 91 | def output_dim(self, dim): 92 | self.dim = dim 93 | -------------------------------------------------------------------------------- /blocks/bricks/recurrent/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseRecurrent, recurrent 2 | from .architectures import SimpleRecurrent, LSTM, GatedRecurrent 3 | from .misc import Bidirectional, RecurrentStack, RECURRENTSTACK_SEPARATOR 4 | 5 | 6 | __all__ = ("BaseRecurrent", "recurrent", "SimpleRecurrent", "LSTM", 7 | "GatedRecurrent", "Bidirectional", "RecurrentStack", 8 | "RECURRENTSTACK_SEPARATOR") 9 | -------------------------------------------------------------------------------- /blocks/bricks/sequences.py: -------------------------------------------------------------------------------- 1 | """Bricks that compose together other bricks in linear sequences.""" 2 | import copy 3 | from toolz import interleave, unique 4 | from picklable_itertools.extras import equizip 5 | 6 | from ..utils import pack 7 | from .base import Brick, application, lazy 8 | from .interfaces import Feedforward, Initializable 9 | from .simple import Linear 10 | 11 | 12 | class Sequence(Brick): 13 | """A sequence of bricks. 14 | 15 | This brick applies a sequence of bricks, assuming that their in- and 16 | outputs are compatible. 17 | 18 | Parameters 19 | ---------- 20 | application_methods : list 21 | List of :class:`.BoundApplication` or :class:`.Brick` to apply. 22 | For :class:`.Brick`s, the ``.apply`` method is used. 
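A minimal illustrative sketch, chaining two parameterless activation bricks (bound applications such as ``some_brick.apply`` can be passed in the same way):

>>> from theano import tensor
>>> from blocks.bricks import Identity, Tanh
>>> seq = Sequence([Tanh().apply, Identity().apply])
>>> y = seq.apply(tensor.vector('x'))  # tanh first, then identity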
23 | 24 | """ 25 | def __init__(self, application_methods, **kwargs): 26 | pairs = ((a.apply, a) if isinstance(a, Brick) else (a, a.brick) 27 | for a in application_methods) 28 | self.application_methods, bricks = zip(*pairs) 29 | kwargs.setdefault('children', []).extend(unique(bricks)) 30 | super(Sequence, self).__init__(**kwargs) 31 | 32 | @application 33 | def apply(self, *args): 34 | child_input = args 35 | for application_method in self.application_methods: 36 | output = application_method(*pack(child_input)) 37 | child_input = output 38 | return output 39 | 40 | @apply.property('inputs') 41 | def apply_inputs(self): 42 | return self.application_methods[0].inputs 43 | 44 | @apply.property('outputs') 45 | def apply_outputs(self): 46 | return self.application_methods[-1].outputs 47 | 48 | 49 | class FeedforwardSequence(Sequence, Feedforward): 50 | """A sequence where the first and last bricks are feedforward. 51 | 52 | Parameters 53 | ---------- 54 | application_methods : list 55 | List of :class:`.BoundApplication` to apply. The first and last 56 | application method should belong to a :class:`Feedforward` brick. 57 | 58 | """ 59 | @property 60 | def input_dim(self): 61 | return self.children[0].input_dim 62 | 63 | @input_dim.setter 64 | def input_dim(self, value): 65 | self.children[0].input_dim = value 66 | 67 | @property 68 | def output_dim(self): 69 | return self.children[-1].output_dim 70 | 71 | @output_dim.setter 72 | def output_dim(self, value): 73 | self.children[-1].output_dim = value 74 | 75 | 76 | class MLP(FeedforwardSequence, Initializable): 77 | """A simple multi-layer perceptron. 78 | 79 | Parameters 80 | ---------- 81 | activations : list of :class:`.Brick`, :class:`.BoundApplication`, 82 | or ``None`` 83 | A list of activations to apply after each linear transformation. 84 | Give ``None`` to not apply any activation. It is assumed that the 85 | application method to use is ``apply``. Required for 86 | :meth:`__init__`. 87 | dims : list of ints 88 | A list of input dimensions, as well as the output dimension of the 89 | last layer. Required for :meth:`~.Brick.allocate`. 90 | prototype : :class:`.Brick`, optional 91 | The transformation prototype. A copy will be created for every 92 | activation. If not provided, an instance of :class:`~simple.Linear` 93 | will be used. 94 | 95 | Notes 96 | ----- 97 | See :class:`Initializable` for initialization parameters. 98 | 99 | Note that the ``weights_init``, ``biases_init`` (as well as 100 | ``use_bias`` if set to a value other than the default of ``None``) 101 | configurations will overwrite those of the layers each time the 102 | :class:`MLP` is re-initialized. For more fine-grained control, push the 103 | configuration to the child layers manually before initialization. 104 | 105 | >>> from blocks.bricks import Tanh 106 | >>> from blocks.initialization import IsotropicGaussian, Constant 107 | >>> mlp = MLP(activations=[Tanh(), None], dims=[30, 20, 10], 108 | ... weights_init=IsotropicGaussian(), 109 | ... 
biases_init=Constant(1)) 110 | >>> mlp.push_initialization_config() # Configure children 111 | >>> mlp.children[0].weights_init = IsotropicGaussian(0.1) 112 | >>> mlp.initialize() 113 | 114 | """ 115 | @lazy(allocation=['dims']) 116 | def __init__(self, activations, dims, prototype=None, **kwargs): 117 | self.activations = activations 118 | self.prototype = Linear() if prototype is None else prototype 119 | self.linear_transformations = [] 120 | for i in range(len(activations)): 121 | linear = copy.deepcopy(self.prototype) 122 | name = self.prototype.__class__.__name__.lower() 123 | linear.name = '{}_{}'.format(name, i) 124 | self.linear_transformations.append(linear) 125 | if not dims: 126 | dims = [None] * (len(activations) + 1) 127 | self.dims = dims 128 | # Interleave the transformations and activations 129 | applications = [a for a in interleave([self.linear_transformations, 130 | activations]) if a is not None] 131 | super(MLP, self).__init__(applications, **kwargs) 132 | 133 | @property 134 | def input_dim(self): 135 | return self.dims[0] 136 | 137 | @input_dim.setter 138 | def input_dim(self, value): 139 | self.dims[0] = value 140 | 141 | @property 142 | def output_dim(self): 143 | return self.dims[-1] 144 | 145 | @output_dim.setter 146 | def output_dim(self, value): 147 | self.dims[-1] = value 148 | 149 | def _push_allocation_config(self): 150 | if not len(self.dims) - 1 == len(self.linear_transformations): 151 | raise ValueError 152 | for input_dim, output_dim, layer in \ 153 | equizip(self.dims[:-1], self.dims[1:], 154 | self.linear_transformations): 155 | layer.input_dim = input_dim 156 | layer.output_dim = output_dim 157 | if getattr(self, 'use_bias', None) is not None: 158 | layer.use_bias = self.use_bias 159 | -------------------------------------------------------------------------------- /blocks/extensions/predicates.py: -------------------------------------------------------------------------------- 1 | class OnLogRecord(object): 2 | """Trigger a callback when a certain log record is found. 3 | 4 | Parameters 5 | ---------- 6 | record_name : str 7 | The record name to check. 8 | 9 | """ 10 | def __init__(self, record_name): 11 | self.record_name = record_name 12 | 13 | def __call__(self, log): 14 | return bool(log.current_row.get(self.record_name, False)) 15 | 16 | def __eq__(self, other): 17 | return (type(other) == type(self) and 18 | other.record_name == self.record_name) 19 | -------------------------------------------------------------------------------- /blocks/extensions/saveload.py: -------------------------------------------------------------------------------- 1 | """Extensions for saving and loading the state of a training process.""" 2 | import os.path 3 | import logging 4 | 5 | from blocks.extensions import SimpleExtension 6 | from blocks.utils import reraise_as 7 | from blocks.serialization import (secure_dump, load, dump_and_add_to_dump, 8 | load_parameters) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | LOADED_FROM = "loaded_from" 13 | SAVED_TO = "saved_to" 14 | 15 | 16 | class Checkpoint(SimpleExtension): 17 | """Saves a pickled version of the main loop to the disk. 18 | 19 | The pickled main loop can be later reloaded and training can be 20 | resumed. 21 | 22 | Makes a `SAVED_TO` record in the log with the serialization destination 23 | in the case of success and ``None`` in the case of failure. 
The
24 |     value of the record is a tuple of paths to which saving was done
25 |     (there can be more than one if the user added a condition
26 |     with an argument, see :meth:`do` docs).
27 | 
28 |     Parameters
29 |     ----------
30 |     path : str
31 |         The destination path for pickling.
32 |     parameters : list, optional
33 |         The parameters to save separately. If None, the parameters from
34 |         the model (main_loop.model.parameters) are saved.
35 |     save_separately : list of str, optional
36 |         The list of the main loop's attributes to be saved (copied)
37 |         in a separate file in the tar archive. It may be used, for example,
38 |         to save the log separately. The name of the attribute will be used
39 |         as the name in the tar file.
40 |     save_main_loop : bool
41 |         Choose whether to save the main loop or not. This can be useful,
42 |         for example, if you are only interested in saving the parameters,
43 |         but not the whole main loop. Defaults to `True`.
44 |     use_cpickle : bool
45 |         See documentation of :func:`~blocks.serialization.dump`.
46 | 
47 |     Notes
48 |     -----
49 |     Using pickling for saving the whole main loop object comes with
50 |     certain limitations:
51 | 
52 |     * Theano computation graphs built in the GPU mode
53 |       (`theano.config.device == "gpu"`) cannot be used in the usual mode
54 |       (and vice versa). Therefore using this extension binds you to using
55 |       only one kind of device.
56 | 
57 | 
58 |     """
59 |     def __init__(self, path, parameters=None, save_separately=None,
60 |                  save_main_loop=True, use_cpickle=False, **kwargs):
61 |         kwargs.setdefault("after_training", True)
62 |         super(Checkpoint, self).__init__(**kwargs)
63 |         self.path = path
64 |         self.parameters = parameters
65 |         self.save_separately = save_separately
66 |         self.save_main_loop = save_main_loop
67 |         self.use_cpickle = use_cpickle
68 | 
69 |     def do(self, callback_name, *args):
70 |         """Pickle the main loop object to the disk.
71 | 
72 |         If `*args` contains an argument from the user, it is treated as
73 |         the saving path to be used instead of the one given at the
74 |         construction stage.
75 | 
76 |         """
77 |         logger.info("Checkpointing has started")
78 |         _, from_user = self.parse_args(callback_name, args)
79 |         try:
80 |             path = self.path
81 |             if from_user:
82 |                 path, = from_user
83 |             to_add = None
84 |             if self.save_separately:
85 |                 to_add = {attr: getattr(self.main_loop, attr) for attr in
86 |                           self.save_separately}
87 |             if self.parameters is None:
88 |                 if hasattr(self.main_loop, 'model'):
89 |                     self.parameters = self.main_loop.model.parameters
90 |             object_ = None
91 |             if self.save_main_loop:
92 |                 object_ = self.main_loop
93 |             secure_dump(object_, path,
94 |                         dump_function=dump_and_add_to_dump,
95 |                         parameters=self.parameters,
96 |                         to_add=to_add,
97 |                         use_cpickle=self.use_cpickle)
98 |         except Exception:
99 |             path = None
100 |             raise
101 |         finally:
102 |             already_saved_to = self.main_loop.log.current_row.get(SAVED_TO, ())
103 |             self.main_loop.log.current_row[SAVED_TO] = (already_saved_to +
104 |                                                          (path,))
105 |             logger.info("Checkpointing has finished")
106 | 
107 | 
108 | class Load(SimpleExtension):
109 |     """Loads a saved checkpoint into the main loop.
110 | 
111 |     Makes a `LOADED_FROM` record in the log with the dump path.
112 | 
113 |     Parameters
114 |     ----------
115 |     path : str
116 |         The path to the checkpoint file.
117 |     load_iteration_state : bool
118 |         If `True`, load the iteration state. This can be useful when your
119 |         model has very long epochs, and you want to resume when you were in
120 |         the middle of one. Defaults to `False`.
121 |     load_log : bool
122 |         If `True`, load the old log and continue logging from there.
123 |         Convenient because you end up with a single log of the entire
124 |         training history. Defaults to `False`.
125 | 
126 |     Notes
127 |     -----
128 |     Requires the model to be created entirely using bricks, with a unique
129 |     path/name for each brick, so that the parameters can be matched to
130 |     their values.
131 | 
132 |     In order to load the iteration state and the log, the saved model needs
133 |     to be unpickled. Note that resuming training this way is still not
134 |     entirely seamless because e.g. extensions will not be reloaded.
135 | 
136 |     """
137 |     def __init__(self, path, load_iteration_state=False, load_log=False,
138 |                  **kwargs):
139 |         kwargs.setdefault("before_training", True)
140 |         super(Load, self).__init__(**kwargs)
141 |         self.path = path
142 |         self.load_iteration_state = load_iteration_state
143 |         self.load_log = load_log
144 | 
145 |     def load_to(self, main_loop):
146 |         with open(self.path, "rb") as source:
147 |             main_loop.model.set_parameter_values(load_parameters(source))
148 |             if self.load_iteration_state or self.load_log:
149 |                 loaded_main_loop = load(source)
150 |                 if self.load_log:
151 |                     main_loop.log = loaded_main_loop.log
152 |                 if self.load_iteration_state:
153 |                     main_loop.iteration_state = \
154 |                         loaded_main_loop.iteration_state
155 | 
156 |     def do(self, *args, **kwargs):
157 |         if not os.path.exists(self.path):
158 |             logger.warning("No dump found")
159 |             return
160 |         logger.info("loading model from {}".format(self.path))
161 |         try:
162 |             self.load_to(self.main_loop)
163 |             self.main_loop.log.current_row[LOADED_FROM] = self.path
164 |         except Exception:
165 |             reraise_as("Failed to load the state")
166 | 
--------------------------------------------------------------------------------
/blocks/extensions/training.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import logging
3 | from blocks.extensions import SimpleExtension
4 | 
5 | 
6 | logger = logging.getLogger(__name__)
7 | 
8 | 
9 | class SharedVariableModifier(SimpleExtension):
10 |     """Adjusts a shared variable parameter using some function.
11 | 
12 |     Applies a function to compute the new value of a shared parameter each
13 |     iteration.
14 | 
15 |     This class can be used to adapt parameters such as the learning rate
16 |     or momentum over the course of training.
17 | 
18 |     Parameters
19 |     ----------
20 |     parameter : :class:`~tensor.TensorSharedVariable`
21 |         Shared variable to be adjusted.
22 |     function : callable
23 |         A function which outputs a numeric value to which the
24 |         given shared variable will be set and may take one or two
25 |         arguments.
26 | 
27 |         In the first case, it is a function that takes the total number of
28 |         iterations done (``int``) as an input.
29 | 
30 |         In the second case, it is a function which takes the number of
31 |         iterations done (``int``) and the old value of the shared variable
32 |         (with the same dtype as `parameter`).
33 |     num_args : int, optional
34 |         The number of arguments to pass to the function. If unspecified,
35 |         it will be inferred. This is useful if you are using function-like
36 |         objects for which the arity of the function cannot be inferred.
37 | 
38 |     Notes
39 |     -----
40 |     This class includes a method ``function`` that calls the function
41 |     passed in the constructor and a ``num_args`` property which computes
42 |     the number of arguments to use by inspecting the function object.
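A minimal illustrative sketch of both call signatures (the lambdas are placeholder schedules; in practice the extension is passed to the ``extensions`` argument of a main loop):

>>> import numpy
>>> import theano
>>> lr = theano.shared(numpy.float32(0.1), name='learning_rate')
>>> # One argument: the new value depends only on the iteration count.
>>> decay = SharedVariableModifier(
...     lr, lambda n: numpy.float32(0.1 / (1 + n)))
>>> # Two arguments: the new value also depends on the old value.
>>> halve = SharedVariableModifier(
...     lr, lambda n, old: numpy.float32(0.5 * old))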
43 |     Subclasses may override a method called ``function`` and/or
44 |     the ``num_args`` property and instead pass ``None`` to the superclass
45 |     constructor. This can be used to bypass certain serialization issues
46 |     on Legacy Python regarding the unpicklability of instance
47 |     method objects.
48 | 
49 |     """
50 |     def __init__(self, parameter, function, num_args=None, **kwargs):
51 |         kwargs.setdefault("after_batch", True)
52 |         super(SharedVariableModifier, self).__init__(**kwargs)
53 |         self.parameter = parameter
54 |         self._function = function
55 |         self._num_args = num_args
56 | 
57 |     @property
58 |     def num_args(self):
59 |         if self._num_args is None:
60 |             self._num_args = len(inspect.getargspec(self._function).args)
61 |         return self._num_args
62 | 
63 |     def function(self, *args):
64 |         return self._function(*args)
65 | 
66 |     def do(self, which_callback, *args):
67 |         iterations_done = self.main_loop.log.status['iterations_done']
68 |         if self.num_args == 1:
69 |             new_value = self.function(iterations_done)
70 |         else:
71 |             old_value = self.parameter.get_value()
72 |             new_value = self.function(iterations_done, old_value)
73 |         self.parameter.set_value(new_value)
74 | 
75 | 
76 | class TrackTheBest(SimpleExtension):
77 |     """Check if a log quantity has the minimum/maximum value so far.
78 | 
79 |     Parameters
80 |     ----------
81 |     record_name : str
82 |         The name of the record to track.
83 |     notification_name : str, optional
84 |         The name for the record to be made in the log when the current
85 |         value of the tracked quantity is the best so far. If not given,
86 |         ``record_name`` plus the "_best_so_far" suffix is used.
87 |     choose_best : callable, optional
88 |         A function that takes the current value and the best so far
89 |         and returns the best of the two. By default :func:`min`, which
90 |         corresponds to tracking the minimum value.
91 | 
92 |     Attributes
93 |     ----------
94 |     best_name : str
95 |         The name of the status record to keep the best value so far.
96 |     notification_name : str
97 |         The name of the record written to the log when the current
98 |         value of the tracked quantity is the best so far.
99 | 
100 |     Notes
101 |     -----
102 |     In the likely case that you are relying on another extension to
103 |     add the tracked quantity to the log, make sure to place this
104 |     extension *after* the extension that writes the quantity to the log
105 |     in the `extensions` argument to :class:`blocks.main_loop.MainLoop`.
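A minimal illustrative sketch, assuming a quantity named ``valid_error`` is written to the log by an earlier monitoring extension:

>>> track = TrackTheBest('valid_error')  # tracks the minimum by default
>>> track.notification_name
'valid_error_best_so_far'
>>> track.best_name
'best_valid_error'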
106 | 107 | """ 108 | def __init__(self, record_name, notification_name=None, 109 | choose_best=min, **kwargs): 110 | self.record_name = record_name 111 | if not notification_name: 112 | notification_name = record_name + "_best_so_far" 113 | self.notification_name = notification_name 114 | self.best_name = "best_" + record_name 115 | self.choose_best = choose_best 116 | kwargs.setdefault("after_epoch", True) 117 | super(TrackTheBest, self).__init__(**kwargs) 118 | 119 | def do(self, which_callback, *args): 120 | clsname = self.__class__.__name__ 121 | current_value = self.main_loop.log.current_row.get(self.record_name) 122 | logger.debug('%s: current value of log.current_row["%s"] = %s', 123 | clsname, self.record_name, str(current_value)) 124 | if current_value is None: 125 | return 126 | best_value = self.main_loop.status.get(self.best_name, None) 127 | logger.debug('%s: current value of status["%s"] = %s', 128 | clsname, self.best_name, str(best_value)) 129 | if (best_value is None or 130 | (current_value != best_value and 131 | self.choose_best(current_value, best_value) == 132 | current_value)): 133 | logger.debug('%s: New best obtained at iteration %d!', 134 | clsname, self.main_loop.log.status['iterations_done']) 135 | logger.debug('%s: Updating status["%s"], adding notification ' 136 | 'to log (%s)', clsname, self.best_name, 137 | self.notification_name) 138 | self.main_loop.status[self.best_name] = current_value 139 | self.main_loop.log.current_row[self.notification_name] = True 140 | -------------------------------------------------------------------------------- /blocks/graph/annotations.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from ..roles import add_role, AUXILIARY 3 | 4 | 5 | def add_annotation(var, annotation): 6 | annotations = getattr(var.tag, 'annotations', []) 7 | if any(old_annotation.__class__ == annotation.__class__ 8 | for old_annotation in annotations): 9 | raise ValueError 10 | else: 11 | var.tag.annotations = annotations + [annotation] 12 | 13 | 14 | class Annotation(object): 15 | """Annotations on Theano variables in a graph. 16 | 17 | In Blocks annotations are automatically attached to variables created 18 | using bricks. One form of annotation is that many variables are 19 | assigned a role (see :class:`.VariableRole`). A second form of 20 | annotation comes in the form of attaching a :class:`Annotation` 21 | instance to the variable's ``tag`` attribute, with auxiliary variables 22 | and/or updates. 23 | 24 | For example, we might be interested in the mean activation of certain 25 | application of a :class:`.Linear` brick. The variable representing the 26 | mean activation is attached as an auxiliary variable to the annotations 27 | of the input and output variables of this brick. Using the 28 | :class:`ComputationGraph` class (the 29 | :attr:`~ComputationGraph.variables`, 30 | :attr:`~ComputationGraph.auxiliary_variables`, etc. attributes in 31 | particular) we can retrieve these Theano variables to pass on to the 32 | monitor, use as a regularizer, etc. 33 | 34 | In most cases, annotations are added on a brick level (e.g. each brick 35 | will assign the weight norm of its weights as an auxiliary value) or on 36 | an application level (e.g. each time a brick is applied, its mean 37 | activation will become an auxiliary variable). However, you can also 38 | add annotations manually, by setting the ``annotation`` value of a 39 | variable's ``tag`` field. 
40 | 41 | Examples 42 | -------- 43 | >>> from theano import tensor 44 | >>> x = tensor.vector() 45 | >>> annotation = Annotation() 46 | >>> annotation.add_auxiliary_variable(x + 1, name='x_plus_1') 47 | >>> add_annotation(x, annotation) 48 | >>> y = x ** 2 49 | >>> from blocks.graph import ComputationGraph 50 | >>> cg = ComputationGraph([y]) 51 | >>> cg.auxiliary_variables 52 | [x_plus_1] 53 | 54 | """ 55 | def __init__(self): 56 | self.auxiliary_variables = [] 57 | self.updates = OrderedDict() 58 | 59 | def add_auxiliary_variable(self, variable, roles=None, name=None): 60 | """Attach an auxiliary variable to the graph. 61 | 62 | Auxiliary variables are Theano variables that are not part of a 63 | brick's output, but can be useful nonetheless e.g. as a regularizer 64 | or to monitor during training progress. 65 | 66 | Parameters 67 | ---------- 68 | variable : :class:`~tensor.TensorVariable` 69 | The variable you want to add. 70 | roles : list of :class:`.VariableRole` instances, optional 71 | The roles of this variable. The :const:`.AUXILIARY` 72 | role will automatically be added. Other options are 73 | :const:`.COST`, :const:`.WEIGHT`, etc. 74 | name : str, optional 75 | Name to give to the variable. If the variable already has a 76 | name it will be overwritten. 77 | 78 | Examples 79 | -------- 80 | >>> from blocks.bricks.base import application, Brick 81 | >>> from blocks.roles import COST 82 | >>> from blocks.utils import shared_floatx_nans 83 | >>> class Foo(Brick): 84 | ... def _allocate(self): 85 | ... W = shared_floatx_nans((10, 10)) 86 | ... self.add_auxiliary_variable(W.mean(), name='mean_W') 87 | ... @application 88 | ... def apply(self, x, application_call): 89 | ... application_call.add_auxiliary_variable( 90 | ... x - 1, name='x_minus_1') 91 | ... application_call.add_auxiliary_variable( 92 | ... x.mean(), roles=[COST], name='mean_x') 93 | ... return x + 1 94 | >>> from theano import tensor 95 | >>> x = tensor.vector() 96 | >>> y = Foo().apply(x) 97 | >>> from blocks.graph import ComputationGraph 98 | >>> cg = ComputationGraph([y]) 99 | >>> from blocks.filter import VariableFilter 100 | >>> var_filter = VariableFilter(roles=[AUXILIARY]) 101 | >>> var_filter(cg.variables) # doctest: +SKIP 102 | {x_minus_1, mean_W, mean_x} 103 | >>> var_filter = VariableFilter(roles=[COST]) 104 | >>> var_filter(cg.variables) # doctest: +SKIP 105 | {mean_x} 106 | 107 | """ 108 | add_annotation(variable, self) 109 | if name is not None: 110 | variable.name = name 111 | variable.tag.name = name 112 | add_role(variable, AUXILIARY) 113 | if roles is not None: 114 | for role in roles: 115 | add_role(variable, role) 116 | self.auxiliary_variables.append(variable) 117 | -------------------------------------------------------------------------------- /blocks/log/__init__.py: -------------------------------------------------------------------------------- 1 | from .log import TrainingLog 2 | from .sqlite import SQLiteLog 3 | 4 | BACKENDS = { 5 | 'python': TrainingLog, 6 | 'sqlite': SQLiteLog 7 | } 8 | -------------------------------------------------------------------------------- /blocks/log/log.py: -------------------------------------------------------------------------------- 1 | """The event-based main loop of Blocks.""" 2 | from abc import ABCMeta 3 | from collections import defaultdict 4 | from numbers import Integral 5 | from uuid import uuid4 6 | 7 | import six 8 | 9 | 10 | @six.add_metaclass(ABCMeta) 11 | class TrainingLogBase(object): 12 | """Base class for training log. 
13 | 
14 | A training log stores the training timeline, statistics and other
15 | auxiliary information. Training logs can use different backends, e.g.
16 | in-memory Python objects or an SQLite database.
17 | 
18 | Information is stored similarly to a nested dictionary, so use
19 | ``log[time][key]`` to read data. An entry without stored data will
20 | return an empty dictionary-like object that can be written to:
21 | ``log[time][key] = value``.
22 | 
23 | Depending on the backend, ``log[time] = {'key': 'value'}`` could fail.
24 | Use ``log[time].update({'key': 'value'})`` for compatibility across
25 | backends.
26 | 
27 | In addition to the set of records displaying training dynamics, a
28 | training log has a :attr:`status` attribute, which is a dictionary with
29 | data that is not bound to a particular time.
30 | 
31 | .. warning::
32 | 
33 | Changes to mutable objects might not be reflected in the log,
34 | depending on the backend. So don't use
35 | ``log.status['key'].append(...)``, use ``log.status['key'] = ...``
36 | instead.
37 | 
38 | Parameters
39 | ----------
40 | uuid : :class:`uuid.UUID`, optional
41 | The UUID of this log. For persistent log backends, passing the UUID
42 | will result in an old log being loaded. Otherwise a new, random
43 | UUID will be created.
44 | 
45 | Attributes
46 | ----------
47 | status : dict
48 | A dictionary with data representing the current state of training.
49 | By default it contains ``iterations_done``, ``epochs_done`` and
50 | ``_epoch_ends`` (a list of time stamps when epochs ended).
51 | 
52 | """
53 | def __init__(self, uuid=None):
54 | if uuid is None:
55 | self.uuid = uuid4()
56 | else:
57 | self.uuid = uuid
58 | if uuid is None:
59 | self.status.update({
60 | 'iterations_done': 0,
61 | 'epochs_done': 0,
62 | '_epoch_ends': [],
63 | 'resumed_from': None
64 | })
65 | 
66 | @property
67 | def h_uuid(self):
68 | """Return a hexadecimal version of the UUID bytes.
69 | 
70 | This is necessary to store ids in an SQLite database.
71 | 
72 | """
73 | return self.uuid.hex
74 | 
75 | def resume(self):
76 | """Resume a log by setting a new random UUID.
77 | 
78 | Keeps a record of the old log that this is a continuation of. It
79 | copies the status of the old log into the new log.
80 | 
81 | """
82 | old_uuid = self.h_uuid
83 | old_status = dict(self.status)
84 | self.uuid = uuid4()
85 | self.status.update(old_status)
86 | self.status['resumed_from'] = old_uuid
87 | 
88 | def _check_time(self, time):
89 | if not isinstance(time, Integral) or time < 0:
90 | raise ValueError("time must be a non-negative integer")
91 | 
92 | @property
93 | def current_row(self):
94 | return self[self.status['iterations_done']]
95 | 
96 | @property
97 | def previous_row(self):
98 | return self[self.status['iterations_done'] - 1]
99 | 
100 | @property
101 | def last_epoch_row(self):
102 | return self[self.status['_epoch_ends'][-1]]
103 | 
104 | 
105 | class TrainingLog(defaultdict, TrainingLogBase):
106 | """Training log using a `defaultdict` as backend.
107 | 
108 | Notes
109 | -----
110 | For analysis of the logs, it can be useful to convert the log to a
111 | Pandas_ data frame:
112 | 
113 | .. code:: python
114 | 
115 | df = DataFrame.from_dict(log, orient='index')
116 | 
117 | .. _Pandas: http://pandas.pydata.org
118 | 
119 | """
120 | def __init__(self):
121 | defaultdict.__init__(self, dict)
122 | self.status = {}
123 | TrainingLogBase.__init__(self)
124 | 
125 | def __reduce__(self):
126 | constructor, args, _, _, items = super(TrainingLog, self).__reduce__()
127 | return constructor, (), self.__dict__, _, items
128 | 
129 | def __getitem__(self, time):
130 | self._check_time(time)
131 | return super(TrainingLog, self).__getitem__(time)
132 | 
133 | def __setitem__(self, time, value):
134 | self._check_time(time)
135 | return super(TrainingLog, self).__setitem__(time, value)
136 | 
-------------------------------------------------------------------------------- /blocks/model.py: --------------------------------------------------------------------------------
1 | """Model - heavily annotated computation graph.
2 | 
3 | A model in Blocks is simply an annotated computation graph. The class
4 | :class:`Model` extends :class:`blocks.graph.ComputationGraph`,
5 | which is able to handle annotations and roles in general, but is
6 | deliberately made unaware of specific annotations that a Theano graph
7 | created by Blocks typically has, such as bricks and application calls. The
8 | :class:`Model` adds this functionality. Using :class:`Model` you can do
9 | things like query all the bricks used to build the computation graph,
10 | request "hierarchical names" of the parameters (a hierarchical name is a
11 | path-like string which in addition to the parameter's name contains names
12 | of the bricks on the path from a root brick to the brick that owns the
13 | parameters, e.g. ``/mlp/linear/W``).
14 | 
15 | For more information, see the :class:`Model` docstring.
16 | 
17 | """
18 | import logging
19 | from collections import OrderedDict, Counter
20 | from itertools import chain
21 | 
22 | from blocks.algorithms import GradientDescent
23 | from blocks.graph import ComputationGraph
24 | from blocks.filter import get_brick
25 | 
26 | logger = logging.getLogger(__name__)
27 | 
28 | 
29 | class Model(ComputationGraph):
30 | """Handles annotations in Blocks-built computation graphs.
31 | 
32 | Use this class to handle your Blocks-created computation graph.
33 | 
34 | Examples
35 | --------
36 | >>> from theano import tensor
37 | >>> from blocks.bricks import MLP, Tanh
38 | >>> x = tensor.matrix('x')
39 | >>> mlp = MLP([Tanh(), Tanh()], [10, 10, 10])
40 | >>> y = mlp.apply(x)
41 | >>> model = Model(y)
42 | 
43 | With :class:`Model` you can get access to the brick hierarchy. The
44 | brick hierarchy is defined by ``children`` attributes that every brick
45 | has. The bricks that are not children of other bricks are called top
46 | bricks. It is often useful to have access to top bricks of a brick
47 | hierarchy used to build a computation graph, and here is how you can do
48 | it:
49 | 
50 | >>> model.get_top_bricks() #doctest: +ELLIPSIS
51 | [<blocks.bricks.sequences.MLP object at ...]
52 | 
53 | You can also request the parameters of the model together with their
54 | hierarchical names, which encode the positions of their owner bricks
55 | in the hierarchy:
56 | 
57 | >>> model.get_parameter_dict() #doctest: +NORMALIZE_WHITESPACE
58 | OrderedDict([('/mlp/linear_1.b', b), ('/mlp/linear_0.b', b),
59 | ('/mlp/linear_0.W', W), ('/mlp/linear_1.W', W)])
60 | 
61 | """
62 | def __init__(self, *args, **kwargs):
63 | super(Model, self).__init__(*args, **kwargs)
64 | bricks = [get_brick(var) for var
65 | in self.variables + self.scan_variables if get_brick(var)]
66 | children = set(chain(*(brick.children for brick in bricks)))
67 | # Quadratic complexity: we should not have thousands of
68 | # top-level bricks.
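# (The explicit loop below, rather than a set difference, keeps the
# discovery order of the top bricks deterministic.)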
69 | self.top_bricks = []
70 | for brick in bricks:
71 | if brick not in children and brick not in self.top_bricks:
72 | self.top_bricks.append(brick)
73 | names = Counter([brick.name for brick in self.top_bricks])
74 | repeated_names = [name for name, count in names.items() if count > 1]
75 | if repeated_names:
76 | raise ValueError("top bricks with the same name:"
77 | " {}".format(', '.join(repeated_names)))
78 | parameter_list = []
79 | for parameter in self.parameters:
80 | if get_brick(parameter):
81 | parameter_list.append(
82 | (get_brick(parameter).get_hierarchical_name(parameter),
83 | parameter))
84 | else:
85 | parameter_list.append((parameter.name, parameter))
86 | self._parameter_dict = OrderedDict(parameter_list)
87 | 
88 | def check_sanity(self, algorithm):
89 | # Sanity check for the most common case
90 | if self and isinstance(algorithm, GradientDescent):
91 | if not (set(self.get_parameter_dict().values()) ==
92 | set(algorithm.parameters)):
93 | logger.warning("different parameters for model and algorithm")
94 | 
95 | def get_parameter_dict(self):
96 | """Return parameters with their hierarchical names.
97 | 
98 | The parameter names are formed from positions of their owner bricks
99 | in the brick hierarchy. The variable names are used for the
100 | parameters that do not belong to any brick.
101 | 
102 | Returns
103 | -------
104 | parameter_dict : dict
105 | A dictionary of (hierarchical name, shared variable) pairs.
106 | 
107 | """
108 | return self._parameter_dict
109 | 
110 | def get_parameter_values(self):
111 | """Return the values of model parameters.
112 | 
113 | The same hierarchical names as in :meth:`get_parameter_dict` are
114 | used to uniquely identify parameters.
115 | 
116 | Returns
117 | -------
118 | parameter_values : OrderedDict
119 | Dictionary of (hierarchical name, :class:`~numpy.ndarray`)
120 | pairs.
121 | 
122 | """
123 | return OrderedDict(
124 | (name, parameter.get_value())
125 | for name, parameter in self.get_parameter_dict().items())
126 | 
127 | def set_parameter_values(self, parameter_values):
128 | """Set the values of model parameters.
129 | 
130 | The same hierarchical names as in :meth:`get_parameter_dict` are
131 | used to uniquely identify parameters.
132 | 
133 | Parameters
134 | ----------
135 | parameter_values : OrderedDict
136 | Dictionary of (hierarchical name, :class:`~numpy.ndarray`)
137 | pairs.
138 | 
139 | """
140 | parameters = self.get_parameter_dict()
141 | 
142 | unknown = set(parameter_values) - set(parameters)
143 | missing = set(parameters) - set(parameter_values)
144 | if len(unknown):
145 | logger.error("unknown parameter names: {}\n".format(unknown))
146 | if len(missing):
147 | logger.error("missing values for parameters: {}\n".format(missing))
148 | 
149 | for name, value in parameter_values.items():
150 | if name in parameters:
151 | model_shape = parameters[name].container.data.shape
152 | if model_shape != value.shape:
153 | raise ValueError("Shape mismatch for parameter: {}. "
154 | "Expected {}, got {}."
155 | .format(name, model_shape, value.shape))
156 | parameters[name].set_value(value)
157 | 
158 | def get_top_bricks(self):
159 | """Get the bricks that do not have parents.
160 | 161 | Returns 162 | ------- 163 | bricks : list of :class:`~blocks.bricks.base.Brick` 164 | 165 | """ 166 | return self.top_bricks 167 | -------------------------------------------------------------------------------- /blocks/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/blocks/monitoring/__init__.py -------------------------------------------------------------------------------- /blocks/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/blocks/scripts/__init__.py -------------------------------------------------------------------------------- /blocks/theano_expressions.py: -------------------------------------------------------------------------------- 1 | """Frequently used Theano expressions.""" 2 | from theano import tensor 3 | 4 | 5 | def l2_norm(tensors, squared=False): 6 | """Computes the total L2 norm of a set of tensors. 7 | 8 | Converts all operands to :class:`~tensor.TensorVariable` 9 | (see :func:`~tensor.as_tensor_variable`). 10 | 11 | Parameters 12 | ---------- 13 | tensors : iterable of :class:`~tensor.TensorVariable` (or compatible) 14 | The tensors. 15 | squared : bool, optional 16 | If `True`, return the squared L2 norm. Default: `False`. 17 | 18 | """ 19 | summed = [tensor.sqr(tensor.as_tensor_variable(t)).sum() for t in tensors] 20 | joined = tensor.stack(summed, axis=0) 21 | return joined.sum() if squared else tensor.sqrt(joined.sum()) 22 | 23 | 24 | def hessian_times_vector(gradient, parameter, vector, r_op=False): 25 | """Return an expression for the Hessian times a vector. 26 | 27 | Parameters 28 | ---------- 29 | gradient : :class:`~tensor.TensorVariable` 30 | The gradient of a cost with respect to `parameter` 31 | parameter : :class:`~tensor.TensorVariable` 32 | The parameter with respect to which to take the gradient 33 | vector : :class:`~tensor.TensorVariable` 34 | The vector with which to multiply the Hessian 35 | r_op : bool, optional 36 | Whether to use :func:`~tensor.gradient.Rop` or not. Defaults to 37 | ``False``. Which solution is fastest normally needs to be 38 | determined by profiling. 
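Examples
--------
A minimal sketch: for the cost ``(p ** 2).sum()`` the Hessian is twice the
identity matrix, so the resulting expression is equivalent to ``2 * v``.

>>> from theano import tensor
>>> p = tensor.vector('p')
>>> v = tensor.vector('v')
>>> gradient = tensor.grad((p ** 2).sum(), p)
>>> Hv = hessian_times_vector(gradient, p, v)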
39 | 
40 | """
41 | if r_op:
42 | return tensor.Rop(gradient, parameter, vector)
43 | return tensor.grad(tensor.sum(gradient * vector), parameter)
44 | 
-------------------------------------------------------------------------------- /blocks/utils/__init__.py: --------------------------------------------------------------------------------
1 | # This is basically equivalent to
2 | #
3 | #     from .utils import *
4 | #     from .theano_utils import *
5 | #
6 | # but acts lazily, meaning that a submodule is imported only if needed
7 | import sys
8 | import importlib
9 | from types import ModuleType
10 | 
11 | 
12 | class UtilsModule(ModuleType):
13 | utils_attrs = (
14 | "pack", "unpack", "reraise_as", "dict_subset", "dict_union",
15 | "repr_attrs", "ipdb_breakpoint", "print_sum", "print_shape",
16 | "change_recursion_limit", "extract_args", "find_bricks")
17 | theano_utils_attrs = (
18 | "shared_floatx_zeros_matching", "shared_floatx_zeros",
19 | "shared_floatx_nans", "shared_floatx", "shared_like",
20 | "check_theano_variable", "is_graph_input",
21 | "is_shared_variable", "put_hook")
22 | __all__ = utils_attrs + theano_utils_attrs
23 | __doc__ = __doc__
24 | __file__ = __file__
25 | __path__ = __path__
26 | 
27 | def __getattr__(self, item):
28 | # Do a lazy import so that the submodule is imported only if needed.
29 | # Python caches imports, so a second import is almost free.
30 | if item in self.utils_attrs:
31 | utils = importlib.import_module(".utils", __name__)
32 | return getattr(utils, item)
33 | elif item in self.theano_utils_attrs:
34 | theano_utils = importlib.import_module(".theano_utils", __name__)
35 | return getattr(theano_utils, item)
36 | else:
37 | return super(UtilsModule, self).__getattribute__(item)
38 | 
39 | 
40 | # In Python 2 (legacy Python) the garbage collector destroys the module
41 | # unless we save a reference
42 | old_module = sys.modules[__name__]
43 | sys.modules[__name__] = UtilsModule(__name__)
44 | 
-------------------------------------------------------------------------------- /blocks/utils/containers.py: --------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 | from collections import MutableSequence
3 | 
4 | from six import add_metaclass
5 | 
6 | 
7 | @add_metaclass(ABCMeta)
8 | class AnnotatingList(MutableSequence):
9 | """Mutable sequence performing operations on inserted/removed items.
10 | 
11 | Parameters
12 | ----------
13 | items : iterable, optional
14 | An iterable of items to initialize the sequence with.
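Examples
--------
A minimal sketch: a subclass that reports insertions (the ``VerboseList``
name is purely illustrative).

>>> class VerboseList(AnnotatingList):
...     def _setitem(self, key, value):
...         print('inserting {}'.format(value))
>>> items = VerboseList()
>>> items.append(7)
inserting 7
>>> items
[7]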
15 | 
16 | """
17 | def __init__(self, items=None):
18 | self._items = []
19 | if not items:
20 | items = []
21 | for item in items:
22 | self.append(item)
23 | 
24 | def __repr__(self):
25 | return repr(self._items)
26 | 
27 | def __eq__(self, other):
28 | return self._items == other
29 | 
30 | def __ne__(self, other):
31 | return self._items != other
32 | 
33 | def __getitem__(self, key):
34 | return self._items[key]
35 | 
36 | def _setitem(self, key, value):
37 | """The operation to perform when an item is inserted/appended."""
38 | pass
39 | 
40 | def _delitem(self, key):
41 | """The operation to perform when an item is deleted."""
42 | pass
43 | 
44 | def __setitem__(self, key, value):
45 | self._setitem(key, value)
46 | self._items[key] = value
47 | 
48 | def __delitem__(self, key):
49 | self._delitem(key)
50 | del self._items[key]
51 | 
52 | def __len__(self):
53 | return len(self._items)
54 | 
55 | def insert(self, key, value):
56 | self._setitem(key, value)
57 | self._items.insert(key, value)
58 | 
-------------------------------------------------------------------------------- /blocks/utils/profile.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import sys
4 | import timeit
5 | from collections import defaultdict, OrderedDict
6 | 
7 | 
8 | class Profile(object):
9 | """A profile of hierarchical timers.
10 | 
11 | Keeps track of timings performed with :class:`Timer`. It also keeps
12 | track of the way these timings were nested and makes use of this
13 | information when reporting.
14 | 
15 | """
16 | def __init__(self):
17 | self.total = defaultdict(int)
18 | self.current = []
19 | self.order = OrderedDict()
20 | 
21 | def enter(self, name):
22 | self.current.append(name)
23 | # We record the order in which sections were first called
24 | self.order[tuple(self.current)] = None
25 | 
26 | def exit(self, t):
27 | self.total[tuple(self.current)] += t
28 | self.current.pop()
29 | 
30 | def report(self, f=sys.stderr):
31 | """Print a report of timing information.
32 | 
33 | Parameters
34 | ----------
35 | f : object, optional
36 | An object with a ``write`` method that accepts string inputs.
37 | Can be a file object, ``sys.stdout``, etc. Defaults to
38 | ``sys.stderr``.
39 | 
40 | """
41 | total = sum(v for k, v in self.total.items() if len(k) == 1)
42 | 
43 | def print_report(keys, level=0):
44 | subtotal = 0
45 | for key in keys:
46 | if len(key) > level + 1:
47 | continue
48 | subtotal += self.total[key]
49 | section = ' '.join(key[-1].split('_'))
50 | section = section[0].upper() + section[1:]
51 | print('{:30}{:15.2f}{:15.2%}'.format(
52 | level * ' ' + section, self.total[key],
53 | self.total[key] / total
54 | ), file=f)
55 | children = [k for k in keys
56 | if k[level] == key[level] and
57 | len(k) > level + 1]
58 | child_total = print_report(children, level + 1)
59 | if children:
60 | print('{:30}{:15.2f}{:15.2%}'.format(
61 | (level + 1) * ' ' + 'Other',
62 | self.total[key] - child_total,
63 | (self.total[key] - child_total) / total
64 | ), file=f)
65 | return subtotal
66 | 
67 | print('{:30}{:>15}{:>15}'.format('Section', 'Time', '% of total'),
68 | file=f)
69 | print('-' * 60, file=f)
70 | if total:
71 | print_report(self.order.keys())
72 | else:
73 | print('No profile information collected.', file=f)
74 | 
75 | 
76 | class Timer(object):
77 | """A context manager to measure the execution time of the code within it.
78 | 
79 | This timer is attached to a :class:`Profile` object that it reports
80 | timings to. The :class:`Profile` object accumulates the timings.
81 | Timers can be nested, which the :class:`Profile` will automatically
82 | keep track of and use in its reporting.
83 | 
84 | Parameters
85 | ----------
86 | name : str
87 | The name of this section. Expected to adhere to variable naming
88 | styles.
89 | profile : :class:`Profile`
90 | The profile of the main loop. This is the object this context
91 | manager will report the execution time to. The accumulation and
92 | processing of timing information is handled by this object.
93 | 
94 | Notes
95 | -----
96 | Timings are reported using :func:`timeit.default_timer`.
97 | 
98 | """
99 | def __init__(self, name, profile):
100 | self.name = name
101 | self.profile = profile
102 | 
103 | def __enter__(self):
104 | self.profile.enter(self.name)
105 | self.start = timeit.default_timer()
106 | 
107 | def __exit__(self, *args):
108 | self.profile.exit(timeit.default_timer() - self.start)
109 | 
-------------------------------------------------------------------------------- /blocks/utils/testing.py: --------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import sys
4 | import time
5 | from six import wraps
6 | from importlib import import_module
7 | from unittest.case import SkipTest
8 | 
9 | from six import StringIO
10 | 
11 | import blocks
12 | from blocks.algorithms import TrainingAlgorithm
13 | from blocks.config import config
14 | from blocks.main_loop import MainLoop
15 | from fuel.datasets import IterableDataset
16 | 
17 | 
18 | def silence_printing(test):
19 | @wraps(test)
20 | def wrapper(*args, **kwargs):
21 | stdout = sys.stdout
22 | sys.stdout = StringIO()
23 | logger = logging.getLogger(blocks.__name__)
24 | old_level = logger.level
25 | logger.setLevel(logging.ERROR)
26 | try:
27 | test(*args, **kwargs)
28 | finally:
29 | sys.stdout = stdout
30 | logger.setLevel(old_level)
31 | return wrapper
32 | 
33 | 
34 | def skip_if_not_available(modules=None, datasets=None, configurations=None):
35 | """Raise a SkipTest exception when requirements are not met.
36 | 
37 | Parameters
38 | ----------
39 | modules : list
40 | A list of strings of module names. If one of the modules fails to
41 | import, the test will be skipped.
42 | datasets : list
43 | A list of strings of folder names. If the data path is not
44 | configured, or the folder does not exist, the test is skipped.
45 | configurations : list
46 | A list of strings of configuration names. If this configuration
47 | is not set and does not have a default, the test will be skipped.
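Examples
--------
A sketch of typical use at the top of a test; the module and dataset names
here are only examples.

>>> def test_foo():
...     skip_if_not_available(modules=['theano'], datasets=['mnist'])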
48 | 
49 | """
50 | if modules is None:
51 | modules = []
52 | if datasets is None:
53 | datasets = []
54 | if configurations is None:
55 | configurations = []
56 | for module in modules:
57 | try:
58 | import_module(module)
59 | except Exception:
60 | raise SkipTest
61 | if module == 'bokeh':
62 | ConnectionError = import_module(
63 | 'requests.exceptions').ConnectionError
64 | session = import_module('bokeh.session').Session()
65 | try:
66 | session.execute('get', session.base_url)
67 | except ConnectionError:
68 | raise SkipTest
69 | 
70 | if datasets and not hasattr(config, 'data_path'):
71 | raise SkipTest
72 | for dataset in datasets:
73 | if not os.path.exists(os.path.join(config.data_path, dataset)):
74 | raise SkipTest
75 | for configuration in configurations:
76 | if not hasattr(config, configuration):
77 | raise SkipTest
78 | 
79 | 
80 | def skip_if_configuration_set(configuration, value, message=None):
81 | """Raise SkipTest if a configuration option has a certain value.
82 | 
83 | Parameters
84 | ----------
85 | configuration : str
86 | Configuration option to check.
87 | value : str
88 | Value of `blocks.config.<configuration>` which should cause
89 | a `SkipTest` to be raised.
90 | message : str, optional
91 | Reason for skipping the test.
92 | 
93 | """
94 | if getattr(config, configuration) == value:
95 | if message is not None:
96 | raise SkipTest(message)
97 | else:
98 | raise SkipTest
99 | 
100 | 
101 | class MockAlgorithm(TrainingAlgorithm):
102 | """An algorithm that only saves data.
103 | 
104 | Also checks that the initialization routine is only called once.
105 | 
106 | """
107 | def __init__(self, delay_time=0):
108 | self._initialized = False
109 | self.delay_time = delay_time
110 | 
111 | def initialize(self):
112 | assert not self._initialized
113 | self._initialized = True
114 | 
115 | def process_batch(self, batch):
116 | self.batch = batch
117 | time.sleep(self.delay_time)
118 | 
119 | 
120 | class MockMainLoop(MainLoop):
121 | """Mock main loop with mock algorithm and simple data stream.
122 | 
123 | Can be used with `main_loop = MagicMock(wraps=MockMainLoop())` to check
124 | which calls were made.
125 | 126 | """ 127 | def __init__(self, delay_time=0, **kwargs): 128 | kwargs.setdefault('data_stream', 129 | IterableDataset(range(10)).get_example_stream()) 130 | kwargs.setdefault('algorithm', MockAlgorithm(delay_time)) 131 | super(MockMainLoop, self).__init__(**kwargs) 132 | -------------------------------------------------------------------------------- /blocks/version.py: -------------------------------------------------------------------------------- 1 | version = '0.2.0' 2 | -------------------------------------------------------------------------------- /docs/_static/code_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/docs/_static/code_quality.png -------------------------------------------------------------------------------- /docs/_static/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/docs/_static/mnist.png -------------------------------------------------------------------------------- /docs/_static/plot_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/docs/_static/plot_a.png -------------------------------------------------------------------------------- /docs/_static/plot_cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/docs/_static/plot_cost.png -------------------------------------------------------------------------------- /docs/_static/sequence_generator_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/docs/_static/sequence_generator_scheme.png -------------------------------------------------------------------------------- /docs/api/algorithms.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. automodule:: blocks.algorithms 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/bricks.rst: -------------------------------------------------------------------------------- 1 | Bricks 2 | ====== 3 | 4 | * `Convolutional bricks`_ 5 | * `Routing bricks`_ 6 | * `Recurrent bricks`_ 7 | * `Attention bricks`_ 8 | * `Sequence generators`_ 9 | * `Cost bricks`_ 10 | 11 | .. automodule:: blocks.bricks 12 | :members: 13 | :exclude-members: Activation, ActivationDocumentation, BaseRecurrent, 14 | recurrent, SimpleRecurrent, LSTM, GatedRecurrent, 15 | Bidirectional, RecurrentStack, RECURRENTSTACK_SEPARATOR 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | .. automodule:: blocks.bricks.lookup 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | Convolutional bricks 25 | -------------------- 26 | 27 | .. automodule:: blocks.bricks.conv 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | Routing bricks 33 | -------------- 34 | 35 | .. 
automodule:: blocks.bricks.parallel 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | Recurrent bricks 41 | ---------------- 42 | 43 | Recurrent architectures 44 | ^^^^^^^^^^^^^^^^^^^^^^^ 45 | 46 | .. automodule:: blocks.bricks.recurrent.architectures 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | Helper bricks for recurrent networks 52 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 53 | 54 | .. automodule:: blocks.bricks.recurrent.misc 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | 59 | Base definitions for recurrent bricks 60 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 61 | 62 | .. automodule:: blocks.bricks.recurrent.base 63 | :members: 64 | :undoc-members: 65 | :show-inheritance: 66 | 67 | Attention bricks 68 | ---------------- 69 | 70 | .. automodule:: blocks.bricks.attention 71 | :members: 72 | :undoc-members: 73 | :show-inheritance: 74 | 75 | 76 | Sequence generators 77 | ------------------- 78 | 79 | .. automodule:: blocks.bricks.sequence_generators 80 | :members: 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | Cost bricks 85 | ----------- 86 | 87 | .. automodule:: blocks.bricks.cost 88 | :members: 89 | :undoc-members: 90 | :show-inheritance: 91 | 92 | Wrapper bricks 93 | -------------- 94 | .. automodule:: blocks.bricks.wrappers 95 | :members: 96 | :undoc-members: 97 | :show-inheritance: 98 | -------------------------------------------------------------------------------- /docs/api/extensions.rst: -------------------------------------------------------------------------------- 1 | Extensions 2 | ========== 3 | 4 | .. automodule:: blocks.extensions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Monitoring extensions 10 | --------------------- 11 | 12 | .. automodule:: blocks.extensions.monitoring 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Training 18 | -------- 19 | 20 | .. automodule:: blocks.extensions.training 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Serialization 26 | ------------- 27 | 28 | .. automodule:: blocks.extensions.saveload 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/api/filter.rst: -------------------------------------------------------------------------------- 1 | .. _filter: 2 | 3 | Filter 4 | ====== 5 | 6 | .. automodule:: blocks.filter 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/api/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | Computational graph 4 | =================== 5 | 6 | .. automodule:: blocks.graph 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. warning:: 5 | 6 | This API reference is currently nothing but a dump of docstrings, ordered 7 | alphabetically. 8 | 9 | The API reference contains detailed descriptions of the different end-user 10 | classes, functions, methods, etc. you will need to work with Blocks. 11 | 12 | .. note:: 13 | 14 | This API reference only contains *end-user* documentation. 
If you are
15 | looking to hack away at Blocks' internals, you will find more detailed
16 | comments in the source code.
17 | 
18 | .. toctree::
19 | :glob:
20 | 
21 | *
22 | 
-------------------------------------------------------------------------------- /docs/api/initialization.rst: --------------------------------------------------------------------------------
1 | .. _initialization:
2 | 
3 | Parameter initialization
4 | ========================
5 | 
6 | .. automodule:: blocks.initialization
7 | :members:
8 | :undoc-members:
9 | :show-inheritance:
10 | 
-------------------------------------------------------------------------------- /docs/api/log.rst: --------------------------------------------------------------------------------
1 | .. _log:
2 | 
3 | Logging
4 | =======
5 | 
6 | The log has two different backends, configurable in ``.blocksrc``;
7 | see :doc:`../configuration`.
8 | 
9 | .. automodule:: blocks.log
10 | :members:
11 | :undoc-members:
12 | :show-inheritance:
13 | 
14 | Dictionary backend
15 | ------------------
16 | 
17 | .. automodule:: blocks.log.log
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 | 
22 | SQLite backend
23 | --------------
24 | 
25 | .. automodule:: blocks.log.sqlite
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 | 
-------------------------------------------------------------------------------- /docs/api/main_loop.rst: --------------------------------------------------------------------------------
1 | .. _main:
2 | 
3 | Main loop
4 | =========
5 | 
6 | .. automodule:: blocks.main_loop
7 | :members:
8 | :undoc-members:
9 | :show-inheritance:
10 | 
-------------------------------------------------------------------------------- /docs/api/model.rst: --------------------------------------------------------------------------------
1 | Model
2 | =====
3 | 
4 | .. automodule:: blocks.model
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 | 
-------------------------------------------------------------------------------- /docs/api/roles.rst: --------------------------------------------------------------------------------
1 | Variable roles
2 | ==============
3 | 
4 | .. autofunction:: blocks.roles.add_role
5 | 
6 | Roles
7 | -----
8 | 
9 | All roles are implemented as subclasses of :class:`VariableRole`.
10 | 
11 | .. autoclass:: blocks.roles.VariableRole
12 | 
13 | The actual roles are instances of the different subclasses of
14 | :class:`VariableRole`. They are:
15 | 
16 | .. automodule:: blocks.roles
17 | :members: INPUT, OUTPUT, AUXILIARY, COST, PARAMETER, WEIGHT, BIAS, FILTER
18 | 
-------------------------------------------------------------------------------- /docs/api/select.rst: --------------------------------------------------------------------------------
1 | Brick selectors
2 | ===============
3 | 
4 | .. automodule:: blocks.select
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 | 
-------------------------------------------------------------------------------- /docs/api/serialization.rst: --------------------------------------------------------------------------------
1 | Serialization
2 | =============
3 | 
4 | .. automodule:: blocks.serialization
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 | 
-------------------------------------------------------------------------------- /docs/api/theano_expressions.rst: --------------------------------------------------------------------------------
1 | .. _theano_expressions:
2 | 
3 | Theano expressions
4 | ==================
5 | 
6 | .. automodule:: blocks.theano_expressions
7 | :members:
8 | :undoc-members:
9 | :show-inheritance:
10 | 
-------------------------------------------------------------------------------- /docs/api/utils.rst: --------------------------------------------------------------------------------
1 | .. _utils:
2 | 
3 | Common Utilities
4 | ================
5 | 
6 | .. automodule:: blocks.utils.utils
7 | :members:
8 | :undoc-members:
9 | :show-inheritance:
10 | 
11 | 
12 | Theano Utilities
13 | ================
14 | 
15 | .. automodule:: blocks.utils.theano_utils
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 | 
-------------------------------------------------------------------------------- /docs/cg.rst: --------------------------------------------------------------------------------
1 | Managing the computation graph
2 | ==============================
3 | 
4 | Theano constructs computation graphs of mathematical expressions. Bricks help
5 | you :doc:`build these graphs <bricks_overview>`, but they do more than that.
6 | When you apply a brick to a Theano variable, it automatically *annotates* this
7 | Theano variable in two ways:
8 | 
9 | * It defines the *role* this variable plays in the computation graph, e.g. it will
10 | label weight matrices and biases as parameters, keep track of which variables
11 | were the inputs and outputs of your bricks, and more.
12 | * It constructs *auxiliary variables*. These are variables which are not
13 | outputs of your brick, but might still be of interest. For example, if you are
14 | training a neural network, you might be interested in knowing the norm of your
15 | weight matrices, so Blocks attaches these as auxiliary variables to the graph.
16 | 
17 | Using annotations
18 | -----------------
19 | 
20 | The :class:`.ComputationGraph` class provides an interface to this annotated
21 | graph. For example, let's say we want to train an autoencoder using weight decay
22 | on some of the layers.
23 | 
24 | >>> from theano import tensor
25 | >>> x = tensor.matrix('features')
26 | >>> from blocks.bricks import MLP, Logistic, Rectifier
27 | >>> from blocks.initialization import IsotropicGaussian, Constant
28 | >>> mlp = MLP(activations=[Rectifier()] * 2 + [Logistic()],
29 | ... dims=[784, 256, 128, 784],
30 | ... weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
31 | >>> y_hat = mlp.apply(x)
32 | >>> from blocks.bricks.cost import BinaryCrossEntropy
33 | >>> cost = BinaryCrossEntropy().apply(x, y_hat)
34 | 
35 | Our Theano computation graph is now defined by our loss, ``cost``. We initialize
36 | the managed graph.
37 | 
38 | >>> from blocks.graph import ComputationGraph
39 | >>> cg = ComputationGraph(cost)
40 | 
41 | We will find that there are many variables in this graph.
42 | 
43 | >>> print(cg.variables) # doctest: +SKIP
44 | [TensorConstant{0}, b, W_norm, b_norm, features, TensorConstant{1.0}, ...]
45 | 
46 | To apply weight decay, we only need the weight matrices. These have been tagged
47 | with the :const:`~blocks.roles.WEIGHT` role. So let's create a filter that finds these for us.
48 | 
49 | >>> from blocks.filter import VariableFilter
50 | >>> from blocks.roles import WEIGHT
51 | >>> print(VariableFilter(roles=[WEIGHT])(cg.variables))
52 | [W, W, W]
53 | 
54 | Note that the variables in :attr:`cg.variables
55 | <.ComputationGraph.variables>` are ordered according to the *topological
56 | order* of their apply nodes. This means that for a feedforward network the
57 | parameters will be returned in the order of our layers.
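Having found the weight matrices, adding the actual weight decay term to our
loss is a one-liner. This is a minimal sketch; the coefficient ``0.005`` is an
arbitrary example value.

>>> weights = VariableFilter(roles=[WEIGHT])(cg.variables)
>>> cost_with_decay = cost + 0.005 * sum((W ** 2).sum() for W in weights)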
58 | 
59 | But let's imagine for a second that we are actually dealing with a far more
60 | complicated network, and we want to apply weight decay to the parameters of one
61 | layer in particular. To do that, we can filter the variables by the bricks that
62 | created them.
63 | 
64 | >>> second_layer = mlp.linear_transformations[1]
65 | >>> from blocks.roles import PARAMETER
66 | >>> var_filter = VariableFilter(roles=[PARAMETER], bricks=[second_layer])
67 | >>> print(var_filter(cg.variables))
68 | [b, W]
69 | 
70 | .. note::
71 | 
72 | There are a variety of different roles that you can filter by. You might have
73 | noted already that there is a hierarchy to many of them: Filtering by
74 | :const:`~blocks.roles.PARAMETER` will also return variables of the child
75 | roles :const:`~blocks.roles.WEIGHT` and :const:`~blocks.roles.BIAS`.
76 | 
77 | We can also see what auxiliary variables our bricks have created. These might be
78 | of interest to monitor during training, for example.
79 | 
80 | >>> print(cg.auxiliary_variables)
81 | [W_norm, b_norm, W_norm, b_norm, W_norm, b_norm]
82 | 
83 | 
-------------------------------------------------------------------------------- /docs/configuration.rst: --------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 | 
4 | .. automodule:: blocks.config
5 | 
6 | .. autoclass:: ConfigurationError
7 | :show-inheritance:
8 | 
-------------------------------------------------------------------------------- /docs/development/docs.rst: --------------------------------------------------------------------------------
1 | Building documentation
2 | ----------------------
3 | 
4 | If you've made significant changes to the documentation, you can build a local
5 | copy to see how your changes are rendered. You will need to install Sphinx_, the
6 | Napoleon_ extension (to enable NumPy docstring support), and the `Read the Docs
7 | theme`_. You can do this by installing the optional ``docs`` requirements.
8 | 
9 | For Blocks:
10 | 
11 | .. code-block:: bash
12 | 
13 | $ pip install --upgrade git+git://github.com/user/blocks.git#egg=blocks[docs]
14 | 
15 | 
16 | For Fuel:
17 | 
18 | .. code-block:: bash
19 | 
20 | $ pip install --upgrade git+git://github.com/user/fuel.git#egg=fuel[docs]
21 | 
22 | 
23 | After the requirements have been installed, you can build a copy of the
24 | documentation by running the following command from the root ``blocks``
25 | (or ``fuel``) directory.
26 | 
27 | .. code-block:: bash
28 | 
29 | $ sphinx-build -b html docs docs/_build/html
30 | 
31 | .. _Sphinx: http://sphinx-doc.org/
32 | .. _Read the Docs theme: https://github.com/snide/sphinx_rtd_theme
33 | 
34 | Docstrings
35 | ----------
36 | 
37 | Blocks and Fuel follow the `NumPy docstring standards`_. For a quick
38 | introduction, have a look at the NumPy_ or Napoleon_ examples of
39 | compliant docstrings. A few common mistakes to avoid:
40 | 
41 | * There is no line break after the opening quotes (``"""``).
42 | * There is an empty line before the closing quotes (``"""``).
43 | * The summary should not be more than one line.
44 | 
45 | The docstrings are formatted using reStructuredText_, and can make use of all
46 | the formatting capabilities this provides. They are rendered into HTML
47 | documentation using the `Read the Docs`_ service. After code has been merged,
48 | please ensure that documentation was built successfully and that your docstrings
49 | rendered as you intended by looking at the online documentation (for
50 | `Blocks <http://blocks.readthedocs.org/>`_ or `Fuel <http://fuel.readthedocs.org/>`_),
51 | which is automatically updated.
52 | 
53 | Writing doctests_ is encouraged, and they are run as part of the test suite.
54 | They should use Python 3 syntax.
55 | 
56 | .. _NumPy docstring standards: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt
57 | .. _NumPy: https://github.com/numpy/numpy/blob/master/doc/example.py
58 | .. _Napoleon: http://sphinxcontrib-napoleon.readthedocs.org/en/latest/example_numpy.html
59 | .. _reStructuredText: http://docutils.sourceforge.net/rst.html
60 | .. _doctests: https://docs.python.org/2/library/doctest.html
61 | .. _Read the Docs: https://readthedocs.org/
62 | .. _Blocks online documentation: http://blocks.readthedocs.org/
63 | .. _Fuel online documentation: http://fuel.readthedocs.org/
64 | .. _a bug in Napoleon: https://bitbucket.org/birkenfeld/sphinx-contrib/issue/82/napoleon-return-type-containing-colons-is
65 | 
66 | .. _references_and_intersphinx:
67 | 
68 | References and Intersphinx
69 | --------------------------
70 | 
71 | Sphinx allows you to `reference other objects`_ in the framework. This
72 | automatically creates links to the API documentation of that object (if it
73 | exists).
74 | 
75 | .. code-block:: rst
76 | 
77 | This is a link to :class:`SomeClass` in the same file. If you want to
78 | reference an object in another file, you can use a leading dot to tell
79 | Sphinx to look in all files e.g. :meth:`.SomeClass.a_method`.
80 | 
81 | Intersphinx is an enabled extension that allows you to reference
82 | the documentation of other projects such as Theano, NumPy and SciPy.
83 | 
84 | .. code-block:: rst
85 | 
86 | The input to a method can be of the type :class:`~numpy.ndarray`. Note that
87 | in this case we need to give the full path. The tilde (~) tells Sphinx not
88 | to render the full path (numpy.ndarray), but only the object itself
89 | (ndarray).
90 | 
91 | .. warning::
92 | 
93 | Because of `a bug in Napoleon`_ you can't use the reference to a type in the
94 | "Returns" section of your docstring without giving it a name. This doesn't
95 | render correctly:
96 | 
97 | ::
98 | 
99 | Returns
100 | -------
101 | :class:`Brick`
102 | The returned Brick.
103 | 
104 | But this does:
105 | 
106 | ::
107 | 
108 | Returns
109 | -------
110 | returned_brick : :class:`Brick`
111 | The returned Brick.
112 | 
113 | .. _reference other objects: http://sphinx-doc.org/domains.html#python-roles
114 | 
-------------------------------------------------------------------------------- /docs/development/internal_api.rst: --------------------------------------------------------------------------------
1 | Internal API
2 | ============
3 | 
4 | * `Bricks`_
5 | * `Extensions`_
6 | * `Utils`_
7 | 
8 | Bricks
9 | ------
10 | .. automodule:: blocks.bricks.base
11 | :undoc-members:
12 | :members:
13 | :private-members:
14 | :show-inheritance:
15 | 
16 | .. automodule:: blocks.bricks
17 | :members: Activation
18 | :undoc-members:
19 | :private-members:
20 | :show-inheritance:
21 | 
22 | .. automodule:: blocks.bricks.interfaces
23 | :members: ActivationDocumentation
24 | :undoc-members:
25 | :private-members:
26 | :show-inheritance:
27 | 
28 | Extensions
29 | ----------
30 | .. automodule:: blocks.extensions.predicates
31 | :undoc-members:
32 | :members:
33 | :private-members:
34 | :show-inheritance:
35 | 
36 | ..
automodule:: blocks.monitoring.evaluators 37 | :undoc-members: 38 | :members: 39 | :private-members: 40 | :show-inheritance: 41 | 42 | Utils 43 | ----- 44 | .. automodule:: blocks.utils.containers 45 | :undoc-members: 46 | :members: 47 | :private-members: 48 | :show-inheritance: 49 | 50 | .. automodule:: blocks.utils.profile 51 | :undoc-members: 52 | :members: 53 | :private-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/development/pull_request.rst: -------------------------------------------------------------------------------- 1 | Pull request workflow 2 | ===================== 3 | 4 | Blocks development takes place on GitHub_; developers (including project 5 | leads!) add new features by sending `pull requests`_ from their personal 6 | fork (we operate on the so-called `fork & pull`_ model). 7 | 8 | .. _GitHub: http://github.com/ 9 | .. _pull requests: https://help.github.com/articles/using-pull-requests/ 10 | .. _fork & pull: https://help.github.com/articles/using-pull-requests/#fork--pull 11 | 12 | This page serves as a "quick reference" for the recommended pull request 13 | workflow. It assumes you are working on a UNIX-like environment with Git 14 | already installed. It is **not** intended to be an exhaustive tutorial 15 | on Git; there are many of those available. 16 | 17 | Before you begin 18 | ---------------- 19 | 20 | Create a GitHub account 21 | ~~~~~~~~~~~~~~~~~~~~~~~ 22 | If you don't already have one, you should 23 | `create yourself a GitHub account `_. 24 | 25 | Fork the Blocks repository 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | Once you've set up your account and logged in, you should fork the Blocks 28 | repository to your account by clicking the "Fork" button on the 29 | `official repository's web page `_. 30 | More information on forking is available in `the GitHub documentation`_. 31 | 32 | .. _the GitHub documentation: https://help.github.com/articles/fork-a-repo/ 33 | 34 | Clone from your fork 35 | ~~~~~~~~~~~~~~~~~~~~ 36 | In the side bar of your newly created fork of the Blocks repository, you should 37 | see a field that says **HTTPS clone URL** above it. Copy that to your clipboard 38 | and run, at the terminal, 39 | 40 | .. code-block:: bash 41 | 42 | $ git clone CLONE_URL 43 | 44 | where ``CLONE_URL`` is the URL you copied from your GitHub fork. 45 | 46 | If you're doing a lot of development with GitHub you should look into 47 | setting up `SSH key authentication `_. 48 | 49 | Add the official Blocks repository as a remote 50 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 51 | In order to keep up with changes to the official Blocks repository, notify 52 | Git of its existence and location by running 53 | 54 | .. code-block:: bash 55 | 56 | $ git remote add upstream https://github.com/mila-udem/blocks.git 57 | 58 | You only need to do this once. 59 | 60 | Beginning a pull request 61 | ------------------------ 62 | 63 | Verify that origin points to your fork 64 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 65 | Running the command 66 | 67 | .. code-block:: bash 68 | 69 | $ git remote -v | grep origin 70 | 71 | should display two lines. The URLs therein should contain your GitHub username. 72 | 73 | Update your upstream remote 74 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 75 | Your cloned repository stores a local history of the activity in remote 76 | repositories, and only interacts with the Internet when certain commands 77 | are invoked. 
In order to synchronize the activity in the official Blocks
78 | repository (which Git now knows as ``upstream``) with the local mirror of
79 | the history related to ``upstream``, run
80 | 
81 | .. code-block:: bash
82 | 
83 | $ git fetch upstream
84 | 
85 | You should do this before starting every pull request, for reasons that
86 | will become clear below.
87 | 
88 | Create a new branch for your pull request based on the latest development version of Blocks
89 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90 | In order to create a new branch *starting from the latest commit in the
91 | master branch of the official Blocks repository*, make sure you've fetched
92 | from ``upstream`` (see above) and run
93 | 
94 | .. code-block:: bash
95 | 
96 | $ git checkout -b my_branch_name_for_my_cool_feature upstream/master
97 | 
98 | Obviously, you'll probably want to choose a better branch name.
99 | 
100 | Note that doing this (rather than simply creating a new branch from some
101 | arbitrary point) may save you from a (possibly painful) rebase later on.
102 | 
103 | Working on your pull request
104 | ----------------------------
105 | 
106 | Make modifications, stage them, and commit them
107 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | Repeat until satisfied:
109 | 
110 | * Make some modifications to the code
111 | * Stage them using ``git add`` (``git add -p`` is particularly useful)
112 | * ``git commit`` them, or alternatively ``git reset`` to undo staging by
113 | ``git add``.
114 | 
115 | Push the branch to your fork
116 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
117 | .. code-block:: bash
118 | 
119 | $ git push -u origin my_branch_name_for_my_cool_feature
120 | 
121 | Submitting for review
122 | ---------------------
123 | 
124 | Send a pull request
125 | ~~~~~~~~~~~~~~~~~~~
126 | This can be done from the GitHub web interface for your fork. See
127 | `this documentation from GitHub`_ for more information.
128 | 
129 | .. _this documentation from GitHub: https://help.github.com/articles/using-pull-requests/#initiating-the-pull-request
130 | 
131 | **Give your pull request an appropriate title** which makes it obvious what
132 | the content is. **If it is intended to resolve a specific ticket**, put "Fixes
133 | #NNN." in the pull request description field, where *NNN* is the issue
134 | number. By doing this, GitHub will know to `automatically close the issue`_
135 | when your pull request is merged.
136 | 
137 | Blocks development occurs in two separate branches: the ``master`` branch is the
138 | development branch. If you want to contribute a new feature or change the
139 | behavior of Blocks in any way, please make your pull request to this branch.
140 | 
141 | The ``stable`` branch contains the latest release of Blocks. If you are fixing a
142 | bug (that is present in the latest release), make a pull request to this branch.
143 | If the bug is present in both the ``master`` and ``stable`` branch, two separate
144 | pull requests are in order. The command `git-cherry-pick`_ could be useful here.
145 | 
146 | .. _automatically close the issue: https://github.com/blog/1506-closing-issues-via-pull-requests
147 | .. _git-cherry-pick: https://git-scm.com/docs/git-cherry-pick
148 | 
149 | Incorporating feedback
150 | ----------------------
151 | In order to add additional commits responding to reviewer feedback, simply
152 | follow the instructions above for using ``git add`` and ``git commit``, and
153 | finally ``git push`` (after running the initial command with ``-u``, you should
154 | simply be able to use ``git push`` without any further arguments).
155 | 
156 | Rebasing
157 | ~~~~~~~~
158 | 
159 | Occasionally you will be asked to *rebase* your branch against the latest
160 | master. To do this, run (while you have your branch checked out)
161 | 
162 | .. code-block:: bash
163 | 
164 | $ git fetch upstream && git rebase upstream/master
165 | 
166 | You may encounter an error message about one or more *conflicts*. See
167 | `GitHub's help page on the subject`_. Note that after a rebase you will
168 | usually have to overwrite previous commits on your fork's copy of the
169 | branch with ``git push --force``.
170 | 
171 | .. _GitHub's help page on the subject: https://help.github.com/articles/resolving-merge-conflicts-after-a-git-rebase/
172 | 
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
1 | Welcome to Blocks' documentation!
2 | =================================
3 | Blocks is a framework that helps you build and manage neural network models
4 | using Theano.
5 | 
6 | Want to try it out? Start by :doc:`installing <setup>` Blocks and having a
7 | look at the :ref:`quickstart <quickstart>` further down this page. Once you're
8 | hooked, try your hand at the :ref:`tutorials <tutorials>` and the
9 | examples_.
10 | 
11 | Blocks is developed in parallel with Fuel_, a dataset processing framework.
12 | 
13 | .. warning::
14 | Blocks is a new project which is still under development. As such, certain
15 | (all) parts of the framework are subject to change. The last stable (and
16 | thus likely outdated) version can be found in the ``stable`` branch.
17 | 
18 | .. tip::
19 | 
20 | That said, if you are interested in using Blocks and run into any problems,
21 | feel free to ask your question on the `mailing list`_. Also, don't hesitate
22 | to file bug reports and feature requests by `making a GitHub issue`_.
23 | 
24 | .. _mailing list: https://groups.google.com/forum/#!forum/blocks-users
25 | .. _making a GitHub issue: https://github.com/mila-udem/blocks/issues/new
26 | .. _Fuel: https://github.com/mila-udem/fuel
27 | .. _examples: https://github.com/mila-udem/blocks-examples
28 | 
29 | .. _tutorials:
30 | 
31 | Tutorials
32 | ---------
33 | .. toctree::
34 | :maxdepth: 1
35 | 
36 | setup
37 | tutorial
38 | bricks_overview
39 | cg
40 | plotting
41 | 
42 | In-depth
43 | --------
44 | .. toctree::
45 | :maxdepth: 1
46 | 
47 | rnn
48 | configuration
49 | create_your_own_brick
50 | serialization
51 | api/index.rst
52 | development/index.rst
53 | 
54 | 
55 | .. _quickstart:
56 | 
57 | Quickstart
58 | ==========
59 | 
60 | .. doctest::
61 | :hide:
62 | 
63 | >>> from theano import tensor
64 | >>> from blocks.algorithms import GradientDescent, Scale
65 | >>> from blocks.bricks import MLP, Tanh, Softmax
66 | >>> from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
67 | >>> from blocks.graph import ComputationGraph
68 | >>> from blocks.initialization import IsotropicGaussian, Constant
69 | >>> from fuel.streams import DataStream
70 | >>> from fuel.transformers import Flatten
71 | >>> from fuel.datasets import MNIST
72 | >>> from fuel.schemes import SequentialScheme
73 | >>> from blocks.extensions import FinishAfter, Printing
74 | >>> from blocks.extensions.monitoring import DataStreamMonitoring
75 | >>> from blocks.main_loop import MainLoop
76 | 
77 | Construct your model.
78 | 
79 | >>> mlp = MLP(activations=[Tanh(), Softmax()], dims=[784, 100, 10],
80 | ... weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
81 | >>> mlp.initialize()
82 | 
83 | Calculate your loss function.
84 | 
85 | >>> x = tensor.matrix('features')
86 | >>> y = tensor.lmatrix('targets')
87 | >>> y_hat = mlp.apply(x)
88 | >>> cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
89 | >>> error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
90 | 
91 | Load your training data using Fuel.
92 | 
93 | >>> mnist_train = MNIST(("train",))
94 | >>> train_stream = Flatten(
95 | ... DataStream.default_stream(
96 | ... dataset=mnist_train,
97 | ... iteration_scheme=SequentialScheme(mnist_train.num_examples, 128)),
98 | ... which_sources=('features',))
99 | >>> mnist_test = MNIST(("test",))
100 | >>> test_stream = Flatten(
101 | ... DataStream.default_stream(
102 | ... dataset=mnist_test,
103 | ... iteration_scheme=SequentialScheme(mnist_test.num_examples, 1024)),
104 | ... which_sources=('features',))
105 | 
106 | And train!
107 | 
108 | >>> from blocks.model import Model
109 | >>> main_loop = MainLoop(
110 | ... model=Model(cost), data_stream=train_stream,
111 | ... algorithm=GradientDescent(
112 | ... cost=cost, parameters=ComputationGraph(cost).parameters,
113 | ... step_rule=Scale(learning_rate=0.1)),
114 | ... extensions=[FinishAfter(after_n_epochs=5),
115 | ... DataStreamMonitoring(
116 | ... variables=[cost, error_rate],
117 | ... data_stream=test_stream,
118 | ... prefix="test"),
119 | ... Printing()])
120 | >>> main_loop.run() # doctest: +ELLIPSIS
121 | <BLANKLINE>
122 | ...
123 | 
124 | For a runnable version of this code, please see the MNIST demo
125 | in our repository with examples_.
126 | 
127 | Features
128 | --------
129 | 
130 | Currently Blocks supports and provides:
131 | 
132 | * Constructing parametrized Theano operations, called "bricks"
133 | * Pattern matching to select variables and bricks in large models
134 | * Algorithms to optimize your model
135 | * Saving and resuming of training
136 | * Monitoring and analyzing values during training progress (on the training set
137 | as well as on test sets)
138 | * Application of graph transformations, such as dropout (*limited support*)
139 | 
140 | In the future we also hope to support:
141 | 
142 | * Dimension, type and axes-checking
143 | 
144 | .. image:: https://img.shields.io/coveralls/mila-udem/blocks.svg
145 | :target: https://coveralls.io/r/mila-udem/blocks
146 | 
147 | .. image:: https://travis-ci.org/mila-udem/blocks.svg?branch=master
148 | :target: https://travis-ci.org/mila-udem/blocks
149 | 
150 | .. image:: https://readthedocs.org/projects/blocks/badge/?version=latest
151 | :target: https://blocks.readthedocs.org/
152 | 
153 | .. image:: https://img.shields.io/scrutinizer/g/mila-udem/blocks.svg
image:: https://img.shields.io/scrutinizer/g/mila-udem/blocks.svg 154 | :target: https://scrutinizer-ci.com/g/mila-udem/blocks/ 155 | 156 | .. image:: https://img.shields.io/badge/license-MIT-blue.svg 157 | :target: https://github.com/mila-udem/blocks/blob/master/LICENSE 158 | 159 | | 160 | 161 | Indices and tables 162 | ================== 163 | * :ref:`genindex` 164 | * :ref:`modindex` 165 | -------------------------------------------------------------------------------- /docs/plotting.rst: -------------------------------------------------------------------------------- 1 | Live plotting 2 | ============= 3 | 4 | .. note:: 5 | 6 | The live plotting functionality is part of ``blocks-extras``, which must be 7 | separately installed. 8 | 9 | Plots often give a clearer image of your training progress than textual logs. 10 | This is why Blocks has a :class:`.Plot` extension which 11 | allows you to plot the entries from the log that you are interested in. 12 | 13 | We use Bokeh_, an interactive visualization library, to perform the plotting. 14 | More specifically, we use the *Bokeh Plot Server*. This is basically a light web 15 | server to which Blocks can send data, which then gets displayed in live plots in 16 | your browser. The advantage of this approach is that you can even monitor your 17 | models' training progress over a network. 18 | 19 | First, make sure that you installed the necessary requirements (see :doc:`the 20 | installation instructions <setup>`). To start the server type 21 | 22 | .. code-block:: bash 23 | 24 | $ bokeh-server 25 | 26 | This will start a server that is accessible on your computer at 27 | ``http://localhost:5006``. If you want to make sure that you can access your 28 | plots across a network (or the internet), you can listen on all IP addresses 29 | using 30 | 31 | .. code-block:: bash 32 | 33 | $ bokeh-server --ip 0.0.0.0 34 | 35 | Now that your plotting server is up and running, start your main loop and 36 | pass the :class:`.Plot` extension. Consider this example of fitting the 37 | function :math:`f(x) = x^a` to :math:`f(x) = x^2`. 38 | 39 | >>> import theano 40 | >>> a = theano.shared(3.) 41 | >>> a.name = 'a' 42 | >>> x = theano.tensor.scalar('data') 43 | >>> cost = abs(x ** 2 - x ** a) 44 | >>> cost.name = 'cost' 45 | 46 | We train on 150 random points in :math:`[0, 1]`. 47 | 48 | >>> import numpy 49 | >>> from fuel.streams import DataStream 50 | >>> from fuel.datasets import IterableDataset 51 | >>> data_stream = DataStream(IterableDataset( 52 | ... numpy.random.rand(150).astype(theano.config.floatX))) 53 | 54 | Now let's train with gradient descent and plot the results. 55 | 56 | >>> from blocks.main_loop import MainLoop 57 | >>> from blocks.algorithms import GradientDescent, Scale 58 | >>> from blocks.extensions import FinishAfter 59 | >>> from blocks.extensions.monitoring import TrainingDataMonitoring 60 | >>> from blocks_extras.extensions.plot import Plot # doctest: +SKIP 61 | >>> main_loop = MainLoop( 62 | ... model=None, data_stream=data_stream, 63 | ... algorithm=GradientDescent(cost=cost, 64 | ... parameters=[a], 65 | ... step_rule=Scale(learning_rate=0.1)), 66 | ... extensions=[FinishAfter(after_n_epochs=1), 67 | ... TrainingDataMonitoring([cost, a], after_batch=True), 68 | ... Plot('Plotting example', channels=[['cost'], ['a']], 69 | ... after_batch=True)]) # doctest: +SKIP 70 | >>> main_loop.run() # doctest: +SKIP 71 | 72 | .. tip:: 73 | 74 | If you want to plot channels in the same figure, pass them as part of the 75 | same list. 
For example, ``[['cost', 'a']]`` would have plotted a single 76 | figure with both the cost and the estimate of the exponent. 77 | 78 | Open up your browser and go to ``http://localhost:5006`` to see your model 79 | cost go down in real-time! 80 | 81 | .. image:: /_static/plot_cost.png 82 | :width: 49% 83 | 84 | .. image:: /_static/plot_a.png 85 | :width: 49% 86 | 87 | 88 | .. _Bokeh: http://bokeh.pydata.org/ 89 | .. _blocks-extras: https://github.com/mila-udem/blocks-extras 90 | -------------------------------------------------------------------------------- /docs/serialization.rst: -------------------------------------------------------------------------------- 1 | Serialization 2 | ============= 3 | 4 | The ability to save models and their training progress is important for two 5 | reasons: 6 | 7 | 1. Neural nets can take days or even weeks to train. If training is 8 | interrupted during this time, it is important that we can continue from 9 | where we left off. 10 | 2. We need the ability to save models in order to share them with others or save 11 | them for later use or inspection. 12 | 13 | These two goals come with differing requirements, which is why Blocks 14 | implements a custom serialization approach that tries to meet both needs in the 15 | :func:`.dump` and :func:`.load` functions. 16 | 17 | Pickling the training loop 18 | -------------------------- 19 | 20 | .. warning:: 21 | 22 | Due to the complexity of serializing a Python object as large as the main 23 | loop, (un)pickling will sometimes fail because it exceeds the default maximum 24 | recursion depth set in Python. Increasing the limit should fix the problem. 25 | 26 | When checkpointing, Blocks pickles the entire :class:`main loop <.MainLoop>`, 27 | effectively serializing the exact state of the model as well as the training 28 | state (iteration state, extensions, etc.). Technically there are some 29 | difficulties with this approach: 30 | 31 | * Some Python objects cannot be pickled, e.g. file handles, generators, 32 | dynamically generated classes, nested classes, etc. 33 | * The pickling of Theano objects can be problematic. 34 | * We do not want to serialize the training data kept in memory, since this can 35 | be prohibitively large. 36 | 37 | Blocks addresses these problems by avoiding certain data structures such as 38 | generators and nested classes (see the :ref:`developer guidelines 39 | `) and overriding the pickling behaviour of some 40 | objects, making the pickling of the main loop possible. 41 | 42 | However, pickling can be problematic for long-term storage of models, because 43 | 44 | * Unpickling depends on the libraries used being unchanged. This means that if 45 | you updated Blocks, Theano, etc. to a new version where the interface has 46 | changed, loading your training progress could fail. 47 | * The unpickling of Theano objects can be problematic, especially when 48 | transferring from GPU to CPU or vice versa. 49 | * It is not possible on Python 2 to unpickle objects that were pickled in Python 50 | 3. 51 | 52 | Parameter saving 53 | ---------------- 54 | 55 | This is why Blocks intercepts the pickling of all Theano shared variables (which 56 | includes the parameters), and stores them as separate NPY_ files. The resulting 57 | file is a ZIP archive that contains the pickled main loop as well as a collection 58 | of NumPy arrays. 
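For illustration, here is a minimal sketch of pulling parameter values out of such an archive with NumPy alone; the checkpoint path and the parameter names below are made up, since both depend on how you saved your model:

.. code-block:: python

    import numpy

    # 'checkpoint.zip' and the parameter names here are hypothetical.
    parameters = numpy.load('checkpoint.zip')
    print(parameters.keys())           # names of the stored arrays
    W = parameters['/mlp/linear_0.W']  # an ordinary NumPy array
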
The NumPy arrays (and hence parameters) in the ZIP file can be 59 | read, across platforms, using the :func:`numpy.load` function, making it 60 | possible to inspect and load parameter values, even if the unpickling of the 61 | main loop fails. 62 | 63 | .. _NPY: http://docs.scipy.org/doc/numpy-dev/neps/npy-format.html 64 | -------------------------------------------------------------------------------- /docs/setup.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | The easiest way to install Blocks is using the Python package manager 5 | ``pip``. Blocks isn't listed yet on the Python Package Index (PyPI), so 6 | you will have to grab it directly from GitHub. 7 | 8 | .. code-block:: bash 9 | 10 | $ pip install git+git://github.com/mila-udem/blocks.git \ 11 | -r https://raw.githubusercontent.com/mila-udem/blocks/master/requirements.txt 12 | 13 | This will give you the cutting-edge development version. The latest stable 14 | release is in the ``stable`` branch and can be installed as follows. 15 | 16 | .. code-block:: bash 17 | 18 | $ pip install git+git://github.com/mila-udem/blocks.git@stable \ 19 | -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt 20 | 21 | .. note:: 22 | 23 | Blocks relies on several packages, such as Theano_ and picklable_itertools_, 24 | to be installed directly from GitHub. The only way of doing so reliably is 25 | through a ``requirements.txt`` file, which is why this installation command 26 | might look slightly different from what you're used to. 27 | 28 | Installing requirements from GitHub requires pip 1.5 or higher; you can 29 | update with ``pip install --upgrade pip``. 30 | 31 | If you don't have administrative rights, add the ``--user`` switch to the 32 | install commands to install the packages in your home folder. If you want to 33 | update Blocks, simply repeat the first command with the ``--upgrade`` switch 34 | added to pull the latest version from GitHub. 35 | 36 | .. warning:: 37 | 38 | Pip may try to install or update NumPy and SciPy if they are not present or 39 | outdated. However, pip's versions might not be linked to an optimized BLAS 40 | implementation. To prevent this from happening make sure you update NumPy 41 | and SciPy using your system's package manager (e.g. ``apt-get`` or 42 | ``yum``), or use a Python distribution like Anaconda_, before installing 43 | Blocks. You can also pass the ``--no-deps`` switch and install all the 44 | requirements manually. 45 | 46 | If the installation crashes with ``ImportError: No module named 47 | numpy.distutils.core``, install NumPy and try again. 48 | 49 | .. _picklable_itertools: https://github.com/dwf/picklable_itertools 50 | 51 | Requirements 52 | ------------ 53 | Blocks' requirements are 54 | 55 | * Theano_, for pretty much everything 56 | * PyYAML_, to parse the configuration file 57 | * six_, to support both Python 2 and 3 with a single codebase 58 | * Toolz_, to add a bit of functional programming where it is needed 59 | 60 | Bokeh_ is an optional requirement if you want to use live plotting of your 61 | training progress (part of blocks-extras_). 62 | 63 | nose2_ is an optional requirement, used to run the tests. 64 | 65 | We develop using the bleeding-edge version of Theano, so be sure to follow the 66 | `relevant installation instructions`_ to make sure that your Theano version is 67 | up to date if you didn't install it through Blocks. 68 | 69 | .. 
_Anaconda: https://store.continuum.io/cshop/anaconda/ 70 | .. _nose2: https://nose2.readthedocs.org/ 71 | .. _PyYAML: http://pyyaml.org/wiki/PyYAML 72 | .. _Bokeh: http://bokeh.pydata.org/ 73 | .. _Theano: http://deeplearning.net/software/theano/ 74 | .. _six: http://pythonhosted.org/six/ 75 | .. _Toolz: http://toolz.readthedocs.org/ 76 | .. _relevant installation instructions: http://deeplearning.net/software/theano/install.html#bleeding-edge-install-instructions 77 | .. _blocks-extras: https://github.com/mila-udem/blocks-extras 78 | 79 | Development 80 | ----------- 81 | 82 | If you want to work on Blocks' development, your first step is to `fork Blocks 83 | on GitHub`_. You will now want to install your fork of Blocks in editable mode. 84 | To install in your home directory, use the following command, replacing ``USER`` 85 | with your own GitHub user name: 86 | 87 | .. code-block:: bash 88 | 89 | $ pip install -e git+git@github.com:USER/blocks.git#egg=blocks[test,docs] --src=$HOME \ 90 | -r https://raw.githubusercontent.com/mila-udem/blocks/master/requirements.txt 91 | 92 | As with the usual installation, you can use ``--user`` or ``--no-deps`` if you 93 | need to. You can now make changes in the ``blocks`` directory created by pip, 94 | push to your repository and make a pull request. 95 | 96 | If you had already cloned the GitHub repository, you can use the following 97 | command from the folder into which you cloned Blocks: 98 | 99 | .. code-block:: bash 100 | 101 | $ pip install -e file:.#egg=blocks[test,docs] -r requirements.txt 102 | 103 | .. _fork Blocks on GitHub: https://github.com/mila-udem/blocks/fork 104 | 105 | Documentation 106 | ~~~~~~~~~~~~~ 107 | 108 | If you want to build a local copy of the documentation, follow the instructions 109 | at the :doc:`documentation development guidelines <development/docs>`. 
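For quick reference, once the ``docs`` extras above are installed, a local HTML build is typically a single Sphinx invocation from the repository root; this is a sketch of the usual workflow, and the guidelines remain the authoritative reference:

.. code-block:: bash

    $ sphinx-build -b html docs docs/_build/html
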
110 | -------------------------------------------------------------------------------- /doctests/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import doctest 4 | import fnmatch 5 | import importlib 6 | import os 7 | import pkgutil 8 | 9 | import blocks 10 | import blocks.bricks 11 | from blocks.utils.testing import skip_if_not_available 12 | 13 | 14 | def setup(testobj): 15 | skip_if_not_available(modules=['nose2']) 16 | # Not importing unicode_literals because it causes problems 17 | # If needed, see https://dirkjan.ochtman.nl/writing/2014/07/06/ 18 | # single-source-python-23-doctests.html for a solution 19 | testobj.globs['absolute_import'] = absolute_import 20 | testobj.globs['print_function'] = print_function 21 | 22 | 23 | def load_tests(loader, tests, ignore): 24 | # This function loads doctests from all submodules and runs them 25 | # with the __future__ imports necessary for Python 2 26 | for _, module, _ in pkgutil.walk_packages(path=blocks.__path__, 27 | prefix=blocks.__name__ + '.'): 28 | try: 29 | tests.addTests(doctest.DocTestSuite( 30 | module=importlib.import_module(module), setUp=setup, 31 | optionflags=doctest.IGNORE_EXCEPTION_DETAIL)) 32 | except: # noqa: E722 33 | pass 34 | 35 | # This part loads the doctests from the documentation 36 | docs = [] 37 | for root, _, filenames in os.walk(os.path.join(blocks.__path__[0], 38 | '../docs')): 39 | for doc in fnmatch.filter(filenames, '*.rst'): 40 | docs.append(os.path.abspath(os.path.join(root, doc))) 41 | tests.addTests(doctest.DocFileSuite( 42 | *docs, module_relative=False, setUp=setup, 43 | optionflags=doctest.IGNORE_EXCEPTION_DETAIL)) 44 | 45 | return tests 46 | -------------------------------------------------------------------------------- /req-rtd.txt: -------------------------------------------------------------------------------- 1 | picklable-itertools==0.1.1 2 | progressbar2==3.10.0 3 | pyyaml==3.11 4 | six==1.10.0 5 | toolz==0.8.0 6 | git+https://github.com/Theano/Theano.git#egg=theano 7 | -------------------------------------------------------------------------------- /req-scrutinizer.txt: -------------------------------------------------------------------------------- 1 | flake8==2.3.0 # rq.filter: <2.4.0 2 | git+https://github.com/bartvm/pep257.git@numpy#egg=pep257 3 | -------------------------------------------------------------------------------- /req-travis-conda.txt: -------------------------------------------------------------------------------- 1 | coverage==4.2 2 | h5py==2.6.0 3 | mock==2.0.0 4 | nose==1.3.7 5 | numpy==1.11.1 6 | pillow==3.3.0 # rq.filter: >=3.3.0,<3.4.0 7 | pytables==3.2.2 8 | pyyaml==3.11 9 | pyzmq==15.4.0 10 | scipy==0.18.0 11 | six==1.10.0 12 | toolz==0.8.0 13 | -------------------------------------------------------------------------------- /req-travis-docs-pip.txt: -------------------------------------------------------------------------------- 1 | sphinx==1.4.5 2 | Pygments==2.1.3 3 | docutils==0.12 4 | mkdocs==0.15.3 5 | mock==2.0.0 6 | pillow==3.3.0 # rq.filter: >=3.3.0,<3.4.0 7 | git+https://github.com/rtfd/readthedocs-sphinx-ext.git@0.6-alpha#egg=readthedocs-sphinx-ext 8 | sphinx-rtd-theme==0.1.9 9 | alabaster>=0.7,<0.8,!=0.7.5 10 | commonmark==0.7.2 11 | recommonmark==0.4.0 12 | pyyaml==3.11 13 | six==1.10.0 14 | -------------------------------------------------------------------------------- /req-travis-pip.txt: 
-------------------------------------------------------------------------------- 1 | coveralls==1.1 2 | nose2[coverage_plugin]==0.6.5 3 | picklable-itertools==0.1.1 4 | progressbar2==3.10.0 5 | git+https://github.com/Theano/Theano.git#egg=theano 6 | git+https://github.com/mila-udem/fuel#egg=fuel 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.11.1 2 | picklable-itertools==0.1.1 3 | progressbar2==3.10.0 4 | pyyaml==3.11 5 | six==1.10.0 6 | toolz==0.8.0 7 | git+https://github.com/Theano/Theano.git#egg=theano 8 | git+https://github.com/mila-udem/fuel#egg=fuel 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import blocks 2 | from codecs import open 3 | from os import path 4 | from setuptools import find_packages, setup 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | 8 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 9 | while not f.readline().startswith('Blocks'): # Skip the badges 10 | pass 11 | long_description = 'Blocks\n' + f.read().strip() 12 | 13 | exec_results = {} 14 | with open(path.join(path.dirname(__file__), 'blocks/version.py')) as file_: 15 | exec(file_.read(), exec_results) 16 | version = exec_results['version'] 17 | 18 | setup( 19 | name='blocks', 20 | version=version, 21 | description='A Theano framework for building and training neural networks', 22 | long_description=long_description, 23 | url='https://github.com/mila-udem/blocks', 24 | author='University of Montreal', 25 | license='MIT', 26 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 27 | classifiers=[ 28 | 'Development Status :: 3 - Alpha', 29 | 'Intended Audience :: Science/Research', 30 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 31 | 'Topic :: Scientific/Engineering :: Image Recognition', 32 | 'License :: OSI Approved :: MIT License', 33 | 'Programming Language :: Python :: 2', 34 | 'Programming Language :: Python :: 2.7', 35 | 'Programming Language :: Python :: 3', 36 | 'Programming Language :: Python :: 3.4', 37 | ], 38 | keywords='theano machine learning neural networks deep learning', 39 | packages=find_packages(exclude=['examples', 'docs', 'doctests', 'tests']), 40 | scripts=['bin/blocks-continue'], 41 | setup_requires=['numpy'], 42 | install_requires=['numpy', 'six', 'pyyaml', 'toolz', 'theano', 43 | 'picklable-itertools', 'progressbar2', 'fuel'], 44 | extras_require={ 45 | 'test': ['mock', 'nose', 'nose2'], 46 | 'docs': ['sphinx', 'sphinx-rtd-theme'] 47 | }, 48 | zip_safe=False) 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/__init__.py -------------------------------------------------------------------------------- /tests/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/algorithms/__init__.py -------------------------------------------------------------------------------- /tests/bricks/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/bricks/__init__.py -------------------------------------------------------------------------------- /tests/bricks/test_cost.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.testing import assert_allclose 3 | 4 | import theano 5 | from theano import tensor 6 | from theano import function 7 | 8 | from blocks.bricks import Softmax 9 | from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate 10 | 11 | 12 | def test_softmax_vector(): 13 | x = tensor.matrix('x') 14 | y = tensor.lvector('y') 15 | 16 | softmax_out = Softmax().apply(x) 17 | cost = CategoricalCrossEntropy().apply(y, softmax_out) 18 | 19 | cost_stable = Softmax().categorical_cross_entropy(y, x).mean() 20 | 21 | softmax_cost_func = function([x, y], cost) 22 | softmax_cost_stable_func = function([x, y], cost_stable) 23 | 24 | batch_size = 100 25 | x_size = 10 26 | 27 | rng = numpy.random.RandomState(1) 28 | x_val = rng.randn(batch_size, x_size).astype(theano.config.floatX) 29 | y_val = rng.randint(low=0, high=x_size, size=(batch_size)) 30 | softmax_cost = softmax_cost_func(x_val, y_val) 31 | softmax_cost_stable = softmax_cost_stable_func(x_val, y_val) 32 | 33 | assert_allclose(softmax_cost, softmax_cost_stable) 34 | 35 | 36 | def test_softmax_matrix(): 37 | x = tensor.matrix('x') 38 | y = tensor.matrix('y') 39 | 40 | softmax_out = Softmax().apply(x) 41 | cost = CategoricalCrossEntropy().apply(y, softmax_out) 42 | 43 | cost_stable = Softmax().categorical_cross_entropy(y, x).mean() 44 | 45 | softmax_cost_func = function([x, y], cost) 46 | softmax_cost_stable_func = function([x, y], cost_stable) 47 | 48 | batch_size = 2 49 | x_size = 2 50 | 51 | rng = numpy.random.RandomState(1) 52 | x_val = rng.randn(batch_size, x_size).astype(theano.config.floatX) 53 | y_val_us = rng.uniform(size=(batch_size, 54 | x_size)).astype(theano.config.floatX) 55 | y_val = y_val_us / numpy.expand_dims(y_val_us.sum(axis=1), axis=1) 56 | softmax_cost = softmax_cost_func(x_val, y_val) 57 | softmax_cost_stable = softmax_cost_stable_func(x_val, y_val) 58 | 59 | assert_allclose(softmax_cost, softmax_cost_stable, rtol=1e-5) 60 | 61 | 62 | def test_misclassification_rate(): 63 | y = tensor.vector(dtype='int32') 64 | yhat = tensor.matrix(theano.config.floatX) 65 | top1_brick = MisclassificationRate() 66 | top2_brick = MisclassificationRate(top_k=2) 67 | top3_brick = MisclassificationRate(top_k=3) 68 | f = theano.function([y, yhat], [top1_brick.apply(y, yhat), 69 | top2_brick.apply(y, yhat), 70 | top3_brick.apply(y, yhat)]) 71 | y_ = numpy.array([2, 1, 0, 1, 2], dtype='int32') 72 | yhat_ = numpy.array([[3, 2, 1, 0], 73 | [1, 8, 2, 1], 74 | [3, 8, 1, 2], 75 | [1, 6, 4, 2], 76 | [9, 7, 5, 5]], dtype='float32') 77 | top1_error = 0.6 78 | top2_error = 0.4 79 | top3_error = 0.2 80 | assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_)) 81 | -------------------------------------------------------------------------------- /tests/bricks/test_interfaces.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from theano import tensor 3 | from blocks.bricks import Linear 4 | from blocks.initialization import Constant, IsotropicGaussian 5 | 6 | 7 | def test_linearlike_subclass_initialize_works_overridden_w(): 8 | class NotQuiteLinear(Linear): 9 | @property 10 | def W(self): 11 | W = super(NotQuiteLinear, self).W 12 | return W / 
tensor.sqrt((W ** 2).sum(axis=0)) 13 | 14 | brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02), 15 | biases_init=Constant(1)) 16 | brick.initialize() 17 | assert not numpy.isnan(brick.parameters[0].get_value()).any() 18 | numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1, 19 | rtol=1e-6) 20 | -------------------------------------------------------------------------------- /tests/bricks/test_lookup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.testing import assert_equal, assert_raises 3 | 4 | import theano 5 | from theano import tensor 6 | 7 | from blocks.bricks.lookup import LookupTable 8 | 9 | 10 | def test_lookup_table(): 11 | lt = LookupTable(5, 3) 12 | lt.allocate() 13 | 14 | lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX)) 15 | 16 | x = tensor.lmatrix("x") 17 | y = lt.apply(x) 18 | f = theano.function([x], [y]) 19 | 20 | x_val = [[1, 2], [0, 3]] 21 | desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]], 22 | dtype=theano.config.floatX) 23 | assert_equal(f(x_val)[0], desired) 24 | 25 | # Test get_dim 26 | assert_equal(lt.get_dim(lt.apply.inputs[0]), 0) 27 | assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim) 28 | assert_raises(ValueError, lt.get_dim, 'random_name') 29 | 30 | # Test feedforward interface 31 | assert lt.input_dim == 0 32 | assert lt.output_dim == 3 33 | lt.output_dim = 4 34 | assert lt.output_dim == 4 35 | 36 | def assign_input_dim(): 37 | lt.input_dim = 11 38 | assert_raises(ValueError, assign_input_dim) 39 | lt.input_dim = 0 40 | -------------------------------------------------------------------------------- /tests/bricks/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from numpy.testing import assert_allclose 4 | from six.moves import cPickle 5 | from theano import tensor 6 | from blocks.bricks import Linear, Softmax 7 | from blocks.bricks.wrappers import WithExtraDims 8 | from blocks.initialization import Constant 9 | 10 | 11 | class LinearWithExtraDims(Linear): 12 | decorators = [WithExtraDims()] 13 | 14 | 15 | class SoftmaxWithExtraDims(Softmax): 16 | decorators = [WithExtraDims()] 17 | 18 | 19 | def test_with_extra_dims_ndim_gt_2(): 20 | X = tensor.tensor4('X') 21 | brick = LinearWithExtraDims( 22 | input_dim=3, output_dim=4, 23 | weights_init=Constant(1), biases_init=Constant(0)) 24 | brick.initialize() 25 | f = theano.function([X], brick.apply(X, extra_ndim=2)) 26 | assert_allclose( 27 | f(numpy.ones(shape=(2, 2, 2, 3), dtype=theano.config.floatX)), 28 | 3 * numpy.ones(shape=(2, 2, 2, 4), dtype=theano.config.floatX)) 29 | 30 | 31 | def test_with_extra_dims_ndim_leq_2(): 32 | X = tensor.matrix('X') 33 | brick = LinearWithExtraDims( 34 | input_dim=3, output_dim=4, 35 | weights_init=Constant(1), biases_init=Constant(0)) 36 | brick.initialize() 37 | f = theano.function([X], brick.apply(X, extra_ndim=0)) 38 | assert_allclose( 39 | f(numpy.ones(shape=(2, 3), dtype=theano.config.floatX)), 40 | 3 * numpy.ones(shape=(2, 4), dtype=theano.config.floatX)) 41 | 42 | 43 | def test_with_extra_dims_is_serializable(): 44 | brick = LinearWithExtraDims( 45 | input_dim=3, output_dim=4, 46 | weights_init=Constant(1), biases_init=Constant(0)) 47 | brick.initialize() 48 | cPickle.loads(cPickle.dumps(brick)) 49 | 50 | 51 | def test_with_extra_dims_cross_entropy_2d(): 52 | x = tensor.matrix('x') 53 | y = tensor.lvector('y') 54 | brick = 
SoftmaxWithExtraDims() 55 | f = theano.function( 56 | [y, x], [brick.categorical_cross_entropy(y, x, extra_ndim=0)]) 57 | assert_allclose( 58 | f([0, 1, 2, 3], 59 | [[1, 2, 1, 2], [1, 2, 3, 4], 60 | [4, 3, 2, 1], [2, 2, 2, 2]])[0], 61 | numpy.array([2.00640, 2.44019, 2.44019, 1.3863]), 62 | rtol=1e-5) 63 | 64 | 65 | def test_with_extra_dims_cross_entropy_3d(): 66 | x = tensor.tensor3('x') 67 | y = tensor.lmatrix('y') 68 | brick = SoftmaxWithExtraDims() 69 | f = theano.function( 70 | [y, x], [brick.categorical_cross_entropy(y, x, extra_ndim=1)]) 71 | assert_allclose( 72 | f([[0, 1], [2, 3]], 73 | [[[1, 2, 1, 2], [1, 2, 3, 4]], 74 | [[4, 3, 2, 1], [2, 2, 2, 2]]])[0], 75 | numpy.array([[2.0064, 2.44019], 76 | [2.44019, 1.3863]]), 77 | rtol=1e-5) 78 | -------------------------------------------------------------------------------- /tests/extensions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/extensions/__init__.py -------------------------------------------------------------------------------- /tests/extensions/test_extensions.py: -------------------------------------------------------------------------------- 1 | import re 2 | from mock import Mock 3 | from numpy.testing import assert_raises 4 | 5 | from blocks.extensions import SimpleExtension, CompositeExtension, Timestamp 6 | from blocks.extensions.saveload import Checkpoint 7 | from blocks.extensions.predicates import OnLogRecord 8 | 9 | 10 | def test_parse_args(): 11 | assert (SimpleExtension.parse_args('before_batch', ('a', 'b')) == 12 | (('a',), ('b',))) 13 | assert (SimpleExtension.parse_args('before_epoch', ('a', 'b')) == 14 | ((), ('a', 'b'))) 15 | 16 | 17 | def test_add_list_condition(): 18 | extension_list = Checkpoint('extension_list').add_condition( 19 | ['before_first_epoch', 'after_epoch'], 20 | OnLogRecord('notification_name'), 21 | ('dest_path.kl',)) 22 | extension_iter = Checkpoint('extension_iter') 23 | extension_iter.add_condition( 24 | ['before_first_epoch'], 25 | OnLogRecord('notification_name'), 26 | ('dest_path.kl',)) 27 | extension_iter.add_condition( 28 | ['after_epoch'], 29 | OnLogRecord('notification_name'), 30 | ('dest_path.kl',)) 31 | assert len(extension_list._conditions) == len(extension_iter._conditions) 32 | assert_raises(ValueError, extension_iter.add_condition, 33 | callbacks_names='after_epoch', 34 | predicate=OnLogRecord('notification_name'), 35 | arguments=('dest_path.kl',)) 36 | 37 | 38 | def test_composite_extension_main_loop_assignment(): 39 | ext1 = Mock() 40 | ext2 = Mock() 41 | 42 | comp = CompositeExtension([ext1, ext2]) 43 | comp.main_loop = object() 44 | 45 | assert ext1.main_loop == comp.main_loop 46 | assert ext2.main_loop == comp.main_loop 47 | 48 | 49 | def test_composite_extension_dispatches(): 50 | ext1 = Mock() 51 | ext2 = Mock() 52 | 53 | comp = CompositeExtension([ext1, ext2]) 54 | comp.main_loop = object() 55 | 56 | comp.dispatch('before_training') 57 | ext1.dispatch.assert_called_once_with('before_training') 58 | ext2.dispatch.assert_called_once_with('before_training') 59 | 60 | comp.dispatch('after_batch', 5) 61 | ext1.dispatch.assert_called_with('after_batch', 5) 62 | ext2.dispatch.assert_called_with('after_batch', 5) 63 | 64 | 65 | def test_composite_extension_run_before(): 66 | class Foo(SimpleExtension): 67 | def __init__(self, num, **kwargs): 68 | self.num = num 69 | super(Foo, self).__init__(**kwargs) 70 | 71 | def do(self, 
which_callback, *args): 72 | self.num += 1 73 | 74 | class Bar(CompositeExtension): 75 | def do(self, which_callback, *args): 76 | self.num = 0 77 | for sub in self.sub_extensions: 78 | self.num += sub.num 79 | 80 | comp = Bar([Foo(1, before_training=True), 81 | Foo(2, before_training=True)], 82 | before_training=True) 83 | comp.main_loop = Mock() 84 | comp.dispatch('before_training') 85 | 86 | assert comp.num == 3 87 | 88 | 89 | def test_composite_extension_run_after(): 90 | class Foo(SimpleExtension): 91 | def __init__(self, num, **kwargs): 92 | self.num = num 93 | super(Foo, self).__init__(**kwargs) 94 | 95 | def do(self, which_callback, *args): 96 | self.num += 1 97 | 98 | class Bar(CompositeExtension): 99 | def do(self, which_callback, *args): 100 | self.num = 0 101 | for sub in self.sub_extensions: 102 | self.num += sub.num 103 | 104 | comp = Bar([Foo(1, before_training=True), 105 | Foo(2, before_training=True)], 106 | before_training=True, 107 | run_before_children=False) 108 | comp.main_loop = Mock() 109 | comp.dispatch('before_training') 110 | 111 | assert comp.num == 5 112 | 113 | 114 | def test_composite_extension_different_schedules(): 115 | class Foo(SimpleExtension): 116 | def __init__(self, **kwargs): 117 | self.do = Mock() 118 | super(Foo, self).__init__(**kwargs) 119 | 120 | def do(self, *args): 121 | pass 122 | 123 | a = Foo(after_batch=False, after_training=True) 124 | b = Foo(after_batch=True) 125 | comp = CompositeExtension([a, b], before_training=True) 126 | comp.main_loop = Mock() 127 | comp.do = Mock() 128 | comp.dispatch('before_training') 129 | comp.dispatch('after_batch') 130 | comp.dispatch('after_training') 131 | comp.do.assert_called_once_with('before_training') 132 | a.do.assert_called_once_with('after_training') 133 | b.do.assert_called_once_with('after_batch') 134 | 135 | 136 | def test_simple_extension_before_batch_callback(): 137 | 138 | class Foo(SimpleExtension): 139 | def __init__(self, **kwargs): 140 | self.do = Mock() 141 | super(Foo, self).__init__(**kwargs) 142 | 143 | def do(self, which_callback, *args): 144 | pass 145 | 146 | ext = Foo(before_batch=True) 147 | ext.main_loop = Mock() 148 | ext.dispatch('before_batch') 149 | ext.do.assert_called_once_with('before_batch') 150 | 151 | 152 | class InjectedTimestamp(Timestamp): 153 | def __init__(self, **kwargs): 154 | self.returns = ['foo', 'bar', 'baz'] 155 | super(InjectedTimestamp, self).__init__(**kwargs) 156 | 157 | def get_timestamp(self): 158 | if len(self.returns) > 0: 159 | return self.returns.pop() 160 | return super(InjectedTimestamp, self).get_timestamp() 161 | 162 | 163 | def test_timestamp(): 164 | def check(kwargs): 165 | if 'log_record' in kwargs: 166 | log_record = kwargs['log_record'] 167 | else: 168 | log_record = Timestamp.DEFAULT_LOG_RECORD 169 | ext = InjectedTimestamp(**kwargs) 170 | ext.main_loop = Mock() 171 | ext.main_loop.log.current_row = {} 172 | ext.do('after_epoch') 173 | assert ext.main_loop.log.current_row[log_record] == 'baz' 174 | ext.do('after_epoch') 175 | assert ext.main_loop.log.current_row[log_record] == 'bar' 176 | ext.do('after_epoch') 177 | assert ext.main_loop.log.current_row[log_record] == 'foo' 178 | # Exercise original get_timestamp. 
179 | ext.do('after_epoch') 180 | sep = kwargs.get('separator', ' ') 181 | assert bool(re.match(''.join(['[0-9]{4}-[0-9]{2}-[0-9]{2}', sep, 182 | '[0-9]{2}(\\:[0-9]{2}){2}' 183 | '\\.[0-9]+']), 184 | ext.main_loop.log.current_row[log_record])) 185 | 186 | yield check, {} 187 | yield check, {'log_record': 'loggy mclogpants'} 188 | 189 | 190 | def test_timestamp_default_triggers(): 191 | def check(callback): 192 | ext = InjectedTimestamp() 193 | ext.main_loop = Mock() 194 | ext.main_loop.log.current_row = {} 195 | ext.dispatch(callback) 196 | assert ext.main_loop.log.current_row.get('timestamp') == 'baz' 197 | 198 | callbacks = ['before_training', 'after_epoch', 'on_error', 199 | 'on_interrupt', 'on_resumption', 'after_training'] 200 | 201 | for callback in callbacks: 202 | yield check, callback 203 | -------------------------------------------------------------------------------- /tests/extensions/test_monitoring.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from fuel.datasets import IterableDataset 4 | from numpy.testing import assert_allclose 5 | from theano import tensor 6 | 7 | from blocks.extensions import TrainingExtension, FinishAfter 8 | from blocks.extensions.monitoring import ( 9 | MonitoringExtension, 10 | TrainingDataMonitoring) 11 | from blocks.monitoring import aggregation 12 | from blocks.algorithms import GradientDescent, UpdatesAlgorithm, Scale 13 | from blocks.utils import shared_floatx 14 | from blocks.main_loop import MainLoop 15 | 16 | 17 | class MeanFeaturesTimesTarget(aggregation.MonitoredQuantity): 18 | 19 | def initialize(self): 20 | self._aggregated = 0. 21 | self._num_batches = 0 22 | 23 | def aggregate(self, features, targets): 24 | self._aggregated += features * targets 25 | self._num_batches += 1 26 | 27 | def get_aggregated_value(self): 28 | return self._aggregated / self._num_batches 29 | 30 | 31 | def test_monitoring_extension__record_name(): 32 | test_name = "test-test" 33 | 34 | monitor = MonitoringExtension() 35 | assert monitor._record_name(test_name) == test_name 36 | 37 | monitor = MonitoringExtension(prefix="abc") 38 | assert (monitor._record_name(test_name) == 39 | "abc" + monitor.SEPARATOR + test_name) 40 | 41 | monitor = MonitoringExtension(suffix="abc") 42 | assert (monitor._record_name(test_name) == 43 | test_name + monitor.SEPARATOR + "abc") 44 | 45 | monitor = MonitoringExtension(prefix="abc", suffix="def") 46 | assert (monitor._record_name(test_name) == 47 | "abc" + monitor.SEPARATOR + test_name + monitor.SEPARATOR + "def") 48 | 49 | try: 50 | monitor = MonitoringExtension(prefix="abc", suffix="def") 51 | monitor._record_name(None) 52 | except ValueError as e: 53 | assert str(e) == "record name must be a string" 54 | 55 | 56 | def test_training_data_monitoring(): 57 | weights = numpy.array([-1, 1], dtype=theano.config.floatX) 58 | features = [numpy.array(f, dtype=theano.config.floatX) 59 | for f in [[1, 2], [3, 5], [5, 8]]] 60 | targets = numpy.array([(weights * f).sum() for f in features]) 61 | n_batches = 3 62 | dataset = IterableDataset(dict(features=features, targets=targets)) 63 | 64 | x = tensor.vector('features') 65 | y = tensor.scalar('targets') 66 | W = shared_floatx([0, 0], name='W') 67 | V = shared_floatx(7, name='V') 68 | W_sum = W.sum().copy(name='W_sum') 69 | cost = ((x * W).sum() - y) ** 2 70 | cost.name = 'cost' 71 | 72 | class TrueCostExtension(TrainingExtension): 73 | 74 | def before_batch(self, data): 75 | 
self.main_loop.log.current_row['true_cost'] = ( 76 | ((W.get_value() * data["features"]).sum() - 77 | data["targets"]) ** 2) 78 | 79 | # Note that, unlike a Theano variable, a monitored 80 | # quantity can't be reused in more than one TrainingDataMonitoring 81 | 82 | ftt1 = MeanFeaturesTimesTarget( 83 | requires=[x, y], name='ftt1') 84 | ftt2 = MeanFeaturesTimesTarget( 85 | requires=[x, y], name='ftt2') 86 | 87 | main_loop = MainLoop( 88 | model=None, data_stream=dataset.get_example_stream(), 89 | algorithm=GradientDescent(cost=cost, parameters=[W], 90 | step_rule=Scale(0.001)), 91 | extensions=[ 92 | FinishAfter(after_n_epochs=1), 93 | TrainingDataMonitoring([W_sum, cost, V, ftt1], prefix="train1", 94 | after_batch=True), 95 | TrainingDataMonitoring([aggregation.mean(W_sum), cost, ftt2], 96 | prefix="train2", after_epoch=True), 97 | TrueCostExtension()]) 98 | 99 | main_loop.run() 100 | 101 | # Check monitoring of a shared variable 102 | assert_allclose(main_loop.log.current_row['train1_V'], 7.0) 103 | 104 | for i in range(n_batches): 105 | # The ground truth is written to the log before the batch is 106 | # processed, whereas the extension writes after the batch is 107 | # processed. This is why the iteration numbers differ here. 108 | assert_allclose(main_loop.log[i]['true_cost'], 109 | main_loop.log[i + 1]['train1_cost']) 110 | assert_allclose( 111 | main_loop.log[n_batches]['train2_cost'], 112 | sum([main_loop.log[i]['true_cost'] 113 | for i in range(n_batches)]) / n_batches) 114 | assert_allclose( 115 | main_loop.log[n_batches]['train2_W_sum'], 116 | sum([main_loop.log[i]['train1_W_sum'] 117 | for i in range(1, n_batches + 1)]) / n_batches) 118 | 119 | # Check monitoring of non-Theano quantities 120 | for i in range(n_batches): 121 | assert_allclose(main_loop.log[i + 1]['train1_ftt1'], 122 | features[i] * targets[i]) 123 | assert_allclose(main_loop.log[n_batches]['train2_ftt2'], 124 | (features * targets[:, None]).mean(axis=0)) 125 | 126 | 127 | def test_training_data_monitoring_updates_algorithm(): 128 | features = [numpy.array(f, dtype=theano.config.floatX) 129 | for f in [[1, 2], [3, 5], [5, 8]]] 130 | targets = numpy.array([f.sum() for f in features]) 131 | dataset = IterableDataset(dict(features=features, targets=targets)) 132 | 133 | x = tensor.vector('features') 134 | y = tensor.scalar('targets') 135 | m = x.mean().copy(name='features_mean') 136 | t = y.sum().copy(name='targets_sum') 137 | 138 | main_loop = MainLoop( 139 | model=None, data_stream=dataset.get_example_stream(), 140 | algorithm=UpdatesAlgorithm(), 141 | extensions=[TrainingDataMonitoring([m, t], prefix="train1", 142 | after_batch=True)], 143 | ) 144 | main_loop.extensions[0].main_loop = main_loop 145 | assert len(main_loop.algorithm.updates) == 0 146 | main_loop.extensions[0].do('before_training') 147 | assert len(main_loop.algorithm.updates) > 0 148 | -------------------------------------------------------------------------------- /tests/extensions/test_progressbar.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from fuel.datasets import IterableDataset 4 | from fuel.schemes import (ConstantScheme, 5 | SequentialExampleScheme, 6 | SequentialScheme) 7 | from fuel.streams import DataStream 8 | from theano import tensor 9 | 10 | from blocks.algorithms import GradientDescent, Scale 11 | from blocks.extensions import FinishAfter, ProgressBar, Printing 12 | from blocks.main_loop import MainLoop 13 | from blocks.utils import shared_floatx 14 | 
15 | 16 | def setup_mainloop(extension, iteration_scheme=None): 17 | """Set up a simple main loop for progress bar tests. 18 | 19 | Create a MainLoop, register the given extension, supply it with a 20 | DataStream and a minimal model/cost to optimize. 21 | 22 | """ 23 | # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by 24 | # `max_value`, which has a default value of 100. If we're still using 25 | # `maxval` by accident, this test should fail complaining that 26 | # the progress bar has received a value out of range. 27 | features = [numpy.array(f, dtype=theano.config.floatX) 28 | for f in [[1, 2]] * 101] 29 | dataset = IterableDataset(dict(features=features)) 30 | data_stream = DataStream(dataset, iteration_scheme=iteration_scheme) 31 | 32 | W = shared_floatx([0, 0], name='W') 33 | x = tensor.vector('features') 34 | cost = tensor.sum((x-W)**2) 35 | cost.name = "cost" 36 | 37 | algorithm = GradientDescent(cost=cost, parameters=[W], 38 | step_rule=Scale(1e-3)) 39 | 40 | main_loop = MainLoop( 41 | model=None, 42 | data_stream=data_stream, 43 | algorithm=algorithm, 44 | extensions=[ 45 | FinishAfter(after_n_epochs=1), 46 | extension]) 47 | 48 | return main_loop 49 | 50 | 51 | def test_progressbar(): 52 | main_loop = setup_mainloop(ProgressBar()) 53 | 54 | # We are happy if it does not crash or raise any exceptions 55 | main_loop.run() 56 | 57 | 58 | def test_progressbar_iter_per_epoch_indices(): 59 | iter_per_epoch = 100 60 | progress_bar = ProgressBar() 61 | main_loop = setup_mainloop( 62 | None, iteration_scheme=SequentialExampleScheme(iter_per_epoch)) 63 | progress_bar.main_loop = main_loop 64 | 65 | assert progress_bar.get_iter_per_epoch() == iter_per_epoch 66 | 67 | 68 | def test_progressbar_iter_per_epoch_batch_indices(): 69 | num_examples = 1000 70 | batch_size = 10 71 | progress_bar = ProgressBar() 72 | main_loop = setup_mainloop( 73 | None, iteration_scheme=SequentialScheme(num_examples, batch_size)) 74 | progress_bar.main_loop = main_loop 75 | 76 | assert progress_bar.get_iter_per_epoch() == num_examples // batch_size 77 | 78 | 79 | def test_progressbar_iter_per_epoch_batch_examples(): 80 | num_examples = 1000 81 | batch_size = 10 82 | progress_bar = ProgressBar() 83 | main_loop = setup_mainloop( 84 | None, iteration_scheme=ConstantScheme(batch_size, num_examples)) 85 | progress_bar.main_loop = main_loop 86 | 87 | assert progress_bar.get_iter_per_epoch() == num_examples // batch_size 88 | 89 | 90 | def test_printing(): 91 | main_loop = setup_mainloop(Printing()) 92 | 93 | # We are happy if it does not crash or raise any exceptions 94 | main_loop.run() 95 | -------------------------------------------------------------------------------- /tests/extensions/test_saveload.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | import tarfile 4 | import tempfile 5 | import theano 6 | import unittest 7 | from fuel.datasets import IterableDataset 8 | from numpy.testing import assert_allclose 9 | from theano import tensor 10 | 11 | from blocks.algorithms import GradientDescent 12 | from blocks.bricks import MLP 13 | from blocks.extensions import FinishAfter 14 | from blocks.extensions.saveload import Checkpoint, Load 15 | from blocks.initialization import Constant 16 | from blocks.main_loop import MainLoop 17 | from blocks.model import Model 18 | from blocks.utils.testing import skip_if_configuration_set 19 | 20 | 21 | class TestCheckpoint(unittest.TestCase): 22 | 23 | def setUp(self): 24 | """Create main loop 
and run it.""" 25 | mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.), 26 | use_bias=False) 27 | mlp.initialize() 28 | self.W = mlp.linear_transformations[0].W 29 | x = tensor.vector('data') 30 | cost = mlp.apply(x).mean() 31 | data = numpy.random.rand(10, 10).astype(theano.config.floatX) 32 | self.data_stream = IterableDataset(data).get_example_stream() 33 | self.model = Model(cost) 34 | self.algorithm = GradientDescent(cost=cost, parameters=[self.W]) 35 | self.main_loop = MainLoop( 36 | model=self.model, 37 | data_stream=self.data_stream, 38 | algorithm=self.algorithm, 39 | extensions=[FinishAfter(after_n_batches=5), 40 | Checkpoint('myweirdmodel.tar', 41 | save_separately=['log'])] 42 | ) 43 | self.main_loop.run() 44 | 45 | def test_save_and_load(self): 46 | """Check that the main loop has been saved properly.""" 47 | old_value = self.W.get_value() 48 | self.W.set_value(old_value * 2) 49 | new_main_loop = MainLoop( 50 | model=self.model, 51 | data_stream=self.data_stream, 52 | algorithm=self.algorithm, 53 | extensions=[Load('myweirdmodel.tar')] 54 | ) 55 | new_main_loop.extensions[0].main_loop = new_main_loop 56 | new_main_loop._run_extensions('before_training') 57 | assert_allclose(self.W.get_value(), old_value) 58 | 59 | def test_load_log_and_iteration_state(self): 60 | """Check we can save the log and iteration state separately.""" 61 | skip_if_configuration_set('log_backend', 'sqlite', 62 | 'Bug with log.status["resumed_from"]') 63 | new_main_loop = MainLoop( 64 | model=self.model, 65 | data_stream=self.data_stream, 66 | algorithm=self.algorithm, 67 | extensions=[Load('myweirdmodel.tar', True, True)] 68 | ) 69 | new_main_loop.extensions[0].main_loop = new_main_loop 70 | new_main_loop._run_extensions('before_training') 71 | # Check the log 72 | new_keys = sorted(new_main_loop.log.status.keys()) 73 | old_keys = sorted(self.main_loop.log.status.keys()) 74 | for new_key, old_key in zip(new_keys, old_keys): 75 | assert new_key == old_key 76 | assert (new_main_loop.log.status[new_key] == 77 | self.main_loop.log.status[old_key]) 78 | # Check the iteration state 79 | new = next(new_main_loop.iteration_state[1])['data'] 80 | old = next(self.main_loop.iteration_state[1])['data'] 81 | assert_allclose(new, old) 82 | 83 | def test_load_nonexisting(self): 84 | """Check behaviour when loading a nonexistent main loop.""" 85 | load = Load('mynonexisting.tar') 86 | load.main_loop = self.main_loop 87 | load.do() 88 | 89 | def test_loading_exception(self): 90 | """Check loading exception.""" 91 | with tempfile.NamedTemporaryFile(delete=False) as f: 92 | f.write('a'.encode('utf-8')) 93 | load = Load(f.name) 94 | load.main_loop = self.main_loop 95 | self.assertRaises(tarfile.ReadError, load.do) 96 | 97 | def test_checkpoint_exception(self): 98 | """Check checkpoint exception.""" 99 | checkpoint = Checkpoint(None, save_separately=['foo']) 100 | checkpoint.main_loop = self.main_loop 101 | self.assertRaises(AttributeError, checkpoint.do, None) 102 | 103 | def tearDown(self): 104 | """Cleaning.""" 105 | if os.path.exists('myweirdmodel.tar'): 106 | os.remove('myweirdmodel.tar') 107 | -------------------------------------------------------------------------------- /tests/extensions/test_timing.py: -------------------------------------------------------------------------------- 1 | from numpy.testing import assert_allclose 2 | 3 | from blocks.extensions import Timing, FinishAfter 4 | from blocks.utils.testing import MockMainLoop 5 | 6 | 7 | def test_timing(): 8 | epochs = 2 9 | main_loop = 
MockMainLoop(delay_time=0.1, 10 | extensions=[Timing(prefix='each'), 11 | Timing(prefix='each_second', 12 | every_n_epochs=2), 13 | FinishAfter(after_n_epochs=epochs)]) 14 | main_loop.run() 15 | iterations = int(main_loop.log.status['iterations_done'] / epochs) 16 | assert_allclose( 17 | (main_loop.log[iterations]['each_time_train_this_epoch'] + 18 | main_loop.log[iterations]['each_time_train_this_epoch']) / 2, 19 | main_loop.log.current_row['each_second_time_train_this_epoch'], 20 | atol=1e-2) 21 | assert 'each_time_read_data_this_epoch' in main_loop.log[iterations] 22 | assert 'each_second_time_read_data_this_epoch' in main_loop.log[iterations] 23 | -------------------------------------------------------------------------------- /tests/extensions/test_training.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile 2 | 3 | import numpy 4 | from numpy.testing import assert_allclose 5 | 6 | import theano 7 | from fuel.datasets import IterableDataset 8 | from theano import tensor 9 | 10 | from blocks.algorithms import GradientDescent, Scale 11 | from blocks.config import config 12 | from blocks.extensions import FinishAfter, TrainingExtension 13 | from blocks.extensions.saveload import Checkpoint 14 | from blocks.extensions.training import SharedVariableModifier, TrackTheBest 15 | from blocks.extensions.predicates import OnLogRecord 16 | from blocks.main_loop import MainLoop 17 | from blocks.serialization import load 18 | from blocks.utils import shared_floatx 19 | from blocks.utils.testing import MockMainLoop, skip_if_configuration_set 20 | 21 | 22 | def test_shared_variable_modifier(): 23 | weights = numpy.array([-1, 1], dtype=theano.config.floatX) 24 | features = [numpy.array(f, dtype=theano.config.floatX) 25 | for f in [[1, 2], [3, 4], [5, 6]]] 26 | targets = [(weights * f).sum() for f in features] 27 | n_batches = 3 28 | dataset = IterableDataset(dict(features=features, targets=targets)) 29 | 30 | x = tensor.vector('features') 31 | y = tensor.scalar('targets') 32 | W = shared_floatx([0, 0], name='W') 33 | cost = ((x * W).sum() - y) ** 2 34 | cost.name = 'cost' 35 | 36 | step_rule = Scale(0.001) 37 | sgd = GradientDescent(cost=cost, parameters=[W], 38 | step_rule=step_rule) 39 | main_loop = MainLoop( 40 | model=None, data_stream=dataset.get_example_stream(), 41 | algorithm=sgd, 42 | extensions=[ 43 | FinishAfter(after_n_epochs=1), 44 | SharedVariableModifier( 45 | step_rule.learning_rate, 46 | lambda n: numpy.cast[theano.config.floatX](10. / n) 47 | )]) 48 | 49 | main_loop.run() 50 | 51 | assert_allclose(step_rule.learning_rate.get_value(), 52 | numpy.cast[theano.config.floatX](10. 
/ n_batches)) 53 | 54 | 55 | def test_shared_variable_modifier_two_parameters(): 56 | weights = numpy.array([-1, 1], dtype=theano.config.floatX) 57 | features = [numpy.array(f, dtype=theano.config.floatX) 58 | for f in [[1, 2], [3, 4], [5, 6]]] 59 | targets = [(weights * f).sum() for f in features] 60 | n_batches = 3 61 | dataset = IterableDataset(dict(features=features, targets=targets)) 62 | 63 | x = tensor.vector('features') 64 | y = tensor.scalar('targets') 65 | W = shared_floatx([0, 0], name='W') 66 | cost = ((x * W).sum() - y) ** 2 67 | cost.name = 'cost' 68 | 69 | step_rule = Scale(0.001) 70 | sgd = GradientDescent(cost=cost, parameters=[W], 71 | step_rule=step_rule) 72 | modifier = SharedVariableModifier( 73 | step_rule.learning_rate, 74 | lambda _, val: numpy.cast[theano.config.floatX](val * 0.2)) 75 | main_loop = MainLoop( 76 | model=None, data_stream=dataset.get_example_stream(), 77 | algorithm=sgd, 78 | extensions=[FinishAfter(after_n_epochs=1), modifier]) 79 | 80 | main_loop.run() 81 | 82 | new_value = step_rule.learning_rate.get_value() 83 | assert_allclose(new_value, 84 | 0.001 * 0.2 ** n_batches, 85 | atol=1e-5) 86 | 87 | 88 | def test_track_the_best(): 89 | main_loop = MockMainLoop() 90 | extension = TrackTheBest("cost") 91 | extension.main_loop = main_loop 92 | 93 | main_loop.status['epochs_done'] += 1 94 | main_loop.status['iterations_done'] += 10 95 | main_loop.log.current_row['cost'] = 5 96 | extension.dispatch('after_epoch') 97 | assert main_loop.status['best_cost'] == 5 98 | assert main_loop.log.current_row['cost_best_so_far'] 99 | 100 | main_loop.status['epochs_done'] += 1 101 | main_loop.status['iterations_done'] += 10 102 | main_loop.log.current_row['cost'] = 6 103 | extension.dispatch('after_epoch') 104 | assert main_loop.status['best_cost'] == 5 105 | assert main_loop.log.current_row.get('cost_best_so_far', None) is None 106 | 107 | main_loop.status['epochs_done'] += 1 108 | main_loop.status['iterations_done'] += 10 109 | main_loop.log.current_row['cost'] = 5 110 | extension.dispatch('after_epoch') 111 | assert main_loop.status['best_cost'] == 5 112 | assert main_loop.log.current_row.get('cost_best_so_far', None) is None 113 | 114 | main_loop.status['epochs_done'] += 1 115 | main_loop.status['iterations_done'] += 10 116 | main_loop.log.current_row['cost'] = 4 117 | extension.dispatch('after_epoch') 118 | assert main_loop.status['best_cost'] == 4 119 | assert main_loop.log.current_row['cost_best_so_far'] 120 | 121 | 122 | class WriteCostExtension(TrainingExtension): 123 | 124 | def after_batch(self, batch): 125 | self.main_loop.log.current_row['cost'] = abs( 126 | self.main_loop.log.status['iterations_done'] - 5) + 3 127 | 128 | 129 | def test_save_the_best(): 130 | skip_if_configuration_set('log_backend', 'sqlite', 131 | "Known to be flaky with SQLite log backend.") 132 | with NamedTemporaryFile(dir=config.temp_dir) as dst,\ 133 | NamedTemporaryFile(dir=config.temp_dir) as dst_best: 134 | track_cost = TrackTheBest("cost", after_epoch=False, after_batch=True) 135 | main_loop = MockMainLoop( 136 | extensions=[FinishAfter(after_n_epochs=1), 137 | WriteCostExtension(), 138 | track_cost, 139 | Checkpoint(dst.name, after_batch=True, 140 | save_separately=['log']) 141 | .add_condition( 142 | ["after_batch"], 143 | OnLogRecord(track_cost.notification_name), 144 | (dst_best.name,))]) 145 | main_loop.run() 146 | 147 | assert main_loop.log[4]['saved_to'] == (dst.name, dst_best.name) 148 | assert main_loop.log[5]['saved_to'] == (dst.name, dst_best.name) 149 | assert 
main_loop.log[6]['saved_to'] == (dst.name,) 150 | with open(dst_best.name, 'rb') as src: 151 | assert load(src).log.status['iterations_done'] == 5 152 | -------------------------------------------------------------------------------- /tests/graph/test_bn.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.testing import assert_allclose 3 | import theano 4 | from theano import tensor 5 | 6 | from blocks.bricks import (BatchNormalization, Sequence, Tanh, MLP, 7 | BatchNormalizedMLP) 8 | from blocks.filter import get_brick 9 | from blocks.graph import (ComputationGraph, batch_normalization, 10 | apply_batch_normalization, 11 | get_batch_normalization_updates) 12 | from blocks.initialization import Constant 13 | from blocks.roles import (has_roles, BATCH_NORM_POPULATION_MEAN, 14 | BATCH_NORM_POPULATION_STDEV) 15 | from blocks.utils import is_shared_variable 16 | 17 | 18 | def test_batch_normalization_simple(): 19 | x = tensor.matrix() 20 | eps = 1e-4 21 | bn = BatchNormalization(input_dim=4, epsilon=eps) 22 | bn.initialize() 23 | with batch_normalization(bn): 24 | y = bn.apply(x) 25 | rng = numpy.random.RandomState((2016, 1, 18)) 26 | x_ = rng.uniform(size=(5, 4)).astype(theano.config.floatX) 27 | y_ = y.eval({x: x_}) 28 | y_expected = (x_ - x_.mean(axis=0)) / numpy.sqrt(x_.var(axis=0) + eps) 29 | assert_allclose(y_, y_expected, rtol=1e-4) 30 | 31 | 32 | def test_batch_normalization_nested(): 33 | x = tensor.tensor4() 34 | eps = 1e-4 35 | r_dims = (0, 2, 3) 36 | batch_dims = (5, 4, 3, 2) 37 | bn = BatchNormalization(input_dim=batch_dims[1:], 38 | broadcastable=(False, True, True), 39 | epsilon=eps) 40 | seq = Sequence([bn.apply, Tanh().apply]) 41 | seq.initialize() 42 | with batch_normalization(seq): 43 | y = seq.apply(x) 44 | rng = numpy.random.RandomState((2016, 1, 18)) 45 | x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX) 46 | y_ = y.eval({x: x_}) 47 | y_expected = numpy.tanh((x_ - x_.mean(axis=r_dims, keepdims=True)) / 48 | numpy.sqrt(x_.var(axis=r_dims, keepdims=True) + 49 | eps)) 50 | assert_allclose(y_, y_expected, rtol=1e-4) 51 | 52 | 53 | def test_apply_batch_normalization_nested(): 54 | x = tensor.matrix() 55 | eps = 1e-8 56 | batch_dims = (3, 9) 57 | bn = BatchNormalization(input_dim=5, epsilon=eps) 58 | mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5], 59 | weights_init=Constant(0.4), biases_init=Constant(1)) 60 | mlp.initialize() 61 | y = mlp.apply(x) 62 | cg = apply_batch_normalization(ComputationGraph([y])) 63 | y_bn = cg.outputs[0] 64 | rng = numpy.random.RandomState((2016, 1, 18)) 65 | x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX) 66 | y_ = y_bn.eval({x: x_}) 67 | W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s) 68 | .get_value(borrow=True)), ['W', 'b']) 69 | z_ = numpy.dot(x_, W_) + b_ 70 | y_expected = numpy.tanh((z_ - z_.mean(axis=0)) / 71 | numpy.sqrt(z_.var(axis=0) + eps)) 72 | assert_allclose(y_, y_expected, rtol=1e-3) 73 | 74 | 75 | class TestSimpleGetBatchNormalizationUpdates(object): 76 | def setUp(self): 77 | self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9]) 78 | self.x = tensor.matrix() 79 | 80 | def simple_assertions(self, updates, num_bricks=2, num_updates=4, 81 | mean_only=False): 82 | """Shared assertions for simple tests.""" 83 | assert len(updates) == num_updates 84 | assert all(is_shared_variable(u[0]) for u in updates) 85 | # This order is somewhat arbitrary and implementation-dependent 86 | means = set(u[0] for u in updates 87 | if 
has_roles(u[0], [BATCH_NORM_POPULATION_MEAN])) 88 | stdevs = set(u[0] for u in updates 89 | if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV])) 90 | assert means.isdisjoint(stdevs) 91 | assert len(set(get_brick(v) for v in means)) == num_bricks 92 | if not mean_only: 93 | assert len(set(get_brick(v) for v in stdevs)) == num_bricks 94 | else: 95 | assert len(stdevs) == 0 96 | 97 | def test_get_batch_normalization_updates(self): 98 | """Test that get_batch_normalization_updates works as expected.""" 99 | with batch_normalization(self.mlp): 100 | y_bn = self.mlp.apply(self.x) 101 | graph = ComputationGraph([y_bn]) 102 | updates = get_batch_normalization_updates(graph) 103 | self.simple_assertions(updates) 104 | 105 | def test_get_batch_normalization_updates_mean_only(self): 106 | """Test get_batch_normalization_updates with mean_only bricks.""" 107 | mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9], mean_only=True) 108 | with batch_normalization(mlp): 109 | y_bn = mlp.apply(self.x) 110 | graph = ComputationGraph([y_bn]) 111 | updates = get_batch_normalization_updates(graph) 112 | self.simple_assertions(updates, num_updates=2, mean_only=True) 113 | 114 | def test_get_batch_normalization_updates_non_training_applications(self): 115 | """Test updates extraction in graph with non-training apply.""" 116 | y = self.mlp.apply(self.x) 117 | with batch_normalization(self.mlp): 118 | y_bn = self.mlp.apply(self.x) 119 | graph = ComputationGraph([y_bn, y]) 120 | updates = get_batch_normalization_updates(graph) 121 | self.simple_assertions(updates) 122 | 123 | def test_get_batch_normalization_updates_no_training(self): 124 | """Test for exception if there are no training-mode nodes.""" 125 | y = self.mlp.apply(self.x) 126 | graph = ComputationGraph([y]) 127 | numpy.testing.assert_raises(ValueError, 128 | get_batch_normalization_updates, graph) 129 | 130 | def test_get_batch_normalization_updates_duplicates_error(self): 131 | """Test that we get an error by default on multiple apply.""" 132 | with batch_normalization(self.mlp): 133 | y = self.mlp.apply(self.x) 134 | y2 = self.mlp.apply(self.x) 135 | graph = ComputationGraph([y, y2]) 136 | numpy.testing.assert_raises(ValueError, 137 | get_batch_normalization_updates, graph) 138 | 139 | def test_get_batch_normalization_updates_allow_duplicates(self): 140 | """Test get_batch_normalization_updates(allow_duplicates=True).""" 141 | with batch_normalization(self.mlp): 142 | y = self.mlp.apply(self.x) 143 | y2 = self.mlp.apply(self.x) 144 | graph = ComputationGraph([y, y2]) 145 | updates = get_batch_normalization_updates(graph, allow_duplicates=True) 146 | self.simple_assertions(updates, num_bricks=2, num_updates=8) 147 | -------------------------------------------------------------------------------- /tests/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/monitoring/__init__.py -------------------------------------------------------------------------------- /tests/monitoring/test_evaluators.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from fuel.datasets import IterableDataset 4 | from numpy.testing import assert_raises 5 | 6 | from blocks.graph import ComputationGraph 7 | from blocks.monitoring.evaluators import DatasetEvaluator 8 | from tests.monitoring.test_aggregation import TestBrick 9 | 10 | 11 | def test_dataset_evaluators(): 
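# Descriptive note: DatasetEvaluator compiles the requested monitored variables into aggregating Theano functions and accumulates them over every batch the data stream yields; the assertions below check those aggregated values.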
12 | X = theano.tensor.matrix('X') 13 | brick = TestBrick(name='test_brick') 14 | Y = brick.apply(X) 15 | graph = ComputationGraph([Y]) 16 | monitor_variables = [v for v in graph.auxiliary_variables] 17 | validator = DatasetEvaluator(monitor_variables) 18 | 19 | data = [numpy.arange(1, 5, dtype=theano.config.floatX).reshape(2, 2), 20 | numpy.arange(10, 16, dtype=theano.config.floatX).reshape(3, 2)] 21 | data_stream = IterableDataset(dict(X=data)).get_example_stream() 22 | 23 | values = validator.evaluate(data_stream) 24 | assert values['test_brick_apply_V_squared'] == 4 25 | numpy.testing.assert_allclose( 26 | values['test_brick_apply_mean_row_mean'], numpy.vstack(data).mean()) 27 | per_batch_mean = numpy.mean([batch.mean() for batch in data]) 28 | numpy.testing.assert_allclose( 29 | values['test_brick_apply_mean_batch_element'], per_batch_mean) 30 | 31 | with assert_raises(Exception) as ar: 32 | data_stream = IterableDataset(dict(X2=data)).get_example_stream() 33 | validator.evaluate(data_stream) 34 | assert "Not all data sources" in ar.exception.args[0] 35 | -------------------------------------------------------------------------------- /tests/monitoring/test_monitored_quantity.py: -------------------------------------------------------------------------------- 1 | from numpy.testing import assert_raises_regex 2 | import numpy 3 | import theano 4 | from fuel.datasets import IterableDataset 5 | 6 | from blocks.monitoring.evaluators import DatasetEvaluator 7 | from blocks.monitoring.aggregation import MonitoredQuantity 8 | from blocks.bricks.cost import CategoricalCrossEntropy 9 | 10 | 11 | class CrossEntropy(MonitoredQuantity): 12 | def __init__(self, **kwargs): 13 | super(CrossEntropy, self).__init__(**kwargs) 14 | 15 | def initialize(self): 16 | self.total_cross_entropy, self.examples_seen = 0.0, 0 17 | 18 | def aggregate(self, target, predicted): 19 | import numpy 20 | self.total_cross_entropy += -(target * numpy.log(predicted)).sum() 21 | self.examples_seen += 1 22 | 23 | def get_aggregated_value(self): 24 | res = self.total_cross_entropy / self.examples_seen 25 | return res 26 | 27 | 28 | def test_dataset_evaluators(): 29 | X = theano.tensor.vector('X') 30 | Y = theano.tensor.vector('Y') 31 | 32 | data = [numpy.arange(1, 7, dtype=theano.config.floatX).reshape(3, 2), 33 | numpy.arange(11, 17, dtype=theano.config.floatX).reshape(3, 2)] 34 | data_stream = IterableDataset(dict(X=data[0], 35 | Y=data[1])).get_example_stream() 36 | 37 | validator = DatasetEvaluator([ 38 | CrossEntropy(requires=[X, Y], 39 | name="monitored_cross_entropy0"), 40 | # use two instances of the same quantity to make sure state is reset 41 | CrossEntropy(requires=[X, Y], 42 | name="monitored_cross_entropy1"), 43 | CategoricalCrossEntropy().apply(X, Y), ]) 44 | values = validator.evaluate(data_stream) 45 | numpy.testing.assert_allclose( 46 | values['monitored_cross_entropy1'], 47 | values['categoricalcrossentropy_apply_cost']) 48 | 49 | 50 | def test_dataset_evaluator_name_none(): 51 | assert_raises_regex(ValueError, 'must have names', 52 | DatasetEvaluator, [theano.tensor.scalar()]) 53 | 54 | 55 | def test_dataset_evaluator_name_uniqueness(): 56 | assert_raises_regex(ValueError, 'unique', 57 | DatasetEvaluator, [theano.tensor.scalar('A'), 58 | theano.tensor.scalar('A')]) 59 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from 
numpy.testing import assert_raises 5 | 6 | from blocks.config import Configuration, ConfigurationError 7 | 8 | 9 | def load_config(contents): 10 | with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: 11 | f.write(contents) 12 | filename = f.name 13 | os.environ['BLOCKS_CONFIG'] = filename 14 | if 'BLOCKS_DATA_PATH' in os.environ: 15 | del os.environ['BLOCKS_DATA_PATH'] 16 | config = Configuration() 17 | config.add_config('data_path', str, env_var='BLOCKS_DATA_PATH') 18 | config.add_config('config_with_default', int, default='1', 19 | env_var='BLOCKS_CONFIG_TEST') 20 | config.add_config('config_without_default', str) 21 | config.load_yaml() 22 | return config 23 | 24 | 25 | class TestConfig(object): 26 | def setUp(self): 27 | self._environ = dict(os.environ) 28 | 29 | def tearDown(self): 30 | os.environ.clear() 31 | os.environ.update(self._environ) 32 | 33 | def test_config(self): 34 | config = load_config('data_path: yaml_path') 35 | assert config.data_path == 'yaml_path' 36 | os.environ['BLOCKS_DATA_PATH'] = 'env_path' 37 | assert config.data_path == 'env_path' 38 | assert config.config_with_default == 1 39 | os.environ['BLOCKS_CONFIG_TEST'] = '2' 40 | assert config.config_with_default == 2 41 | assert_raises(AttributeError, getattr, config, 42 | 'non_existing_config') 43 | assert_raises(ConfigurationError, getattr, config, 44 | 'config_without_default') 45 | config.data_path = 'manual_path' 46 | assert config.data_path == 'manual_path' 47 | config.new_config = 'new_config' 48 | assert config.new_config == 'new_config' 49 | 50 | def test_empty_config(self): 51 | load_config('') 52 | -------------------------------------------------------------------------------- /tests/test_initialization.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | 3 | import numpy 4 | import theano 5 | from numpy.testing import assert_equal, assert_allclose, assert_raises 6 | 7 | from blocks.initialization import Constant, IsotropicGaussian, Sparse, SparseND 8 | from blocks.initialization import Uniform, Orthogonal, Identity 9 | from blocks.utils import pack 10 | 11 | 12 | def test_constant(): 13 | def check_constant(const, shape, ground_truth): 14 | # rng unused, so pass None. 15 | init = Constant(const).generate(None, ground_truth.shape) 16 | assert ground_truth.dtype == theano.config.floatX 17 | assert ground_truth.shape == init.shape 18 | assert_equal(ground_truth, init) 19 | 20 | # Test scalar init. 21 | yield (check_constant, 5, (5, 5), 22 | 5 * numpy.ones((5, 5), dtype=theano.config.floatX)) 23 | # Test broadcasting. 
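# (E.g. the row vector [1, 2, 3] below should be tiled across all 7 rows of the (7, 3) target, and the column vector across both columns, mirroring numpy broadcasting.)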
24 | yield (check_constant, [1, 2, 3], (7, 3), 25 | numpy.array([[1, 2, 3]] * 7, dtype=theano.config.floatX)) 26 | yield (check_constant, numpy.array([[1], [2], [3]]), (3, 2), 27 | numpy.array([[1, 1], [2, 2], [3, 3]], dtype=theano.config.floatX)) 28 | 29 | assert str(Constant(1.0)).endswith(' constant=1.0>') 30 | 31 | 32 | def test_identity(): 33 | assert str(Identity(2.0)).endswith(' mult=2.0>') 34 | 35 | 36 | def test_gaussian(): 37 | rng = numpy.random.RandomState(1) 38 | 39 | def check_gaussian(rng, mean, std, shape): 40 | weights = IsotropicGaussian(std, mean).generate(rng, shape) 41 | assert weights.shape == shape 42 | assert weights.dtype == theano.config.floatX 43 | assert_allclose(weights.mean(), mean, atol=1e-2) 44 | assert_allclose(weights.std(), std, atol=1e-2) 45 | yield check_gaussian, rng, 0, 1, (500, 600) 46 | yield check_gaussian, rng, 5, 3, (600, 500) 47 | 48 | assert str(IsotropicGaussian(1.0, 2.0)).endswith(' mean=2.0, std=1.0>') 49 | 50 | 51 | def test_uniform(): 52 | rng = numpy.random.RandomState(1) 53 | 54 | def check_uniform(rng, mean, width, std, shape): 55 | weights = Uniform(mean=mean, width=width, 56 | std=std).generate(rng, shape) 57 | assert weights.shape == shape 58 | assert weights.dtype == theano.config.floatX 59 | assert_allclose(weights.mean(), mean, atol=1e-2) 60 | if width is not None: 61 | std_ = width / numpy.sqrt(12) 62 | else: 63 | std_ = std 64 | assert_allclose(std_, weights.std(), atol=1e-2) 65 | yield check_uniform, rng, 0, 0.05, None, (500, 600) 66 | yield check_uniform, rng, 0, None, 0.001, (600, 500) 67 | yield check_uniform, rng, 5, None, 0.004, (700, 300) 68 | 69 | assert_raises(ValueError, Uniform, 0, 1, 1) 70 | 71 | assert str(Uniform(1.0, 2.0)).endswith(' mean=1.0, width=2.0>') 72 | 73 | 74 | def test_sparse(): 75 | rng = numpy.random.RandomState(1) 76 | 77 | def check_sparse(rng, num_init, weights_init, sparse_init, shape, total): 78 | weights = Sparse(num_init=num_init, weights_init=weights_init, 79 | sparse_init=sparse_init).generate(rng, shape) 80 | assert weights.shape == shape 81 | assert weights.dtype == theano.config.floatX 82 | if sparse_init is None: 83 | if isinstance(num_init, numbers.Integral): 84 | assert (numpy.count_nonzero(weights) <= 85 | weights.size - num_init * weights.shape[0]) 86 | else: 87 | assert (numpy.count_nonzero(weights) <= 88 | weights.size - num_init * weights.shape[1]) 89 | if total is not None: 90 | assert numpy.sum(weights) == total 91 | 92 | yield check_sparse, rng, 5, Constant(1.), None, (10, 10), None 93 | yield check_sparse, rng, 0.5, Constant(1.), None, (10, 10), None 94 | yield check_sparse, rng, 0.5, Constant(1.), Constant(1.), (10, 10), None 95 | yield check_sparse, rng, 3, Constant(1.), None, (10, 10), 30 96 | yield check_sparse, rng, 3, Constant(0.), Constant(1.), (10, 10), 70 97 | yield check_sparse, rng, 0.3, Constant(1.), None, (10, 10), 30 98 | yield check_sparse, rng, 0.3, Constant(0.), Constant(1.), (10, 10), 70 99 | 100 | 101 | def test_sparse_nd(): 102 | rng = numpy.random.RandomState(1) 103 | 104 | def check_sparse(rng, axis, num_init, shape, weights_init=Constant(1.)): 105 | weights = SparseND(axis=axis, num_init=num_init, 106 | weights_init=weights_init).generate(rng, shape) 107 | assert weights.shape == shape 108 | assert weights.dtype == theano.config.floatX 109 | if isinstance(num_init, numbers.Integral): 110 | nnz = numpy.prod([s for i, s in enumerate(shape) 111 | if i in pack(axis)]) * num_init 112 | assert numpy.count_nonzero(weights) == nnz 113 | else: 114 | atom_size = 
numpy.prod([s for i, s in enumerate(shape) 115 | if i not in pack(axis)]) 116 | nnz_atom = int(num_init * atom_size) 117 | num_atoms = numpy.prod([s for i, s in enumerate(shape) 118 | if i in pack(axis)]) 119 | nnz = nnz_atom * num_atoms 120 | assert numpy.count_nonzero(weights) == nnz 121 | 122 | yield check_sparse, rng, 1, 5, (10, 11) 123 | yield check_sparse, rng, 2, 3, (7, 8, 9) 124 | yield check_sparse, rng, (2, 3), 5. / 6., (2, 3, 5, 7) 125 | yield check_sparse, rng, (0, 1), 3, (3, 5, 7, 11) 126 | yield check_sparse, rng, (0, 2, 3), 0.5, (2, 3, 2, 6) 127 | 128 | 129 | def test_orthogonal(): 130 | rng = numpy.random.RandomState(1) 131 | 132 | def check_orthogonal(rng, shape, scale=1.0): 133 | W = Orthogonal(scale).generate(rng, shape) 134 | 135 | assert W.shape == shape 136 | 137 | # For square matrices the following two should 138 | # be diagonal. For non-square matrices, we relax 139 | # a bit. 140 | WWT = numpy.dot(W, W.T) 141 | WTW = numpy.dot(W.T, W) 142 | 143 | atol = 0.2 144 | 145 | # Sanity check, just to be safe 146 | assert WWT.shape == (shape[0], shape[0]) 147 | assert WTW.shape == (shape[1], shape[1]) 148 | 149 | # Diagonals ~= 1. ? 150 | assert_allclose(numpy.diag(WWT), scale ** 2, atol=atol) 151 | assert_allclose(numpy.diag(WTW), scale ** 2, atol=atol) 152 | 153 | # Non-diagonal ~= 0. ? 154 | WWT_residual = WWT - numpy.eye(shape[0]) * scale ** 2 155 | WTW_residual = WTW - numpy.eye(shape[1]) * scale ** 2 156 | 157 | assert_allclose(WWT_residual, 0., atol=atol) 158 | assert_allclose(WTW_residual, 0., atol=atol) 159 | 160 | yield check_orthogonal, rng, (50, 50) 161 | yield check_orthogonal, rng, (50, 51) 162 | yield check_orthogonal, rng, (51, 50) 163 | yield check_orthogonal, rng, (50, 50), .5 164 | yield check_orthogonal, rng, (50, 51), .5 165 | yield check_orthogonal, rng, (51, 50), .5 166 | 167 | assert str(Orthogonal(3.0)).endswith(' scale=3.0>') 168 | -------------------------------------------------------------------------------- /tests/test_log.py: -------------------------------------------------------------------------------- 1 | import os 2 | from operator import getitem 3 | 4 | from numpy.testing import assert_raises 5 | 6 | from blocks.log import TrainingLog 7 | from blocks.serialization import load, dump 8 | 9 | 10 | def test_training_log(): 11 | log = TrainingLog() 12 | 13 | # test basic writing capabilities 14 | log[0]['field'] = 45 15 | assert log[0]['field'] == 45 16 | assert log[1] == {} 17 | assert log.current_row['field'] == 45 18 | log.status['iterations_done'] += 1 19 | assert log.status['iterations_done'] == 1 20 | assert log.previous_row['field'] == 45 21 | 22 | assert_raises(ValueError, getitem, log, -1) 23 | 24 | # test iteration 25 | assert len(list(log)) == 2 26 | 27 | 28 | def test_pickle_log(): 29 | log1 = TrainingLog() 30 | with open('log1.tar', 'wb') as f: 31 | dump(log1, f) 32 | with open('log1.tar', 'rb') as f: 33 | log2 = load(f) 34 | with open('log2.tar', 'wb') as f: 35 | dump(log2, f) 36 | with open('log2.tar', 'rb') as f: 37 | load(f) # loading an unresumed log works 38 | log2.resume() 39 | with open('log3.tar', 'wb') as f: 40 | dump(log2, f) 41 | with open('log3.tar', 'rb') as f: 42 | load(f) # loading a resumed log does not work 43 | os.remove('log1.tar') 44 | os.remove('log2.tar') 45 | os.remove('log3.tar') 46 | -------------------------------------------------------------------------------- /tests/test_main_loop.py: -------------------------------------------------------------------------------- 1 | import os 2 | import signal 3 | 
import time 4 | from itertools import count 5 | from multiprocessing import Process 6 | 7 | from fuel.datasets import IterableDataset 8 | from mock import MagicMock, ANY 9 | from numpy.testing import assert_raises 10 | from six.moves import cPickle 11 | 12 | from blocks.main_loop import MainLoop 13 | from blocks.extensions import TrainingExtension, FinishAfter, Printing 14 | from blocks.utils import unpack 15 | from blocks.config import config 16 | from blocks.utils.testing import MockAlgorithm, MockMainLoop 17 | 18 | 19 | class WriteBatchExtension(TrainingExtension): 20 | """Writes data saved by MockAlgorithm to the log.""" 21 | def after_batch(self, _): 22 | self.main_loop.log.current_row['batch'] = \ 23 | self.main_loop.algorithm.batch 24 | 25 | 26 | def test_main_loop(): 27 | old_config_profile_value = config.profile 28 | config.profile = True 29 | 30 | main_loop = MainLoop( 31 | MockAlgorithm(), IterableDataset(range(10)).get_example_stream(), 32 | extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)]) 33 | # regression test to check that accessing 'iteration_state' 34 | # before training does not lead to a crash 35 | main_loop.iteration_state 36 | main_loop.run() 37 | assert_raises(AttributeError, getattr, main_loop, 'model') 38 | 39 | assert main_loop.log.status['iterations_done'] == 20 40 | assert main_loop.log.status['_epoch_ends'] == [10, 20] 41 | assert len(main_loop.log) == 20 42 | for i in range(20): 43 | assert main_loop.log[i + 1]['batch'] == {'data': i % 10} 44 | 45 | config.profile = old_config_profile_value 46 | 47 | 48 | def test_training_resumption(): 49 | def do_test(with_serialization): 50 | data_stream = IterableDataset(range(10)).get_example_stream() 51 | main_loop = MainLoop( 52 | MockAlgorithm(), data_stream, 53 | extensions=[WriteBatchExtension(), 54 | FinishAfter(after_n_batches=14)]) 55 | main_loop.run() 56 | assert main_loop.log.status['iterations_done'] == 14 57 | 58 | if with_serialization: 59 | main_loop = cPickle.loads(cPickle.dumps(main_loop)) 60 | 61 | finish_after = unpack( 62 | [ext for ext in main_loop.extensions 63 | if isinstance(ext, FinishAfter)], singleton=True) 64 | finish_after.add_condition( 65 | ["after_batch"], 66 | predicate=lambda log: log.status['iterations_done'] == 27) 67 | main_loop.run() 68 | assert main_loop.log.status['iterations_done'] == 27 69 | assert main_loop.log.status['epochs_done'] == 2 70 | for i in range(27): 71 | assert main_loop.log[i + 1]['batch'] == {"data": i % 10} 72 | 73 | do_test(False) 74 | do_test(True) 75 | 76 | 77 | def test_training_interrupt(): 78 | def process_batch(batch): 79 | time.sleep(0.1) 80 | 81 | algorithm = MockAlgorithm() 82 | algorithm.process_batch = process_batch 83 | 84 | main_loop = MockMainLoop( 85 | algorithm=algorithm, 86 | data_stream=IterableDataset(count()).get_example_stream(), 87 | extensions=[Printing()] 88 | ) 89 | 90 | p = Process(target=main_loop.run) 91 | p.start() 92 | time.sleep(0.1) 93 | os.kill(p.pid, signal.SIGINT) 94 | time.sleep(0.1) 95 | assert p.is_alive() 96 | os.kill(p.pid, signal.SIGINT) 97 | time.sleep(0.2) 98 | assert not p.is_alive() 99 | p.join() 100 | 101 | 102 | def test_error(): 103 | ext = TrainingExtension() 104 | ext.after_batch = MagicMock(side_effect=KeyError) 105 | ext.on_error = MagicMock() 106 | main_loop = MockMainLoop(extensions=[ext, FinishAfter(after_epoch=True)]) 107 | assert_raises(KeyError, main_loop.run) 108 | ext.on_error.assert_called_once_with(ANY) 109 | assert 'got_exception' in main_loop.log.current_row 110 | 111 | ext.on_error = 
MagicMock(side_effect=AttributeError) 112 | main_loop = MockMainLoop(extensions=[ext, FinishAfter(after_epoch=True)]) 113 | assert_raises(KeyError, main_loop.run) 114 | ext.on_error.assert_called_once_with(ANY) 115 | assert 'got_exception' in main_loop.log.current_row 116 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from numpy.testing import assert_allclose, assert_raises 5 | 6 | from blocks.bricks import MLP, Tanh 7 | from blocks.model import Model 8 | from blocks.graph import add_role, PARAMETER 9 | from blocks.utils import shared_floatx 10 | 11 | 12 | def test_model(): 13 | x = tensor.matrix('x') 14 | mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1") 15 | mlp2 = MLP([Tanh()], [30, 40], name="mlp2") 16 | h1 = mlp1.apply(x) 17 | h2 = mlp2.apply(h1) 18 | 19 | model = Model(h2) 20 | assert model.get_top_bricks() == [mlp1, mlp2] 21 | # The order of parameters returned is deterministic but 22 | # not sensible. 23 | assert list(model.get_parameter_dict().items()) == [ 24 | ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b), 25 | ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b), 26 | ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b), 27 | ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W), 28 | ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W), 29 | ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)] 30 | 31 | # Test getting and setting parameter values 32 | mlp3 = MLP([Tanh()], [10, 10]) 33 | mlp3.allocate() 34 | model3 = Model(mlp3.apply(x)) 35 | parameter_values = { 36 | '/mlp/linear_0.W': 2 * numpy.ones((10, 10), 37 | dtype=theano.config.floatX), 38 | '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)} 39 | model3.set_parameter_values(parameter_values) 40 | assert numpy.all( 41 | mlp3.linear_transformations[0].parameters[0].get_value() == 2) 42 | assert numpy.all( 43 | mlp3.linear_transformations[0].parameters[1].get_value() == 3) 44 | got_parameter_values = model3.get_parameter_values() 45 | assert len(got_parameter_values) == len(parameter_values) 46 | for name, value in parameter_values.items(): 47 | assert_allclose(value, got_parameter_values[name]) 48 | 49 | # Test exception is raised if parameter shapes don't match 50 | def helper(): 51 | parameter_values = { 52 | '/mlp/linear_0.W': 2 * numpy.ones((11, 11), 53 | dtype=theano.config.floatX), 54 | '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)} 55 | model3.set_parameter_values(parameter_values) 56 | assert_raises(ValueError, helper) 57 | 58 | # Test name conflict handling 59 | mlp4 = MLP([Tanh()], [10, 10]) 60 | 61 | def helper(): 62 | Model(mlp4.apply(mlp3.apply(x))) 63 | assert_raises(ValueError, helper) 64 | 65 | 66 | def test_model_handles_brickless_parameters(): 67 | x = tensor.matrix('x') 68 | v = shared_floatx(numpy.zeros((10, 10)), name='V') 69 | add_role(v, PARAMETER) 70 | y = x.dot(v) 71 | model = Model(y) 72 | assert list(model.get_parameter_dict().items()) == [('V', v)] 73 | -------------------------------------------------------------------------------- /tests/test_roles.py: -------------------------------------------------------------------------------- 1 | import blocks.roles 2 | from six.moves import cPickle 3 | 4 | 5 | def test_role_serialization(): 6 | """Test that roles compare equal before and after serialization.""" 7 | roles = 
[blocks.roles.INPUT, 8 | blocks.roles.OUTPUT, 9 | blocks.roles.COST, 10 | blocks.roles.PARAMETER, 11 | blocks.roles.AUXILIARY, 12 | blocks.roles.WEIGHT, 13 | blocks.roles.BIAS, 14 | blocks.roles.FILTER] 15 | 16 | for role in roles: 17 | deserialized = cPickle.loads(cPickle.dumps(role)) 18 | assert deserialized == role 19 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from numpy.testing import assert_allclose 5 | 6 | from blocks.bricks import Tanh, Initializable 7 | from blocks.bricks.attention import SequenceContentAttention 8 | from blocks.bricks.base import application 9 | from blocks.bricks.lookup import LookupTable 10 | from blocks.bricks.recurrent import SimpleRecurrent 11 | from blocks.bricks.sequence_generators import ( 12 | SequenceGenerator, Readout, SoftmaxEmitter, LookupFeedback) 13 | from blocks.graph import ComputationGraph 14 | from blocks.initialization import IsotropicGaussian 15 | from blocks.filter import VariableFilter 16 | from blocks.search import BeamSearch 17 | 18 | 19 | class SimpleGenerator(Initializable): 20 | """The top brick. 21 | 22 | It is often convenient to gather all bricks of the model under the 23 | roof of a single top brick. 24 | 25 | """ 26 | def __init__(self, dimension, alphabet_size, **kwargs): 27 | super(SimpleGenerator, self).__init__(**kwargs) 28 | lookup = LookupTable(alphabet_size, dimension) 29 | transition = SimpleRecurrent( 30 | activation=Tanh(), 31 | dim=dimension, name="transition") 32 | attention = SequenceContentAttention( 33 | state_names=transition.apply.states, 34 | attended_dim=dimension, match_dim=dimension, name="attention") 35 | readout = Readout( 36 | readout_dim=alphabet_size, 37 | source_names=[transition.apply.states[0], 38 | attention.take_glimpses.outputs[0]], 39 | emitter=SoftmaxEmitter(name="emitter"), 40 | feedback_brick=LookupFeedback(alphabet_size, dimension), 41 | name="readout") 42 | generator = SequenceGenerator( 43 | readout=readout, transition=transition, attention=attention, 44 | name="generator") 45 | 46 | self.lookup = lookup 47 | self.generator = generator 48 | self.children = [lookup, generator] 49 | 50 | @application 51 | def cost(self, chars, chars_mask, targets, targets_mask): 52 | return self.generator.cost_matrix( 53 | targets, targets_mask, 54 | attended=self.lookup.apply(chars), 55 | attended_mask=chars_mask) 56 | 57 | @application 58 | def generate(self, chars): 59 | return self.generator.generate( 60 | n_steps=3 * chars.shape[0], batch_size=chars.shape[1], 61 | attended=self.lookup.apply(chars), 62 | attended_mask=tensor.ones(chars.shape)) 63 | 64 | 65 | def test_beam_search_smallest(): 66 | a = numpy.array([[3, 6, 4], [1, 2, 7]]) 67 | ind, mins = BeamSearch._smallest(a, 2) 68 | assert numpy.all(numpy.array(ind) == numpy.array([[1, 1], [0, 1]])) 69 | assert numpy.all(mins == [1, 2]) 70 | 71 | 72 | def test_beam_search(): 73 | """Test beam search using a model similar to the reverse_words demo. 74 | 75 | Ideally this test should be done with a trained model, but so far 76 | it is done only with a randomly initialized one. So it does not really test 77 | the ability to find the best output sequence, but only the correctness 78 | of returned costs. 
79 | 80 | """ 81 | rng = numpy.random.RandomState(1234) 82 | alphabet_size = 20 83 | beam_size = 10 84 | length = 15 85 | 86 | simple_generator = SimpleGenerator(10, alphabet_size, seed=1234) 87 | simple_generator.weights_init = IsotropicGaussian(0.5) 88 | simple_generator.biases_init = IsotropicGaussian(0.5) 89 | simple_generator.initialize() 90 | 91 | inputs = tensor.lmatrix('inputs') 92 | samples, = VariableFilter( 93 | applications=[simple_generator.generator.generate], 94 | name="outputs")( 95 | ComputationGraph(simple_generator.generate(inputs))) 96 | 97 | input_vals = numpy.tile(rng.randint(alphabet_size, size=(length,)), 98 | (beam_size, 1)).T 99 | 100 | search = BeamSearch(samples) 101 | results, mask, costs = search.search( 102 | {inputs: input_vals}, 0, 3 * length, as_arrays=True) 103 | # Just check sum 104 | assert results.sum() == 2816 105 | 106 | true_costs = simple_generator.cost( 107 | tensor.as_tensor_variable(input_vals), 108 | numpy.ones((length, beam_size), dtype=theano.config.floatX), 109 | tensor.as_tensor_variable(results), mask).eval() 110 | true_costs = (true_costs * mask).sum(axis=0) 111 | assert_allclose(costs.sum(axis=0), true_costs, rtol=1e-5) 112 | 113 | # Test `as_lists=True` 114 | results2, costs2 = search.search({inputs: input_vals}, 115 | 0, 3 * length) 116 | for i in range(len(results2)): 117 | assert results2[i] == list(results.T[i, :mask.T[i].sum()]) 118 | -------------------------------------------------------------------------------- /tests/test_select.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import theano 4 | from numpy.testing import assert_raises 5 | 6 | from blocks.bricks.base import Brick 7 | from blocks.select import Path, Selector 8 | 9 | 10 | class MockBrickTop(Brick): 11 | 12 | def __init__(self, children, **kwargs): 13 | super(MockBrickTop, self).__init__(**kwargs) 14 | self.children = children 15 | self.parameters = [] 16 | 17 | 18 | class MockBrickBottom(Brick): 19 | 20 | def __init__(self, **kwargs): 21 | super(MockBrickBottom, self).__init__(**kwargs) 22 | self.parameters = [theano.shared(0, "V"), theano.shared(0, "W")] 23 | 24 | 25 | def test_path(): 26 | path1 = Path.parse("/brick") 27 | assert path1.nodes == (Path.BrickName("brick"),) 28 | 29 | path2 = Path.parse("/brick.W") 30 | assert path2.nodes == (Path.BrickName("brick"), Path.ParameterName("W")) 31 | 32 | path3 = Path.parse("/brick1/brick2") 33 | assert path3.nodes == (Path.BrickName("brick1"), Path.BrickName("brick2")) 34 | 35 | path4 = deepcopy(path3) 36 | assert path4 == path3 37 | assert path4 != path2 38 | assert hash(path4) == hash(path3) 39 | assert hash(path4) != hash(path2) 40 | 41 | 42 | def test_selector_get_parameters_uniqueness(): 43 | top = MockBrickTop( 44 | [MockBrickBottom(name="bottom"), MockBrickBottom(name="bottom")], 45 | name="top") 46 | 47 | selector = Selector([top]) 48 | assert_raises(ValueError, selector.get_parameters) 49 | 50 | 51 | def test_selector(): 52 | b1 = MockBrickBottom(name="b1") 53 | b2 = MockBrickBottom(name="b2") 54 | b3 = MockBrickBottom(name="b3") 55 | t1 = MockBrickTop([b1, b2], name="t1") 56 | t2 = MockBrickTop([b2, b3], name="t2") 57 | 58 | s1 = Selector([t1]) 59 | s11 = s1.select("/t1/b1") 60 | assert s11.bricks[0] == b1 61 | assert len(s11.bricks) == 1 62 | s12 = s1.select("/t1") 63 | assert s12.bricks[0] == t1 64 | assert len(s12.bricks) == 1 65 | 66 | s2 = Selector([t1, t2]) 67 | s21 = s2.select("/t2/b2") 68 | assert s21.bricks[0] == b2 69 | assert 
len(s21.bricks) == 1 70 | 71 | assert s2.select("/t2/b2.V")[0] == b2.parameters[0] 72 | 73 | parameters = list(s1.get_parameters().items()) 74 | assert parameters[0][0] == "/t1/b1.V" 75 | assert parameters[0][1] == b1.parameters[0] 76 | assert parameters[1][0] == "/t1/b1.W" 77 | assert parameters[1][1] == b1.parameters[1] 78 | assert parameters[2][0] == "/t1/b2.V" 79 | assert parameters[2][1] == b2.parameters[0] 80 | assert parameters[3][0] == "/t1/b2.W" 81 | assert parameters[3][1] == b2.parameters[1] 82 | -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | import tarfile 4 | from pickle import PicklingError 5 | from io import BytesIO 6 | from tempfile import NamedTemporaryFile 7 | 8 | import numpy 9 | import theano 10 | from numpy.testing import assert_allclose, assert_raises 11 | 12 | from blocks.config import config 13 | from theano import tensor, shared 14 | from blocks.bricks import MLP, Linear 15 | from blocks.initialization import Constant 16 | from blocks.serialization import (load, dump, secure_dump, load_parameters, 17 | _Renamer, add_to_dump, dump_and_add_to_dump, 18 | continue_training) 19 | 20 | 21 | def test_renamer(): 22 | x = tensor.matrix('features') 23 | layer = Linear(10, 10) 24 | y = layer.apply(x) 25 | named = shared(name='named', value=numpy.zeros(2)) 26 | tag_named = shared(value=numpy.zeros(2)) 27 | tag_named.tag.name = 'tag_named' 28 | unnamed = shared(value=numpy.zeros(2)) 29 | variables = [layer.W, named, tag_named, unnamed, unnamed, unnamed] 30 | renamer = _Renamer() 31 | names = [renamer(n) for n in variables] 32 | true_names = ['|linear.W', 'named', 'tag_named', 'parameter', 33 | 'parameter_2', 'parameter_3'] 34 | assert set(names) == set(true_names) 35 | 36 | 37 | def foo(): # To test warnings 38 | pass 39 | 40 | 41 | def test_serialization(): 42 | 43 | # Create a simple MLP to dump. 44 | mlp = MLP(activations=[None, None], dims=[10, 10, 10], 45 | weights_init=Constant(1.), use_bias=False) 46 | mlp.initialize() 47 | W = mlp.linear_transformations[1].W 48 | W.set_value(W.get_value() * 2) 49 | 50 | # Ensure warnings are raised when __main__ namespace objects are dumped. 51 | foo.__module__ = '__main__' 52 | import __main__ 53 | __main__.__dict__['foo'] = foo 54 | mlp.foo = foo 55 | with NamedTemporaryFile(delete=False) as f: 56 | with warnings.catch_warnings(record=True) as w: 57 | dump(mlp.foo, f) 58 | assert len(w) == 1 59 | assert '__main__' in str(w[-1].message) 60 | 61 | # Check the parameters. 62 | with NamedTemporaryFile(delete=False) as f: 63 | dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) 64 | with open(f.name, 'rb') as ff: 65 | numpy_data = load_parameters(ff) 66 | assert set(numpy_data.keys()) == \ 67 | set(['/mlp/linear_0.W', '/mlp/linear_1.W']) 68 | assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10))) 69 | assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX 70 | 71 | # Ensure that it can be unpickled. 72 | with open(f.name, 'rb') as ff: 73 | mlp = load(ff) 74 | assert_allclose(mlp.linear_transformations[1].W.get_value(), 75 | numpy.ones((10, 10)) * 2) 76 | 77 | # Ensure that duplicate names are dealt with. 
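# Both children are renamed 'linear' below, so their parameters would collide on '/mlp/linear.W'; the dump is expected to disambiguate the second one with a '_2' suffix.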
78 | for child in mlp.children: 79 | child.name = 'linear' 80 | with NamedTemporaryFile(delete=False) as f: 81 | dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) 82 | with open(f.name, 'rb') as ff: 83 | numpy_data = load_parameters(ff) 84 | assert set(numpy_data.keys()) == \ 85 | set(['/mlp/linear.W', '/mlp/linear.W_2']) 86 | 87 | # Check when we don't dump the main object. 88 | with NamedTemporaryFile(delete=False) as f: 89 | dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W]) 90 | with tarfile.open(f.name, 'r') as tarball: 91 | assert set(tarball.getnames()) == set(['_parameters']) 92 | 93 | 94 | def test_add_to_dump(): 95 | 96 | # Create a simple MLP to dump. 97 | mlp = MLP(activations=[None, None], dims=[10, 10, 10], 98 | weights_init=Constant(1.), use_bias=False) 99 | mlp.initialize() 100 | W = mlp.linear_transformations[1].W 101 | W.set_value(W.get_value() * 2) 102 | mlp2 = MLP(activations=[None, None], dims=[10, 10, 10], 103 | weights_init=Constant(1.), use_bias=False, 104 | name='mlp2') 105 | mlp2.initialize() 106 | 107 | # Ensure that adding to dump is working. 108 | with NamedTemporaryFile(delete=False) as f: 109 | dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) 110 | with open(f.name, 'rb+') as ff: 111 | add_to_dump(mlp.children[0], ff, 'child_0', 112 | parameters=[mlp.children[0].W]) 113 | add_to_dump(mlp.children[1], ff, 'child_1') 114 | with tarfile.open(f.name, 'r') as tarball: 115 | assert set(tarball.getnames()) == set(['_pkl', '_parameters', 116 | 'child_0', 'child_1']) 117 | 118 | # Ensure that we can load any object from the tarball. 119 | with open(f.name, 'rb') as ff: 120 | saved_children_0 = load(ff, 'child_0') 121 | saved_children_1 = load(ff, 'child_1') 122 | assert_allclose(saved_children_0.W.get_value(), 123 | numpy.ones((10, 10))) 124 | assert_allclose(saved_children_1.W.get_value(), 125 | numpy.ones((10, 10)) * 2) 126 | 127 | # Check the error if using a reserved name. 
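# '_pkl' and '_parameters' are member names the dump format itself uses (see the tarball assertions above), so add_to_dump should refuse them.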
128 | with open(f.name, 'rb+') as ff: 129 | assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl']) 130 | 131 | # Check the error if saving an object with other parameters 132 | with open(f.name, 'rb+') as ff: 133 | assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], 134 | **dict(parameters=[mlp2.children[0].W, 135 | mlp2.children[1].W])) 136 | 137 | # Check the error if adding to a dump with no parameters 138 | with NamedTemporaryFile(delete=False) as f: 139 | dump(mlp, f) 140 | with open(f.name, 'rb+') as ff: 141 | assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], 142 | **dict(parameters=[mlp2.children[0].W, 143 | mlp2.children[1].W])) 144 | 145 | 146 | def test_secure_dump(): 147 | foo = object() 148 | bar = lambda: None # flake8: noqa 149 | with NamedTemporaryFile(delete=False, dir=config.temp_dir) as f: 150 | secure_dump(foo, f.name) 151 | assert_raises(PicklingError, secure_dump, bar, f.name) 152 | with open(f.name, 'rb') as f: 153 | assert type(load(f)) is object 154 | 155 | 156 | def test_dump_and_add_to_dump(): 157 | x = 3 158 | y = 2 159 | with NamedTemporaryFile(delete=False) as f: 160 | dump_and_add_to_dump(x, f, None, {'y': y}) 161 | assert load(open(f.name, 'rb')) == x 162 | assert load(open(f.name, 'rb'), 'y') == y 163 | 164 | 165 | def test_protocol0_regression(): 166 | """Check for a regression where protocol 0 dumps fail on load.""" 167 | brick = Linear(5, 10) 168 | brick.allocate() 169 | buf = BytesIO() 170 | dump(brick, buf, parameters=list(brick.parameters), protocol=0) 171 | try: 172 | load(buf) 173 | except TypeError: 174 | assert False # Regression 175 | -------------------------------------------------------------------------------- /tests/test_theano_expressions.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from numpy.testing import assert_allclose 4 | from theano import tensor 5 | 6 | from blocks.theano_expressions import l2_norm, hessian_times_vector 7 | 8 | 9 | def test_l2_norm(): 10 | assert_allclose(l2_norm([2]).eval(), 2.0) 11 | assert_allclose(l2_norm([3, 4]).eval(), 5.0) 12 | assert_allclose(l2_norm([3, [1, 2]]).eval(), 14.0 ** 0.5) 13 | assert_allclose( 14 | l2_norm([3, [1, 2], [[1, 2], [3, 4]]]).eval(), 44.0 ** 0.5) 15 | assert_allclose( 16 | l2_norm([3, [1, 2], [[1, 2], [3, 4]]], squared=True).eval(), 44.0) 17 | 18 | 19 | def test_hessian_times_vector(): 20 | x_y = tensor.vector('x_y') 21 | x, y = x_y[0], x_y[1] 22 | # The Hessian of this should be the identity 23 | c = 0.5 * (x ** 2 + y ** 2) 24 | g = tensor.grad(c, x_y) 25 | 26 | v = tensor.vector('v') 27 | Hv = hessian_times_vector(g, x_y, v) 28 | Hv_rop = hessian_times_vector(g, x_y, v, r_op=True) 29 | 30 | f = theano.function([x_y, v], Hv) 31 | f_rop = theano.function([x_y, v], Hv_rop) 32 | 33 | x_y_val = numpy.random.rand(2).astype(theano.config.floatX) 34 | v_val = numpy.random.rand(2).astype(theano.config.floatX) 35 | 36 | assert_allclose(v_val, f(x_y_val, v_val)) 37 | assert_allclose(v_val, f_rop(x_y_val, v_val)) 38 | -------------------------------------------------------------------------------- /tests/test_variable_filter.py: -------------------------------------------------------------------------------- 1 | from nose.tools import raises 2 | 3 | from blocks.bricks import Bias, Linear, Logistic 4 | from blocks.bricks.parallel import Merge 5 | from blocks.filter import VariableFilter 6 | from blocks.graph import ComputationGraph 7 | from blocks.roles import BIAS, FILTER, PARAMETER, 
OUTPUT 8 | 9 | from theano import tensor 10 | 11 | 12 | def test_variable_filter(): 13 | # Creating computation graph 14 | brick1 = Linear(input_dim=2, output_dim=2, name='linear1') 15 | brick2 = Bias(2, name='bias1') 16 | activation = Logistic(name='sigm') 17 | 18 | x = tensor.vector() 19 | h1 = brick1.apply(x, call_id='brick1_call_id') 20 | h2 = activation.apply(h1, call_id='act') 21 | h2.name = "h2act" 22 | y = brick2.apply(h2) 23 | cg = ComputationGraph(y) 24 | 25 | parameters = [brick1.W, brick1.b, brick2.parameters[0]] 26 | bias = [brick1.b, brick2.parameters[0]] 27 | brick1_bias = [brick1.b] 28 | 29 | # Testing filtering by role 30 | role_filter = VariableFilter(roles=[PARAMETER]) 31 | assert parameters == role_filter(cg.variables) 32 | role_filter = VariableFilter(roles=[FILTER]) 33 | assert [] == role_filter(cg.variables) 34 | 35 | # Testing filtering by role using each_role flag 36 | role_filter = VariableFilter(roles=[PARAMETER, BIAS]) 37 | assert parameters == role_filter(cg.variables) 38 | role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True) 39 | assert not parameters == role_filter(cg.variables) 40 | assert bias == role_filter(cg.variables) 41 | 42 | # Testing filtering by bricks classes 43 | brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear]) 44 | assert brick1_bias == brick_filter(cg.variables) 45 | 46 | # Testing filtering by bricks instances 47 | brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1]) 48 | assert brick1_bias == brick_filter(cg.variables) 49 | 50 | # Testing filtering by brick instance 51 | brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1]) 52 | assert brick1_bias == brick_filter(cg.variables) 53 | 54 | # Testing filtering by name 55 | name_filter = VariableFilter(name='W_norm') 56 | assert [cg.variables[2]] == name_filter(cg.variables) 57 | 58 | # Testing filtering by name regex 59 | name_filter_regex = VariableFilter(name_regex='W_no.?m') 60 | assert [cg.variables[2]] == name_filter_regex(cg.variables) 61 | 62 | # Testing filtering by theano name 63 | theano_name_filter = VariableFilter(theano_name='h2act') 64 | assert [cg.variables[11]] == theano_name_filter(cg.variables) 65 | 66 | # Testing filtering by theano name regex 67 | theano_name_filter_regex = VariableFilter(theano_name_regex='h2a.?t') 68 | assert [cg.variables[11]] == theano_name_filter_regex(cg.variables) 69 | 70 | brick1_apply_variables = [cg.variables[1], cg.variables[8]] 71 | # Testing filtering by application 72 | appli_filter = VariableFilter(applications=[brick1.apply]) 73 | assert brick1_apply_variables == appli_filter(cg.variables) 74 | 75 | # Testing filtering by unbound application 76 | unbound_appli_filter = VariableFilter(applications=[Linear.apply]) 77 | assert brick1_apply_variables == unbound_appli_filter(cg.variables) 78 | 79 | # Testing filtering by call identifier 80 | call_id_filter = VariableFilter(call_id='brick1_call_id') 81 | assert brick1_apply_variables == call_id_filter(cg.variables) 82 | 83 | input1 = tensor.matrix('input1') 84 | input2 = tensor.matrix('input2') 85 | merge = Merge(['input1', 'input2'], [5, 6], 2) 86 | merged = merge.apply(input1, input2) 87 | merge_cg = ComputationGraph(merged) 88 | outputs = VariableFilter( 89 | roles=[OUTPUT], bricks=[merge])(merge_cg.variables) 90 | assert merged in outputs 91 | assert len(outputs) == 3 92 | 93 | outputs_application = VariableFilter( 94 | roles=[OUTPUT], applications=[merge.apply])(merge_cg.variables) 95 | assert outputs_application == [merged] 96 | 97 | 98 | 
@raises(TypeError) 99 | def test_variable_filter_roles_error(): 100 | # Creating computation graph 101 | brick1 = Linear(input_dim=2, output_dim=2, name='linear1') 102 | 103 | x = tensor.vector() 104 | h1 = brick1.apply(x) 105 | cg = ComputationGraph(h1) 106 | # testing role error 107 | VariableFilter(roles=PARAMETER)(cg.variables) 108 | 109 | 110 | @raises(TypeError) 111 | def test_variable_filter_applications_error(): 112 | # Creating computation graph 113 | brick1 = Linear(input_dim=2, output_dim=2, name='linear1') 114 | 115 | x = tensor.vector() 116 | h1 = brick1.apply(x) 117 | cg = ComputationGraph(h1) 118 | VariableFilter(applications=brick1.apply)(cg.variables) 119 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mila-iqia/blocks/c69c2dc2b9c90a8eac9e432690eb59ff99d2f28a/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_containers.py: -------------------------------------------------------------------------------- 1 | from blocks.utils.containers import AnnotatingList 2 | 3 | 4 | def test_annotating_list(): 5 | lst = AnnotatingList(range(10)) 6 | assert repr(lst) == repr(list(range(10))) 7 | assert lst == list(range(10)) 8 | assert lst != list(range(9)) 9 | assert lst[0] == 0 10 | lst[0] = 10 11 | del lst[0] 12 | lst.insert(0, 0) 13 | assert lst == list(range(10)) 14 | -------------------------------------------------------------------------------- /tests/utils/test_imports.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def test_no_theano_import(): 5 | del sys.modules['theano'] 6 | import blocks.utils # noqa: F401 7 | assert 'theano' not in sys.modules 8 | from blocks.utils import dict_union # noqa: F401 9 | assert 'theano' not in sys.modules 10 | 11 | 12 | def test_imports(): 13 | from blocks.utils import dict_union 14 | from blocks.utils import check_theano_variable # noqa: F401 15 | from blocks.utils.utils import dict_union # noqa: F811,F401 16 | from blocks.utils.theano_utils import check_theano_variable # noqa: F811,F401,E501 17 | -------------------------------------------------------------------------------- /tests/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | from numpy.testing import assert_raises 2 | from theano import tensor 3 | 4 | from blocks.utils import check_theano_variable, unpack, find_bricks 5 | from blocks.bricks import MLP, Sequence, Tanh, Identity, Logistic 6 | 7 | 8 | def test_unpack(): 9 | assert unpack((1, 2)) == [1, 2] 10 | assert unpack([1, 2]) == [1, 2] 11 | assert unpack([1]) == 1 12 | test = object() 13 | assert unpack(test) is test 14 | assert_raises(ValueError, unpack, [1, 2], True) 15 | 16 | 17 | def test_check_theano_variable(): 18 | check_theano_variable(None, 3, 'float') 19 | check_theano_variable([[1, 2]], 2, 'int') 20 | assert_raises(ValueError, check_theano_variable, 21 | tensor.vector(), 2, 'float') 22 | assert_raises(ValueError, check_theano_variable, 23 | tensor.vector(), 1, 'int') 24 | 25 | 26 | class TestFindBricks(object): 27 | def setUp(self): 28 | self.mlp = MLP([Sequence([Identity(name='id1').apply, 29 | Tanh(name='tanh1').apply], 30 | name='sequence1'), 31 | Sequence([Logistic(name='logistic1').apply, 32 | Identity(name='id2').apply, 33 | Tanh(name='tanh2').apply], 34 | 
name='sequence2'), 35 | Logistic(name='logistic2'), 36 | Sequence([Sequence([Logistic(name='logistic3').apply], 37 | name='sequence4').apply], 38 | name='sequence3')], 39 | [10, 5, 9, 5, 9]) 40 | 41 | def test_find_zeroth_level(self): 42 | found = find_bricks([self.mlp], lambda x: isinstance(x, MLP)) 43 | assert len(found) == 1 44 | assert found[0] == self.mlp 45 | 46 | def test_find_zeroth_level_repeated(self): 47 | found = find_bricks([self.mlp, self.mlp], lambda x: isinstance(x, MLP)) 48 | assert len(found) == 1 49 | assert found[0] == self.mlp 50 | 51 | def test_find_all_unique(self): 52 | found = find_bricks([self.mlp, self.mlp] + list(self.mlp.children), 53 | lambda _: True) 54 | assert len(found) == 16 # 12 activations plus 4 linear transformations 55 | 56 | def test_find_none(self): 57 | found = find_bricks([self.mlp], lambda _: False) 58 | assert len(found) == 0 59 | 60 | def test_find_first_level(self): 61 | found = set(find_bricks([self.mlp], lambda x: isinstance(x, Sequence))) 62 | assert len(found) == 5 63 | assert self.mlp in found 64 | found.remove(self.mlp) 65 | sequences = set(self.mlp.activations[0:2] + 66 | [self.mlp.activations[3], 67 | self.mlp.activations[3].children[0]]) 68 | assert sequences == found 69 | 70 | def test_find_second_and_third_level(self): 71 | found = set(find_bricks([self.mlp], lambda x: isinstance(x, Identity))) 72 | assert len(found) == 2 73 | assert self.mlp.activations[0].children[0] in found 74 | assert self.mlp.activations[1].children[1] in found 75 | 76 | def test_find_first_and_second_and_third_level(self): 77 | found = set(find_bricks([self.mlp], lambda x: isinstance(x, Logistic))) 78 | assert self.mlp.activations[2] in found 79 | assert self.mlp.activations[1].children[0] in found 80 | assert self.mlp.activations[3].children[0].children[0] in found 81 | --------------------------------------------------------------------------------