├── .bumpversion.cfg ├── .cookiecutterrc ├── .coveragerc ├── .editorconfig ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── appveyor.yml ├── ci ├── appveyor-bootstrap.py ├── appveyor-download.py ├── appveyor-with-compiler.cmd ├── bootstrap.py └── templates │ ├── .travis.yml │ └── appveyor.yml ├── docs ├── FirstSteps │ ├── index_first_steps.rst │ └── why-alp.rst ├── Setup │ ├── config_CLI_launch.rst │ ├── how_does_it_work.rst │ ├── index_setup.rst │ └── requirements.rst ├── Tutorials │ ├── index_tuto.rst │ ├── tuto0.rst │ ├── tuto1.rst │ ├── tuto2.rst │ ├── tuto3.rst │ └── tuto4.rst ├── Userguide │ ├── Experiment.rst │ ├── Services.rst │ └── index_userguide.rst ├── _static │ ├── architecture.svg │ ├── download.png │ ├── last_bouquetin.png │ └── last_bouquetin.svg ├── _templates │ └── last_modified.html ├── conf.py ├── index.rst ├── projectevolution │ ├── authors.rst │ ├── changelog.rst │ ├── contribute.rst │ └── index_projectevolution.rst ├── reference │ ├── alp.appcom.rst │ ├── alp.backend.rst │ ├── alp.rst │ └── index.rst ├── requirements.txt └── spelling_wordlist.txt ├── install-prereqs.sh ├── req ├── requirements.txt ├── requirements_cli.txt └── requirements_first.txt ├── setup.cfg ├── setup.py ├── src └── alp │ ├── __init__.py │ ├── appcom │ ├── __init__.py │ ├── core.py │ ├── ensembles.py │ └── utils.py │ ├── backend │ ├── __init__.py │ ├── common.py │ ├── keras_backend.py │ └── sklearn_backend.py │ ├── celapp.py │ ├── cli.py │ ├── cli_utils.py │ ├── dbbackend │ ├── __init__.py │ └── mongo_backend.py │ └── utils │ ├── __init__.py │ └── utils_tests.py ├── tests ├── backend │ ├── test_common.py │ ├── test_keras_backend.py │ └── test_sklearn_backend.py ├── core │ └── test_hpoptim.py ├── dbbackend │ └── test_mongodb.py └── test_alp.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.3.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | 8 | [bumpversion:file:docs/conf.py] 9 | 10 | [bumpversion:file:src/alp/__init__.py] 11 | 12 | -------------------------------------------------------------------------------- /.cookiecutterrc: -------------------------------------------------------------------------------- 1 | # This file exists so you can easily regenerate your project. 2 | # 3 | # `cookiepatcher` is a convenient shim around `cookiecutter` 4 | # for regenerating projects (it will generate a .cookiecutterrc 5 | # automatically for any template). 
To use it: 6 | # 7 | # pip install cookiepatcher 8 | # cookiepatcher gh:ionelmc/cookiecutter-pylibrary project-path 9 | # 10 | # See: 11 | # https://pypi.python.org/pypi/cookiecutter 12 | # 13 | # Alternatively, you can run: 14 | # 15 | # cookiecutter --overwrite-if-exists --config-file=project-path/.cookiecutterrc gh:ionelmc/cookiecutter-pylibrary 16 | 17 | default_context: 18 | 19 | appveyor: 'yes' 20 | c_extension_cython: 'no' 21 | c_extension_optional: 'no' 22 | c_extension_support: 'no' 23 | codacy: 'yes' 24 | codeclimate: 'yes' 25 | codecov: 'yes' 26 | command_line_interface: 'plain' 27 | command_line_interface_bin_name: 'alp' 28 | coveralls: 'yes' 29 | distribution_name: 'alp' 30 | email: 'thomas.boquet@hec.ca' 31 | full_name: 'Thomas Boquet' 32 | github_username: 'tboquet' 33 | landscape: 'no' 34 | package_name: 'alp' 35 | project_name: 'ALP' 36 | project_short_description: 'Machine learning for teams' 37 | release_date: 'today' 38 | repo_name: 'python-alp' 39 | requiresio: 'yes' 40 | scrutinizer: 'no' 41 | sphinx_doctest: 'yes' 42 | sphinx_theme: 'sphinx-py3doc-enhanced-theme' 43 | test_matrix_configurator: 'no' 44 | test_matrix_separate_coverage: 'no' 45 | test_runner: 'pytest' 46 | travis: 'yes' 47 | version: '0.1.0' 48 | website: 'https://tboquet.github.io' 49 | year: 'now' 50 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = 3 | src/alp 4 | */site-packages/alp 5 | 6 | [run] 7 | branch = True 8 | source = alp 9 | parallel = true 10 | 11 | [report] 12 | show_missing = true 13 | precision = 2 14 | omit = *migrations* 15 | exclude_lines = 16 | pragma: no cover 17 | raise Exception 18 | except Exception as e: 19 | except Exception: 20 | raise NotImplementedError 21 | except MemoryError 22 | raise TypeError 23 | except KeyboardInterrupt -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # see http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | charset = utf-8 11 | 12 | [*.{bat,cmd,ps1}] 13 | end_of_line = crlf 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | .eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | venv*/ 22 | pyvenv*/ 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | .coverage.* 31 | nosetests.xml 32 | coverage.xml 33 | htmlcov 34 | 35 | # Translations 36 | *.mo 37 | 38 | # Mr Developer 39 | .mr.developer.cfg 40 | .project 41 | .pydevproject 42 | .idea 43 | *.iml 44 | *.komodoproject 45 | 46 | # Complexity 47 | output/*.html 48 | output/*/index.html 49 | 50 | # Sphinx 51 | docs/_build 52 | 53 | .DS_Store 54 | *~ 55 | .*.sw[po] 56 | .build 57 | .ve 58 | .env 59 | .cache 60 | .pytest 61 | .bootstrap 62 | .appveyor.token 63 | *.bak 64 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: python 2 | python: '3.5' 3 | sudo: required 4 | dist: trusty 5 | services: 6 | - mongodb 7 | - docker 8 | env: 9 | global: 10 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 11 | - SEGFAULT_SIGNALS=all 12 | - TEST_MODE=ON 13 | matrix: 14 | - TOXENV=check 15 | - TOXENV=docs 16 | 17 | - TOXENV=py27,coveralls,codecov 18 | - TOXENV=py34,coveralls,codecov 19 | - TOXENV=py35,coveralls,codecov 20 | addons: 21 | code_climate: 22 | repo_token: dfe5865d21322900ba6972d30da44e2859d3533fead6c26cdf217ebb540000ce 23 | apt: 24 | packages: 25 | - libhdf5-dev 26 | before_install: 27 | - echo $TRAVIS_PULL_REQUEST_BRANCH 28 | - echo $TRAVIS_BRANCH 29 | - SUFFIX='' 30 | - if [[ "$TRAVIS_BRANCH" =~ dev ]]; then SUFFIX=dev; fi 31 | - if [[ "$TRAVIS_PULL_REQUEST_BRANCH" =~ dev ]]; then SUFFIX=dev; fi 32 | - echo $SUFFIX 33 | - sudo mkdir /parameters_h5 34 | - sudo chmod 777 /parameters_h5 35 | - sudo mkdir /data_generator 36 | - sudo chmod 777 /data_generator 37 | - sudo rabbitmqctl stop 38 | - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1397BC53640DB551 39 | - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv EA312927 40 | - echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list 41 | - sudo apt-get update 42 | - sudo apt-get install -y libhdf5-dev 43 | - sudo apt-get install -y mongodb-org 44 | - python --version 45 | - uname -a 46 | - lsb_release -a 47 | - if [[ ("$TOXENV" == "py27,coveralls,codecov") ]]; then docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -p 27018:27017 -d --restart=always mongo; docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo; docker run -d -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq --name=rabbitmq_sched -p 8080:15672 -p 5672:5672 --restart=always rabbitmq:3-management; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=keras_worker --restart=always tboquet/travis_worker_${SUFFIX}k:latest; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=sklearn_worker --restart=always tboquet/travis_worker_${SUFFIX}sk:latest;fi 48 | - if [[ ("$TOXENV" == "py34,coveralls,codecov" || "$TOXENV" == "py35,coveralls,codecov") ]]; then docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -p 27018:27017 -d --restart=always mongo; docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo; docker run -d -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq --name=rabbitmq_sched -p 8080:15672 -p 5672:5672 --restart=always rabbitmq:3-management; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=keras_worker --restart=always tboquet/travis_worker_${SUFFIX}k:py3; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r 
--link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=sklearn_worker --restart=always tboquet/travis_worker_${SUFFIX}sk:py3;fi 49 | - docker ps 50 | - sleep 3 51 | - if [[ ("$TOXENV" == "py27,coveralls,codecov" || "$TOXENV" == "py34,coveralls,codecov" || "$TOXENV" == "py35,coveralls,codecov") ]]; then docker logs sklearn_worker;docker logs keras_worker;fi 52 | install: 53 | - pip install tox 54 | - pip install --upgrade pip 55 | - virtualenv --version 56 | - easy_install --version 57 | - pip --version 58 | - tox --version 59 | script: 60 | - tox -v 61 | after_failure: 62 | - more .tox/log/* | cat 63 | - more .tox/*/log/* | cat 64 | - docker logs sklearn_worker 65 | - docker logs keras_worker 66 | before_cache: 67 | - rm -rf $HOME/.cache/pip/log 68 | cache: 69 | directories: 70 | - $HOME/.cache/pip 71 | notifications: 72 | email: 73 | on_success: never 74 | on_failure: always 75 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Authors 3 | ======= 4 | 5 | * Thomas Boquet - https://github.com/tboquet 6 | * Paul Lemaître 7 | 8 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Changelog 3 | ========= 4 | 5 | 6 | 0.3.0 (2017-01-17) 7 | ================== 8 | 9 | * Command Line Interface to launch services. 10 | * sklearn backend is stable with 12 models supported and all sklearn metrics 11 | * Keras backend supports custom objects 12 | * asynchronous fit is stable for all backends 13 | * fuel generators are supported as training data and validation data source 14 | * Ensemble class in core (abstraction for many models) 15 | * Basic HyperParameter optimisation 16 | * Better documentation 17 | 18 | 19 | 0.2.0 (2016-04-21) 20 | ================== 21 | 22 | * Keras backend is stable 23 | 24 | 25 | 0.1.0 (2016-04-12) 26 | ================== 27 | 28 | * First release 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | Bug reports 9 | =========== 10 | 11 | When `reporting a bug `_ please include: 12 | 13 | * Your operating system name and version. 14 | * Any details about your local setup that might be helpful in troubleshooting. 15 | * Detailed steps to reproduce the bug. 16 | 17 | Documentation improvements 18 | ========================== 19 | 20 | ALP could always use more documentation, whether as part of the 21 | official ALP docs, in docstrings, or even on the web in blog posts, 22 | articles, and such. 23 | 24 | Feature requests and feedback 25 | ============================= 26 | 27 | The best way to send feedback is to file an issue at https://github.com/tboquet/python-alp/issues. 28 | 29 | If you are proposing a feature: 30 | 31 | * Explain in detail how it would work. 32 | * Keep the scope as narrow as possible, to make it easier to implement. 33 | * Remember that this is a volunteer-driven project, and that code contributions are welcome :) 34 | 35 | Development 36 | =========== 37 | 38 | To set up `python-alp` for local development: 39 | 40 | 1. 
Fork `python-alp `_ 41 | (look for the "Fork" button). 42 | 2. Clone your fork locally:: 43 | 44 | git clone git@github.com:your_name_here/python-alp.git 45 | 46 | 3. Create a branch for local development:: 47 | 48 | git checkout -b name-of-your-bugfix-or-feature 49 | 50 | Now you can make your changes locally. 51 | 52 | 4. When you're done making changes, run all the checks, doc builder and spell checker with `tox `_ one command:: 53 | 54 | tox 55 | 56 | 5. Commit your changes and push your branch to GitHub:: 57 | 58 | git add . 59 | git commit -m "Your detailed description of your changes." 60 | git push origin name-of-your-bugfix-or-feature 61 | 62 | 6. Submit a pull request through the GitHub website. 63 | 64 | Pull Request Guidelines 65 | ======================= 66 | 67 | If you need some code review or feedback while you're developing the code just make the pull request. 68 | 69 | For merging, you should: 70 | 71 | 1. Include passing tests (run ``tox``) [1]_. 72 | 2. Update documentation when there's new API, functionality etc. 73 | 3. Add a note to ``CHANGELOG.rst`` about the changes. 74 | 4. Add yourself to ``AUTHORS.rst``. 75 | 76 | .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will 77 | `run the tests `_ for each change you add in the pull request. 78 | 79 | It will be slower though ... 80 | 81 | Tips 82 | ==== 83 | 84 | To run a subset of tests:: 85 | 86 | tox -e envname -- py.test -k test_myfeature 87 | 88 | To run all the test environments in *parallel* (you need to ``pip install detox``):: 89 | 90 | detox 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | All contributions by Thomas Boquet: 2 | Copyright (c) 2016 Thomas Boquet 3 | 4 | All contributions by R2000 inc.: 5 | Copyright (c) 2016 R2000 inc. 6 | 7 | All other contributions: 8 | Copyright (c) 2016, the respective contributors. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft docs 2 | graft examples 3 | graft src 4 | graft ci 5 | graft tests 6 | 7 | include .bumpversion.cfg 8 | include .coveragerc 9 | include .cookiecutterrc 10 | include .editorconfig 11 | include .isort.cfg 12 | include *.sh 13 | include req/requirements.txt 14 | include req/requirements_first.txt 15 | include req/requirements_cli.txt 16 | 17 | include AUTHORS.rst 18 | include CHANGELOG.rst 19 | include CONTRIBUTING.rst 20 | include LICENSE 21 | include README.rst 22 | 23 | include tox.ini .travis.yml appveyor.yml 24 | 25 | global-exclude *.py[cod] __pycache__ *.so *.dylib 26 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Overview 3 | ======== 4 | 5 | .. start-badges 6 | 7 | |travis| |requires| |coveralls| |codecov| |codacy| |docs| 8 | 9 | .. |travis| image:: https://travis-ci.org/tboquet/python-alp.svg?branch=master 10 | :alt: Travis-CI Build Status 11 | :target: https://travis-ci.org/tboquet/python-alp 12 | 13 | .. |requires| image:: https://requires.io/github/tboquet/python-alp/requirements.svg?branch=master 14 | :alt: Requirements Status 15 | :target: https://requires.io/github/tboquet/python-alp/requirements/?branch=master 16 | 17 | .. |coveralls| image:: https://coveralls.io/repos/tboquet/python-alp/badge.svg?branch=master&service=github 18 | :alt: Coverage Status 19 | :target: https://coveralls.io/r/tboquet/python-alp 20 | 21 | .. |codecov| image:: https://codecov.io/github/tboquet/python-alp/coverage.svg?branch=master 22 | :alt: Coverage Status 23 | :target: https://codecov.io/github/tboquet/python-alp 24 | 25 | .. |codacy| image:: https://img.shields.io/codacy/b7f6d79244d8480099a3593db2de9560.svg?style=flat 26 | :target: https://www.codacy.com/app/tboquet/python-alp 27 | :alt: Codacy Code Quality Status 28 | 29 | .. |docs| image:: https://readthedocs.org/projects/python-alp/badge/?style=flat 30 | :target: https://readthedocs.org/projects/python-alp 31 | :alt: Documentation Status 32 | 33 | .. end-badges 34 | 35 | 36 | ALP helps you experiment with a lot of machine learning models quickly. It provides you with a simple way of scheduling and recording experiments. 37 | 38 | This library has been developed to work well with Keras and Scikit-learn but can suit a lot of other frameworks. 39 | 40 | Documentation 41 | ============= 42 | 43 | http://python-alp.readthedocs.io/ 44 | 45 | Quickstart 46 | ========== 47 | 48 | Clone the repo and install the library: 49 | 50 | .. code-block:: bash 51 | 52 | git clone https://github.com/tboquet/python-alp.git 53 | cd python-alp 54 | python setup.py install 55 | 56 | Install the Command Line Interface dependencies: 57 | 58 | .. code-block:: bash 59 | 60 | cd req 61 | pip install -r requirements_cli.txt 62 | 63 | Generate a base configuration using an absolute path: 64 | 65 | .. code-block:: bash 66 | 67 | alp --verbose genconfig --outdir=/path/to/a/directory --cpu 68 | 69 | Launch the services: 70 | 71 | .. code-block:: bash 72 | 73 | alp --verbose service start /path/to/a/directory/.alp/containers.json 74 | 75 | Check the status of your containers: 76 | 77 | .. 
code-block:: bash 78 | 79 | alp --verbose status /path/to/a/directory/.alp/containers.json 80 | 81 | 82 | Log in to the Jupyter notebook you just launched in your browser @ :code:`localhost:440` using the password :code:`default`. 83 | 84 | Launch some experiments! 85 | 86 | .. code-block:: python 87 | 88 | # we import numpy and fix the seed 89 | import numpy as np 90 | np.random.seed(1337) # for reproducibility 91 | 92 | # we import alp and Keras tools that we will use 93 | import alp 94 | from keras.datasets import mnist 95 | from keras.models import Sequential 96 | from keras.layers import Dense, Dropout, Activation, Flatten 97 | from keras.utils import np_utils 98 | import keras.backend as K 99 | from keras.optimizers import Adam 100 | from alp.appcom.ensembles import HParamsSearch 101 | 102 | # if you use tensorflow you must use this configuration 103 | # so that it doesn't use all of the GPU's memory (default config) 104 | import tensorflow as tf 105 | 106 | config = tf.ConfigProto(allow_soft_placement=True) 107 | config.gpu_options.allow_growth = True 108 | session = tf.Session(config=config) 109 | K.set_session(session) 110 | 111 | batch_size = 128 112 | nb_classes = 10 113 | nb_epoch = 12 114 | 115 | # input image dimensions 116 | img_rows, img_cols = 28, 28 117 | # number of features to use 118 | nb_features = 32 119 | 120 | # the data, shuffled and split between train and test sets 121 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 122 | 123 | X_train = X_train.astype('float32') 124 | X_test = X_test.astype('float32') 125 | X_train /= 255 126 | X_test /= 255 127 | print('X_train shape:', X_train.shape) 128 | print(X_train.shape[0], 'train samples') 129 | print(X_test.shape[0], 'test samples') 130 | 131 | if K.image_dim_ordering() == 'th': 132 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 133 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 134 | input_shape = (1, img_rows, img_cols) 135 | else: 136 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 137 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 138 | input_shape = (img_rows, img_cols, 1) 139 | 140 | # convert class vectors to binary class matrices 141 | Y_train = np_utils.to_categorical(y_train, nb_classes) 142 | Y_test = np_utils.to_categorical(y_test, nb_classes) 143 | 144 | # put the data in the form ALP expects 145 | data, data_val = dict(), dict() 146 | data["X"] = X_train[:500] 147 | data["y"] = Y_train[:500] 148 | data_val["X"] = X_test[:500] 149 | data_val["y"] = Y_test[:500] 150 | 151 | # Define and compile the model 152 | 153 | model = Sequential() 154 | 155 | model.add(Flatten(input_shape=input_shape)) 156 | model.add(Dense(nb_features)) 157 | model.add(Activation('relu')) 158 | model.add(Dropout(0.25)) 159 | 160 | model.add(Dense(128)) 161 | model.add(Activation('relu')) 162 | model.add(Dropout(0.5)) 163 | model.add(Dense(nb_classes)) 164 | model.add(Activation('softmax')) 165 | 166 | model.compile(loss='categorical_crossentropy', 167 | optimizer='adadelta', 168 | metrics=['accuracy']) 169 | 170 | # Define you experiment 171 | 172 | from alp.appcom.core import Experiment 173 | 174 | expe = Experiment(model) 175 | 176 | # Fit the model linked to your experiment 177 | results = expe.fit([data], [data_val], nb_epoch=2, batch_size=batch_size) 178 | 179 | # Predict using your model 180 | expe.predict(data['X']) 181 | 182 | 183 | `Get started with the tutorial series!`_ 184 | 185 | * Free software: Apache license 186 | 187 | .. 
_`docker setup`: http://python-alp.readthedocs.io/en/latest/dockersetup.html 188 | .. _`Get started with the tutorial series!`: http://python-alp.readthedocs.io/en/latest/Tutorials/index_tuto.html 189 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch}-{build}' 2 | build: off 3 | cache: 4 | - '%LOCALAPPDATA%\pip\Cache' 5 | environment: 6 | global: 7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd' 8 | matrix: 9 | - TOXENV: check 10 | PYTHON_HOME: C:\Python27 11 | PYTHON_VERSION: '2.7' 12 | PYTHON_ARCH: '32' 13 | 14 | - TOXENV: 'py27,codecov' 15 | TOXPYTHON: C:\Python27\python.exe 16 | PYTHON_HOME: C:\Python27 17 | PYTHON_VERSION: '2.7' 18 | PYTHON_ARCH: '32' 19 | 20 | - TOXENV: 'py27,codecov' 21 | TOXPYTHON: C:\Python27-x64\python.exe 22 | WINDOWS_SDK_VERSION: v7.0 23 | PYTHON_HOME: C:\Python27-x64 24 | PYTHON_VERSION: '2.7' 25 | PYTHON_ARCH: '64' 26 | 27 | - TOXENV: 'py34,codecov' 28 | TOXPYTHON: C:\Python34\python.exe 29 | PYTHON_HOME: C:\Python34 30 | PYTHON_VERSION: '3.4' 31 | PYTHON_ARCH: '32' 32 | 33 | - TOXENV: 'py34,codecov' 34 | TOXPYTHON: C:\Python34-x64\python.exe 35 | WINDOWS_SDK_VERSION: v7.1 36 | PYTHON_HOME: C:\Python34-x64 37 | PYTHON_VERSION: '3.4' 38 | PYTHON_ARCH: '64' 39 | 40 | - TOXENV: 'py35,codecov' 41 | TOXPYTHON: C:\Python35\python.exe 42 | PYTHON_HOME: C:\Python35 43 | PYTHON_VERSION: '3.5' 44 | PYTHON_ARCH: '32' 45 | 46 | - TOXENV: 'py35,codecov' 47 | TOXPYTHON: C:\Python35-x64\python.exe 48 | PYTHON_HOME: C:\Python35-x64 49 | PYTHON_VERSION: '3.5' 50 | PYTHON_ARCH: '64' 51 | 52 | init: 53 | - ps: echo $env:TOXENV 54 | - ps: ls C:\Python* 55 | install: 56 | - python -u ci\appveyor-bootstrap.py 57 | - '%PYTHON_HOME%\Scripts\virtualenv --version' 58 | - '%PYTHON_HOME%\Scripts\easy_install --version' 59 | - '%PYTHON_HOME%\Scripts\pip --version' 60 | - '%PYTHON_HOME%\Scripts\tox --version' 61 | test_script: 62 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox' 63 | 64 | on_failure: 65 | - ps: dir "env:" 66 | - ps: get-content .tox\*\log\* 67 | artifacts: 68 | - path: dist\* 69 | 70 | ### To enable remote debugging uncomment this (also, see: http://www.appveyor.com/docs/how-to/rdp-to-build-worker): 71 | # on_finish: 72 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 73 | -------------------------------------------------------------------------------- /ci/appveyor-bootstrap.py: -------------------------------------------------------------------------------- 1 | """ 2 | AppVeyor will at least have few Pythons around so there's no point of implementing a bootstrapper in PowerShell. 3 | 4 | This is a port of https://github.com/pypa/python-packaging-user-guide/blob/master/source/code/install.ps1 5 | with various fixes and improvements that just weren't feasible to implement in PowerShell. 
6 | """ 7 | from __future__ import print_function 8 | from os import environ 9 | from os.path import exists 10 | from subprocess import check_call 11 | 12 | try: 13 | from urllib.request import urlretrieve 14 | except ImportError: 15 | from urllib import urlretrieve 16 | 17 | BASE_URL = "https://www.python.org/ftp/python/" 18 | GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py" 19 | GET_PIP_PATH = "C:\get-pip.py" 20 | URLS = { 21 | ("2.7", "64"): BASE_URL + "2.7.10/python-2.7.10.amd64.msi", 22 | ("2.7", "32"): BASE_URL + "2.7.10/python-2.7.10.msi", 23 | # NOTE: no .msi installer for 3.3.6 24 | ("3.3", "64"): BASE_URL + "3.3.3/python-3.3.3.amd64.msi", 25 | ("3.3", "32"): BASE_URL + "3.3.3/python-3.3.3.msi", 26 | ("3.4", "64"): BASE_URL + "3.4.3/python-3.4.3.amd64.msi", 27 | ("3.4", "32"): BASE_URL + "3.4.3/python-3.4.3.msi", 28 | ("3.5", "64"): BASE_URL + "3.5.0/python-3.5.0-amd64.exe", 29 | ("3.5", "32"): BASE_URL + "3.5.0/python-3.5.0.exe", 30 | } 31 | INSTALL_CMD = { 32 | # Commands are allowed to fail only if they are not the last command. Eg: uninstall (/x) allowed to fail. 33 | "2.7": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"], 34 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]], 35 | "3.3": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"], 36 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]], 37 | "3.4": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"], 38 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]], 39 | "3.5": [["{path}", "/quiet", "TargetDir={home}"]], 40 | } 41 | 42 | 43 | def download_file(url, path): 44 | print("Downloading: {} (into {})".format(url, path)) 45 | progress = [0, 0] 46 | 47 | def report(count, size, total): 48 | progress[0] = count * size 49 | if progress[0] - progress[1] > 1000000: 50 | progress[1] = progress[0] 51 | print("Downloaded {:,}/{:,} ...".format(progress[1], total)) 52 | 53 | dest, _ = urlretrieve(url, path, reporthook=report) 54 | return dest 55 | 56 | 57 | def install_python(version, arch, home): 58 | print("Installing Python", version, "for", arch, "bit architecture to", home) 59 | if exists(home): 60 | return 61 | 62 | path = download_python(version, arch) 63 | print("Installing", path, "to", home) 64 | success = False 65 | for cmd in INSTALL_CMD[version]: 66 | cmd = [part.format(home=home, path=path) for part in cmd] 67 | print("Running:", " ".join(cmd)) 68 | try: 69 | check_call(cmd) 70 | except Exception as exc: 71 | print("Failed command", cmd, "with:", exc) 72 | if exists("install.log"): 73 | with open("install.log") as fh: 74 | print(fh.read()) 75 | else: 76 | success = True 77 | if success: 78 | print("Installation complete!") 79 | else: 80 | print("Installation failed") 81 | 82 | 83 | def download_python(version, arch): 84 | for _ in range(3): 85 | try: 86 | return download_file(URLS[version, arch], "installer.exe") 87 | except Exception as exc: 88 | print("Failed to download:", exc) 89 | print("Retrying ...") 90 | 91 | 92 | def install_pip(home): 93 | pip_path = home + "/Scripts/pip.exe" 94 | python_path = home + "/python.exe" 95 | if exists(pip_path): 96 | print("pip already installed.") 97 | else: 98 | print("Installing pip...") 99 | download_file(GET_PIP_URL, GET_PIP_PATH) 100 | print("Executing:", python_path, GET_PIP_PATH) 101 | check_call([python_path, GET_PIP_PATH]) 102 | 103 | 104 | def install_packages(home, *packages): 105 | cmd = [home + "/Scripts/pip.exe", 
"install"] 106 | cmd.extend(packages) 107 | check_call(cmd) 108 | 109 | 110 | if __name__ == "__main__": 111 | install_python(environ['PYTHON_VERSION'], environ['PYTHON_ARCH'], environ['PYTHON_HOME']) 112 | install_pip(environ['PYTHON_HOME']) 113 | install_packages(environ['PYTHON_HOME'], "setuptools>=18.0.1", "wheel", "tox", "virtualenv>=13.1.0") 114 | -------------------------------------------------------------------------------- /ci/appveyor-download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Use the AppVeyor API to download Windows artifacts. 4 | 5 | Taken from: https://bitbucket.org/ned/coveragepy/src/tip/ci/download_appveyor.py 6 | # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 7 | # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt 8 | """ 9 | from __future__ import unicode_literals 10 | 11 | import argparse 12 | import os 13 | import requests 14 | import zipfile 15 | 16 | 17 | def make_auth_headers(): 18 | """Make the authentication headers needed to use the Appveyor API.""" 19 | path = os.path.expanduser("~/.appveyor.token") 20 | if not os.path.exists(path): 21 | raise RuntimeError( 22 | "Please create a file named `.appveyor.token` in your home directory. " 23 | "You can get the token from https://ci.appveyor.com/api-token" 24 | ) 25 | with open(path) as f: 26 | token = f.read().strip() 27 | 28 | headers = { 29 | 'Authorization': 'Bearer {}'.format(token), 30 | } 31 | return headers 32 | 33 | 34 | def download_latest_artifacts(account_project, build_id): 35 | """Download all the artifacts from the latest build.""" 36 | if build_id is None: 37 | url = "https://ci.appveyor.com/api/projects/{}".format(account_project) 38 | else: 39 | url = "https://ci.appveyor.com/api/projects/{}/build/{}".format(account_project, build_id) 40 | build = requests.get(url, headers=make_auth_headers()).json() 41 | jobs = build['build']['jobs'] 42 | print(u"Build {0[build][version]}, {1} jobs: {0[build][message]}".format(build, len(jobs))) 43 | 44 | for job in jobs: 45 | name = job['name'] 46 | print(u" {0}: {1[status]}, {1[artifactsCount]} artifacts".format(name, job)) 47 | 48 | url = "https://ci.appveyor.com/api/buildjobs/{}/artifacts".format(job['jobId']) 49 | response = requests.get(url, headers=make_auth_headers()) 50 | artifacts = response.json() 51 | 52 | for artifact in artifacts: 53 | is_zip = artifact['type'] == "Zip" 54 | filename = artifact['fileName'] 55 | print(u" {0}, {1} bytes".format(filename, artifact['size'])) 56 | 57 | url = "https://ci.appveyor.com/api/buildjobs/{}/artifacts/{}".format(job['jobId'], filename) 58 | download_url(url, filename, make_auth_headers()) 59 | 60 | if is_zip: 61 | unpack_zipfile(filename) 62 | os.remove(filename) 63 | 64 | 65 | def ensure_dirs(filename): 66 | """Make sure the directories exist for `filename`.""" 67 | dirname, _ = os.path.split(filename) 68 | if dirname and not os.path.exists(dirname): 69 | os.makedirs(dirname) 70 | 71 | 72 | def download_url(url, filename, headers): 73 | """Download a file from `url` to `filename`.""" 74 | ensure_dirs(filename) 75 | response = requests.get(url, headers=headers, stream=True) 76 | if response.status_code == 200: 77 | with open(filename, 'wb') as f: 78 | for chunk in response.iter_content(16 * 1024): 79 | f.write(chunk) 80 | else: 81 | print(u" Error downloading {}: {}".format(url, response)) 82 | 83 | 84 | def unpack_zipfile(filename): 85 | """Unpack a zipfile, using the names 
in the zip.""" 86 | with open(filename, 'rb') as fzip: 87 | z = zipfile.ZipFile(fzip) 88 | for name in z.namelist(): 89 | print(u" extracting {}".format(name)) 90 | ensure_dirs(name) 91 | z.extract(name) 92 | 93 | parser = argparse.ArgumentParser(description='Download artifacts from AppVeyor.') 94 | parser.add_argument('--id', 95 | metavar='PROJECT_ID', 96 | default='tboquet/python-alp', 97 | help='Project ID in AppVeyor.') 98 | parser.add_argument('build', 99 | nargs='?', 100 | metavar='BUILD_ID', 101 | help='Build ID in AppVeyor. Eg: master-123') 102 | 103 | if __name__ == "__main__": 104 | # import logging 105 | # logging.basicConfig(level="DEBUG") 106 | args = parser.parse_args() 107 | download_latest_artifacts(args.id, args.build) 108 | -------------------------------------------------------------------------------- /ci/appveyor-with-compiler.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | SET COMMAND_TO_RUN=%* 21 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 22 | SET WIN_WDK="c:\Program Files (x86)\Windows Kits\10\Include\wdf" 23 | ECHO SDK: %WINDOWS_SDK_VERSION% ARCH: %PYTHON_ARCH% 24 | 25 | 26 | IF "%PYTHON_VERSION%"=="3.5" ( 27 | IF EXIST %WIN_WDK% ( 28 | REM See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ 29 | REN %WIN_WDK% 0wdf 30 | ) 31 | GOTO main 32 | ) 33 | 34 | IF "%PYTHON_ARCH%"=="32" ( 35 | GOTO main 36 | ) 37 | 38 | SET DISTUTILS_USE_SDK=1 39 | SET MSSdk=1 40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 41 | CALL "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 42 | 43 | :main 44 | 45 | ECHO Executing: %COMMAND_TO_RUN% 46 | CALL %COMMAND_TO_RUN% || EXIT 1 47 | -------------------------------------------------------------------------------- /ci/bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import, print_function, unicode_literals 4 | 5 | import os 6 | import sys 7 | from os.path import exists 8 | from os.path import join 9 | from os.path import dirname 10 | from os.path import abspath 11 | 12 | 13 | if __name__ == "__main__": 14 | base_path = dirname(dirname(abspath(__file__))) 15 | print("Project path: {0}".format(base_path)) 16 | env_path = join(base_path, ".tox", "bootstrap") 17 | if sys.platform == "win32": 18 | bin_path = join(env_path, "Scripts") 19 | else: 20 | bin_path = join(env_path, "bin") 21 | if not exists(env_path): 
22 | import subprocess 23 | print("Making bootstrap env in: {0} ...".format(env_path)) 24 | try: 25 | subprocess.check_call(["virtualenv", env_path]) 26 | except Exception: 27 | subprocess.check_call([sys.executable, "-m", "virtualenv", env_path]) 28 | print("Installing `jinja2` into bootstrap environment ...") 29 | subprocess.check_call([join(bin_path, "pip"), "install", "jinja2"]) 30 | activate = join(bin_path, "activate_this.py") 31 | exec(compile(open(activate, "rb").read(), activate, "exec"), dict(__file__=activate)) 32 | 33 | import jinja2 34 | 35 | import subprocess 36 | 37 | 38 | jinja = jinja2.Environment( 39 | loader=jinja2.FileSystemLoader(join(base_path, "ci", "templates")), 40 | trim_blocks=True, 41 | lstrip_blocks=True, 42 | keep_trailing_newline=True 43 | ) 44 | 45 | tox_environments = [ 46 | line.strip() 47 | for line in subprocess.check_output(['tox', '--listenvs'], universal_newlines=True).splitlines() 48 | ] 49 | tox_environments = [line for line in tox_environments if line not in ['clean', 'report', 'docs', 'check']] 50 | 51 | 52 | for name in os.listdir(join("ci", "templates")): 53 | with open(join(base_path, name), "w") as fh: 54 | fh.write(jinja.get_template(name).render(tox_environments=tox_environments)) 55 | print("Wrote {}".format(name)) 56 | print("DONE.") 57 | -------------------------------------------------------------------------------- /ci/templates/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: '3.5' 3 | sudo: false 4 | env: 5 | global: 6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | - SEGFAULT_SIGNALS=all 8 | matrix: 9 | - TOXENV=check 10 | - TOXENV=docs 11 | {% for env in tox_environments %}{{ '' }} 12 | - TOXENV={{ env }},coveralls,codecov 13 | {% endfor %} 14 | 15 | before_install: 16 | - python --version 17 | - uname -a 18 | - lsb_release -a 19 | install: 20 | - pip install tox 21 | - virtualenv --version 22 | - easy_install --version 23 | - pip --version 24 | - tox --version 25 | script: 26 | - tox -v 27 | after_failure: 28 | - more .tox/log/* | cat 29 | - more .tox/*/log/* | cat 30 | before_cache: 31 | - rm -rf $HOME/.cache/pip/log 32 | cache: 33 | directories: 34 | - $HOME/.cache/pip 35 | notifications: 36 | email: 37 | on_success: never 38 | on_failure: always 39 | -------------------------------------------------------------------------------- /ci/templates/appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch}-{build}' 2 | build: off 3 | cache: 4 | - '%LOCALAPPDATA%\pip\Cache' 5 | environment: 6 | global: 7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd' 8 | matrix: 9 | - TOXENV: check 10 | PYTHON_HOME: C:\Python27 11 | PYTHON_VERSION: '2.7' 12 | PYTHON_ARCH: '32' 13 | 14 | {% for env in tox_environments %}{% if env.startswith(('py27', 'py34', 'py35')) %} 15 | - TOXENV: '{{ env }},codecov' 16 | TOXPYTHON: C:\Python{{ env[2:4] }}\python.exe 17 | PYTHON_HOME: C:\Python{{ env[2:4] }} 18 | PYTHON_VERSION: '{{ env[2] }}.{{ env[3] }}' 19 | PYTHON_ARCH: '32' 20 | 21 | - TOXENV: '{{ env }},codecov' 22 | TOXPYTHON: C:\Python{{ env[2:4] }}-x64\python.exe 23 | {%- if env.startswith(('py2', 'py33', 'py34')) %} 24 | 25 | WINDOWS_SDK_VERSION: v7.{{ '1' if env.startswith('py3') else '0' }} 26 | {%- endif %} 27 | 28 | PYTHON_HOME: C:\Python{{ env[2:4] }}-x64 29 | PYTHON_VERSION: '{{ env[2] }}.{{ env[3] }}' 30 | PYTHON_ARCH: '64' 31 | 32 | {% endif %}{% endfor %} 33 | init: 34 | - ps: echo 
$env:TOXENV 35 | - ps: ls C:\Python* 36 | install: 37 | - python -u ci\appveyor-bootstrap.py 38 | - '%PYTHON_HOME%\Scripts\virtualenv --version' 39 | - '%PYTHON_HOME%\Scripts\easy_install --version' 40 | - '%PYTHON_HOME%\Scripts\pip --version' 41 | - '%PYTHON_HOME%\Scripts\tox --version' 42 | test_script: 43 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox' 44 | 45 | on_failure: 46 | - ps: dir "env:" 47 | - ps: get-content .tox\*\log\* 48 | artifacts: 49 | - path: dist\* 50 | 51 | ### To enable remote debugging uncomment this (also, see: http://www.appveyor.com/docs/how-to/rdp-to-build-worker): 52 | # on_finish: 53 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 54 | -------------------------------------------------------------------------------- /docs/FirstSteps/index_first_steps.rst: -------------------------------------------------------------------------------- 1 | First steps with ALP 2 | -------------------- 3 | 4 | .. include :: why-alp.rst 5 | -------------------------------------------------------------------------------- /docs/FirstSteps/why-alp.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Why ALP? 3 | ======== 4 | 5 | We noticed that, when dealing with a Machine Learning problem, we sometimes spend more time building a model, testing different architectures and comparing results than actually working on the ideas that will solve our problem. To help with that process, we developed an Asynchronous Learning Platform (ALP) that makes convenient use of the available hardware (CPU+GPU). The platform relies on independent services running in Docker containers. To make this platform easy to use, we built a convenient command line interface from which you can easily launch, stop, remove, update and monitor a configuration. 6 | 7 | The whole system runs in the background so that the final user does not directly interact with the databases or the broker and just runs code in a regular Jupyter Notebook or from an application. You can also launch monitoring containers and access different dashboards to supervise all of your experiments. Moreover, it is possible to easily retrieve one of the trained models along with its parameters at test time. 8 | 9 | ================================ 10 | What kind of models can you use? 11 | ================================ 12 | 13 | So far, the whole Keras_ neural network library is supported, as well as several models from the `scikit-learn`_ library. 14 | 15 | 16 | ============================================== 17 | What do I need to run ALP? What is inside ALP? 18 | ============================================== 19 | 20 | You need to use a machine running Linux to use ALP [1]_. 21 | ALP relies on Docker, RabbitMQ, Celery, MongoDB and nvidia-docker. It also supports interfacing with Fuel and thus depends on Theano. It's implemented in Python. However, since all services run in Docker containers, your OS only needs Docker (and nvidia-docker if you want to use an NVIDIA GPU). 22 | 23 | All of these concepts and dependencies are explained later in the Setup and Userguide sections. 24 | 25 | 26 | ====================== 27 | How could ALP help me? 
28 | ====================== 29 | 30 | We believe it might be useful for several applications such as: 31 | 32 | - **hyperparameter tuning**: for instance, if you want to test several architectures for your neural network model, ALP can help you deal with the tedious task of logging all the architectures, parameters and results. They are all automatically stored in the databases and you just have to select the best model given the validation(s) you specified. 33 | - **fitting several models on several data streams**: if you have data streams coming from a source and you want to fit a lot of online models, ALP makes it easy. With the support of Fuel generators, you could transform your data on the fly. The learning is then done using the resources of the host and the parameters of the models are stored. You could even code an API that returns predictions to your data service. 34 | - **post analysis**: extract and explore the parameters of models given their score on several data blocks. Sometimes it could be helpful to visualise the successful sets of parameters. 35 | 36 | - **model deployment in production**: when a model is trained, you can load it and deploy it instantly in production. 37 | 38 | 39 | 40 | .. [1] Unfortunately, at the time of development, running MongoDB in a Windows Docker container was not possible, but we will look into that soon. 41 | 42 | 43 | .. _Keras: http://keras.io/ 44 | .. _`scikit-learn`: http://scikit-learn.org/stable/ 45 | -------------------------------------------------------------------------------- /docs/Setup/config_CLI_launch.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | Launching ALP with the CLI 3 | ========================== 4 | 5 | To begin, we can generate a base configuration using the ALP CLI. We choose to write the configuration files on the host machine in order to be able to customize them easily afterwards. 6 | 7 | 8 | .. code-block:: bash 9 | 10 | alp --help 11 | 12 | This will provide you with some help about the command line interface. 13 | 14 | Generating a new configuration is as easy as: 15 | 16 | .. code-block:: bash 17 | 18 | alp --verbose genconfig --outdir=/path/to/a/directory 19 | 20 | 21 | The command will generate a base configuration with one controller, one scikit-learn worker and one Keras worker. 22 | We specify the output directory where we want to write the three configuration files. The first file, :code:`alpdb.json`, defines the connection between the database of models and the other containers. The second file, :code:`alpapp.json`, defines the connections between the broker, its database and the other containers. The third file, :code:`containers.json`, defines all the containers of the architecture. The linking is done automatically and ALP will use the newly created files to launch a new instance. 23 | 24 | In any case, verify that the ports that you want to use are free for the broker to communicate with the monitoring containers and for the Jupyter notebooks (if any) to run. 25 | 26 | To start all the services you can use :code:`alp service start`: 27 | 28 | .. code-block:: bash 29 | 30 | alp --verbose service start /path/to/a/directory 31 | 32 | You can then take a look at the status of the containers: 33 | 34 | .. code-block:: bash 35 | 36 | alp --verbose status /path/to/a/directory 37 | 38 | 39 | You should be able to access the Jupyter notebook on port :code:`440` of the machine where you launched the services.
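Putting the pieces together, a complete session could look like the sketch below. This is only an illustration: the :code:`--cpu` flag and the :code:`.alp/containers.json` path are taken from the README quickstart, and the exact names and layout of the generated files may differ on your setup.

.. code-block:: bash

    # generate a base configuration (CPU-only variant, as in the README quickstart)
    alp --verbose genconfig --outdir=/path/to/a/directory --cpu

    # start the services from the generated containers file, then check them
    alp --verbose service start /path/to/a/directory/.alp/containers.json
    alp --verbose status /path/to/a/directory/.alp/containers.json

    # the Jupyter notebook should now be reachable in your browser at localhost:440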
40 | -------------------------------------------------------------------------------- /docs/Setup/how_does_it_work.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | How does ALP work? 3 | =================== 4 | 5 | It seems complicated, but fear not. 6 | -------------------------------------------------------------------------------- /docs/Setup/index_setup.rst: -------------------------------------------------------------------------------- 1 | How to set up ALP? 2 | ------------------ 3 | 4 | .. include :: requirements.rst 5 | .. include :: config_CLI_launch.rst 6 | -------------------------------------------------------------------------------- /docs/Setup/requirements.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Requirements 3 | ============ 4 | 5 | Because the whole architecture has a lot of components, we use Docker_ to manage the platform and isolate the services. 6 | 7 | ALP has been developed to run on Ubuntu and has not been tested on other operating systems. 8 | 9 | You should first `install Docker`_ and `install nvidia-docker`_, then play a bit with Docker (check that you can access your GPU with nvidia-docker). Then, you should be ready to install ALP. 10 | 11 | You can then get ALP via pip: 12 | 13 | .. code-block:: bash 14 | 15 | pip install git+git://github.com/tboquet/python-alp 16 | 17 | 18 | That will install ALP on your machine, and you will be able to launch it via the Command Line Interface. 19 | 20 | .. _Docker: https://www.docker.com/ 21 | .. _`nvidia-docker`: https://github.com/NVIDIA/nvidia-docker 22 | .. _`install Docker`: https://docs.docker.com/engine/installation/linux/ubuntulinux/ 23 | .. _`install nvidia-docker`: https://github.com/NVIDIA/nvidia-docker/wiki/Installation 24 | 25 | -------------------------------------------------------------------------------- /docs/Tutorials/index_tuto.rst: -------------------------------------------------------------------------------- 1 | Some tutorials and use cases 2 | ---------------------------- 3 | 4 | .. include:: tuto0.rst 5 | .. include:: tuto1.rst 6 | .. include:: tuto2.rst 7 | .. include:: tuto3.rst 8 | .. include:: tuto4.rst -------------------------------------------------------------------------------- /docs/Tutorials/tuto0.rst: -------------------------------------------------------------------------------- 1 | =================================================================== 2 | Tutorial 0 : how to launch a basic experiment with Keras or sklearn 3 | =================================================================== 4 | 5 | Step 1 : launching ALP 6 | ~~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | 9 | Follow the instructions in the setup section. 10 | We assume at this point that you have a Jupyter notebook running on the controller. 11 | 12 | 13 | Step 2 : defining your model 14 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | You can follow `Step 2.1 : Keras`_ or `Step 2.2 : Scikit learn`_ depending on whether you want to use Keras_ or `scikit-learn`_. In both cases we will do the right imports, get some classification data, put it in the ALP format and instantiate a model. The important thing at the end of step 2 is to have the :code:`data`, :code:`data_val` and :code:`model` objects ready. 17 | 18 | Step 2.1 : Keras 19 | ++++++++++++++++ 20 | 21 | The following code gets some data and declares a simple artificial neural network with Keras: 22 | 23 | .. 
code-block:: python 24 | 25 | # we import numpy and fix the seed 26 | import numpy as np 27 | np.random.seed(1337) # for reproducibility 28 | 29 | # we import alp and Keras tools that we will use 30 | import alp 31 | from keras.datasets import mnist 32 | from keras.models import Sequential 33 | from keras.layers import Dense, Dropout, Activation, Flatten 34 | from keras.utils import np_utils 35 | import keras.backend as K 36 | from keras.optimizers import Adam 37 | from alp.appcom.ensembles import HParamsSearch 38 | 39 | # if you use tensorflow you must use this configuration 40 | # so that it doesn't use all of the GPU's memory (default config) 41 | import tensorflow as tf 42 | 43 | config = tf.ConfigProto(allow_soft_placement=True) 44 | config.gpu_options.allow_growth = True 45 | session = tf.Session(config=config) 46 | K.set_session(session) 47 | 48 | batch_size = 128 49 | nb_classes = 10 50 | nb_epoch = 12 51 | 52 | # input image dimensions 53 | img_rows, img_cols = 28, 28 54 | # number of features to use 55 | nb_filters = 32 56 | 57 | # the data, shuffled and split between train and test sets 58 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 59 | 60 | X_train = X_train.astype('float32') 61 | X_test = X_test.astype('float32') 62 | X_train /= 255 63 | X_test /= 255 64 | print('X_train shape:', X_train.shape) 65 | print(X_train.shape[0], 'train samples') 66 | print(X_test.shape[0], 'test samples') 67 | 68 | if K.image_dim_ordering() == 'th': 69 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 70 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 71 | input_shape = (1, img_rows, img_cols) 72 | else: 73 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 74 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 75 | input_shape = (img_rows, img_cols, 1) 76 | 77 | # convert class vectors to binary class matrices 78 | Y_train = np_utils.to_categorical(y_train, nb_classes) 79 | Y_test = np_utils.to_categorical(y_test, nb_classes) 80 | 81 | # put the data in the form ALP expects 82 | data, data_val = dict(), dict() 83 | data["X"] = X_train 84 | data["y"] = Y_train 85 | data_val["X"] = X_test 86 | data_val["y"] = Y_test 87 | 88 | # finally define and compile the model 89 | 90 | model = Sequential() 91 | 92 | model.add(Flatten(input_shape=input_shape)) 93 | model.add(Dense(nb_filters)) 94 | model.add(Activation('relu')) 95 | model.add(Dropout(0.25)) 96 | 97 | model.add(Dense(128)) 98 | model.add(Activation('relu')) 99 | model.add(Dropout(0.5)) 100 | model.add(Dense(nb_classes)) 101 | model.add(Activation('softmax')) 102 | 103 | model.compile(loss='categorical_crossentropy', 104 | optimizer='adadelta', 105 | metrics=['accuracy']) 106 | 107 | Note that we compile the model so that we also have information about the optimizer. 108 | 109 | 110 | Step 2.2 : Scikit learn 111 | +++++++++++++++++++++++ 112 | 113 | The following code gets some data and declares a simple logistic regression with :code:`scikit-learn`: 114 | 115 | .. 
code-block:: python 116 | 117 | # some imports 118 | from sklearn import cross_validation 119 | from sklearn import datasets 120 | from sklearn.linear_model import LogisticRegression 121 | 122 | # get some data 123 | iris = datasets.load_iris() 124 | X_train, X_test, y_train, y_test = cross_validation.train_test_split( 125 | iris.data, iris.target, test_size=0.2, random_state=0) 126 | 127 | # put the data in the form ALP expects 128 | data, data_val = dict(), dict() 129 | data["X"] = X_train 130 | data["y"] = y_train 131 | data_val["X"] = X_test 132 | data_val["y"] = y_test 133 | 134 | # define the model 135 | model = LogisticRegression() 136 | 137 | Please note that by default for the :code:`LogisticRegression`, the :code:`multi_class` parameter is set to OvR, that is to say one classifier per class. On the iris dataset, this means 3 classifiers. Unlike in Keras, the model is not compiled. So far, the measure of performance (validation metric) can only be the mean absolute error, but we will soon have several metrics working. 138 | 139 | 140 | Step 3 : fitting the model with ALP 141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 142 | 143 | Step 3.1 : defining the Experiment 144 | ++++++++++++++++++++++++++++++++++ 145 | 146 | In ALP, the base object is the Experiment. 147 | An Experiment trains, predicts, saves and logs a model. 148 | So the first step is to import and define the Experiment object. 149 | 150 | .. code-block:: python 151 | 152 | from alp.appcom.core import Experiment 153 | 154 | expe = Experiment(model) 155 | 156 | 157 | Step 3.2 : fit the model 158 | ++++++++++++++++++++++++ 159 | 160 | You have access to two types of methods to fit the model. 161 | 162 | * The :code:`fit` and :code:`fit_gen` methods allow you to fit the model in the same process. 163 | 164 | For the :code:`scikit-learn` backend, you can launch the computation with the following command without extra arguments: 165 | 166 | .. code-block:: python 167 | 168 | expe.fit([data], [data_val]) 169 | 170 | Note that the :code:`data` and the :code:`data_val` are put in lists. 171 | 172 | 173 | With Keras you might want to specify the number of epochs and the batch_size, as you would do when fitting a Keras :code:`model` object directly. These arguments will flow through to the final call. Note that they are not necessary for the fit; see the default arguments in the `Keras model doc `_. 174 | 175 | .. code-block:: python 176 | 177 | expe.fit([data], [data_val], nb_epoch=2, batch_size=batch_size) 178 | 179 | In both cases, the model is trained and automatically saved in the databases. 180 | 181 | * The :code:`fit_async` method sends the model to the broker container, which will manage the training using the workers you defined in the setup phase. The commands are then straightforward: 182 | For the :code:`scikit-learn` backend: 183 | 184 | .. code-block:: python 185 | 186 | expe.fit_async([data], [data_val]) 187 | 188 | 189 | For the Keras backend you still need to provide extra arguments to override the defaults. 190 | 191 | .. code-block:: python 192 | 193 | expe.fit_async([data], [data_val], nb_epoch=2, batch_size=batch_size) 194 | 195 | In both cases, the model is also trained and automatically saved in the databases. 196 | 197 | 198 | 199 | Step 4 : Identifying and reusing the fitted model 200 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 201 | 202 | Once the experiment has been fitted, you can access the id of the model in the db and load it to make predictions or access the parameters in the current process. 
203 | 204 | .. code-block:: python 205 | 206 | print(expe.mod_id) 207 | print(expe.data_id) 208 | 209 | expe.load_model(expe.mod_id, expe.data_id) 210 | 211 | 212 | It's then possible to make predictions using the loaded model. 213 | 214 | .. code-block:: python 215 | 216 | expe.predict(data['X']) 217 | 218 | You could of course provide new data to the model. You can also load the model in another experiment. 219 | 220 | .. _Keras: http://keras.io/ 221 | .. _`scikit-learn`: http://scikit-learn.org/stable/ 222 | -------------------------------------------------------------------------------- /docs/Tutorials/tuto1.rst: -------------------------------------------------------------------------------- 1 | =================================================================== 2 | Tutorial 1 : Simple Hyperparameter Tuning with ALP - sklearn models 3 | =================================================================== 4 | 5 | In this tutorial, we will get some data, build an Experiment with a 6 | simple model and tune the parameters of the model to get the best 7 | performance on validation data (by launching several experiments). We 8 | will then reuse this best model on unseen test data and check that it’s 9 | better than the untuned model. The whole thing will use the 10 | asynchronous fit to highlight the capacity of ALP. 11 | 12 | 1 - Get some data 13 | ~~~~~~~~~~~~~~~~~ 14 | 15 | Let us start with the usual Iris dataset. Note that we will split the 16 | test set into 2 samples of size 25: the "validation" set to select the 17 | best model, and the "new" set to assess that the selected model was the 18 | best. 19 | 20 | .. code:: python 21 | 22 | from sklearn import datasets 23 | from sklearn.model_selection import train_test_split 24 | 25 | # get some data 26 | iris = datasets.load_iris() 27 | X_train, X_test, y_train, y_test = train_test_split( 28 | iris.data, iris.target, test_size=50, random_state=0) 29 | X_test_val, X_test_new, y_test_val, y_test_new = train_test_split( 30 | X_test, y_test, test_size=25, random_state=1) 31 | 32 | # put it in the format ALP expects 33 | data, data_val, data_new = dict(), dict(), dict() 34 | data["X"], data["y"] = X_train, y_train 35 | data_val["X"], data_val["y"] = X_test_val, y_test_val 36 | data_new["X"], data_new["y"] = X_test_new, y_test_new 37 | 38 | 39 | 2 - Define an easy model and an ALP Experiment in a loop 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | We will define a simple `LogisticRegression`_ to demonstrate how to use ensembles of experiments in ALP. 43 | 44 | Let us first define a helper function. 45 | 46 | .. code:: python 47 | 48 | import random 49 | import sklearn.linear_model 50 | from alp.appcom.core import Experiment 51 | from operator import mul 52 | from functools import reduce 53 | def grid_search(grid_dict, tries, model_type='LogisticRegression'): 54 | ''' This function randomly builds Experiments with different hyperparameters and returns them in a dictionary. 55 | 56 | Args: 57 | grid_dict(dict) : hyperparameter grid to draw samples from 58 | tries(int) : number of models to be generated and tested 59 | async(bool) : should the fit be asynchronous 60 | model_type(string) : type of model to be tested (must be in sklearn.linear_model) 61 | 62 | Returns: 63 | expes(dict): a dictionary mapping experiment names to Experiments. 
220 | .. _Keras: http://keras.io/ 221 | .. _`scikit-learn`: http://scikit-learn.org/stable/ 222 | -------------------------------------------------------------------------------- /docs/Tutorials/tuto1.rst: -------------------------------------------------------------------------------- 1 | =================================================================== 2 | Tutorial 1 : Simple Hyperparameter Tuning with ALP - sklearn models 3 | =================================================================== 4 | 5 | In this tutorial, we will get some data, build an Experiment with a 6 | simple model and tune the parameters of the model to get the best 7 | performance on validation data (by launching several experiments). We 8 | will then reuse this best model on unseen test data and check that it’s 9 | better than the untuned model. The whole tutorial uses the 10 | asynchronous fit to highlight the capabilities of ALP. 11 | 12 | 1 - Get some data 13 | ~~~~~~~~~~~~~~~~~ 14 | 15 | Let us start with the usual Iris dataset. Note that we will split the 16 | test set in 2 samples of size 25: the "validation" set to select the 17 | best model, and the "new" set to assess that the selected model was the 18 | best. 19 | 20 | .. code:: python 21 | 22 | from sklearn import datasets 23 | from sklearn.model_selection import train_test_split 24 | 25 | # get some data 26 | iris = datasets.load_iris() 27 | X_train, X_test, y_train, y_test = train_test_split( 28 | iris.data, iris.target, test_size=50, random_state=0) 29 | X_test_val, X_test_new, y_test_val, y_test_new = train_test_split( 30 | X_test, y_test, test_size=25, random_state=1) 31 | 32 | # put it in ALP expected format 33 | data, data_val, data_new = dict(), dict(), dict() 34 | data["X"], data["y"] = X_train, y_train 35 | data_val["X"], data_val["y"] = X_test_val, y_test_val 36 | data_new["X"], data_new["y"] = X_test_new, y_test_new 37 | 38 | 39 | 2 - Define an easy model and an ALP Experiment in a loop 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | We will define a simple `LogisticRegression`_ to demonstrate how to use ensembles of experiments in ALP. 43 | 44 | Let us first define a helper function. 45 | 46 | .. code:: python 47 | 48 | import random 49 | import sklearn.linear_model 50 | from alp.appcom.core import Experiment 51 | from operator import mul 52 | 53 | def grid_search(grid_dict, tries, model_type='LogisticRegression'): 54 | ''' This function randomly builds Experiments with different hyperparameters and returns them. 55 | 56 | Args: 57 | grid_dict(dict) : hyperparameter grid from which to draw samples 58 | tries(int) : number of models to be generated and tested 59 | 60 | model_type(string) : type of model to be tested (must be in sklearn.linear_model) 61 | 62 | Returns: 63 | expes(dict): a dict mapping experiment keys to Experiments. 64 | 65 | ''' 66 | 67 | expes = dict() 68 | 69 | # 1 - infos 70 | size_grid = reduce(mul, [len(v) for v in grid_dict.values()])  # on Python 3, add: from functools import reduce 71 | print("grid size: {}".format(size_grid)) 72 | print("tries: {}".format(tries)) 73 | 74 | 75 | # 2 - models loop 76 | for i in range(tries): 77 | select_params = {} 78 | key = [str(i)] 79 | for k, v in grid_dict.items(): 80 | value = random.choice(v) 81 | select_params[k] = value 82 | key += ['{}:{}'.format(k, value)] 83 | model = getattr(sklearn.linear_model, model_type)(**select_params) 84 | expe = Experiment(model) 85 | expes['_'.join(key)] = expe 86 | return expes 87 | 88 | 89 | In detail, this function: 90 | 1. displays some info about the size of the grid. 91 | 2. models loop: as many times as :code:`tries`, it randomly selects a point in the hyperparameter grid and creates an Experiment object with the model parametrized at this point. 92 | 93 | 94 | 95 | 3 - Run the grid search 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | 98 | We use the :class:`~alp.appcom.ensembles.HParamsSearch` class to wrap several :class:`~alp.appcom.core.Experiment`. 99 | For now, because the grid is defined outside of the class, you have to pass a dictionary mapping experiment names to :class:`~alp.appcom.core.Experiment`. 100 | 101 | .. code:: python 102 | 103 | from alp.appcom.ensembles import HParamsSearch 104 | import numpy as np 105 | # setting the seed for reproducibility: feel free to change it 106 | random.seed(12345) 107 | 108 | # defining the grid that will be explored 109 | grid_tol = [i*10**-j for i in (1,2,5) for j in (1, 2, 3, 4, 5, 6)] 110 | grid_C = [i*10**-j for i in (1,2,5) for j in (-2, -1, 1, 2, 3, 4, 5, 6)] 111 | grid = {'tol':grid_tol, 'C':grid_C} 112 | 113 | tries = 100 114 | 115 | expes = grid_search(grid, tries) 116 | 117 | # we define the ensemble with our experiments and a metric 118 | ensemble = HParamsSearch(experiments=expes, metric='score', op=np.max) 119 | 120 | results = ensemble.fit([data], [data_val]) 121 | 122 | label, predictions = ensemble.predict(data['X']) 123 | print('Best model: {}'.format(label)) 124 | 125 | .. note:: 126 | 127 | You can also use the :meth:`~alp.appcom.ensembles.HParamsSearch.fit_async` method; a minimal example is given at the end of this section. 128 | 129 | 130 | .. parsed-literal:: 131 | 132 | grid size : 432 133 | tries : 100 134 | 135 | Best model: 52_C:100_tol:1e-06 136 | 137 | 138 | A word on the interpretation of the params: 139 | * the parameter C is the regularisation parameter of the Logistic Regression. A small value of C means a stronger L2 constraint on the weights w (the constraint is not applied to the intercept). A larger C can lead to overfitting, while a smaller value can lead to too much regularization. As such, it is the ideal candidate for automatic tuning. 140 | * the tol parameter is the tolerance for stopping criteria. Our experiments did not show a strong impact of this parameter unless it was set to high values. 141 |
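If you have workers running, the same search can be launched without blocking the current process. A minimal sketch of the asynchronous variant (the worker setup is assumed and not shown here); the call returns a dictionary mapping each experiment key to the handle of its pending result:

.. code:: python

    results = ensemble.fit_async([data], [data_val])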
142 | 4 - Validation that the best model is better than the untuned one 143 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 144 | 145 | ALP makes it easy to predict on unseen data with the loaded best model. 146 | The accuracy of the best model is decent (one mistake over 25 points). 147 | 148 | .. code:: python 149 | 150 | label, predictions = ensemble.predict(data_new['X']) 151 | print('Best model: {}'.format(label)) 152 | 153 | 154 | .. parsed-literal:: 155 | 156 | 0.96 157 | 158 | We can now create an untuned model (C=1 by default) and check that its accuracy on unseen data is lower than that of the tuned one. 159 | 160 | .. code:: python 161 | 162 | model = sklearn.linear_model.LogisticRegression() 163 | expe = Experiment(model) 164 | expe.fit([data], [data_val]) 165 | pred_worst_new = expe.predict(X_test_new) 166 | print(sklearn.metrics.accuracy_score(pred_worst_new, data_new["y"]))  # sklearn.metrics must be imported as well 167 | 168 | 169 | .. parsed-literal:: 170 | 171 | 0.88 172 | 173 | 174 | .. _LogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html 175 | -------------------------------------------------------------------------------- /docs/Tutorials/tuto2.rst: -------------------------------------------------------------------------------- 1 | ===================================================== 2 | Tutorial 2 : Feed simple data to your ALP Experiment 3 | ===================================================== 4 | 5 | In this tutorial, we will build an Experiment with a simple model and 6 | fit it on a varying number of datasets. The aim of this tutorial is 7 | to explain the expected behaviour of ALP. 8 | 9 | 1 - Get some data 10 | ~~~~~~~~~~~~~~~~~ 11 | 12 | Let us start with the usual Iris dataset. 13 | 14 | .. code:: python 15 | 16 | from sklearn import datasets 17 | from sklearn.model_selection import train_test_split 18 | from sklearn.metrics import accuracy_score 19 | 20 | # get some data 21 | iris = datasets.load_iris() 22 | X_train, X_val, y_train, y_val = train_test_split( 23 | iris.data, iris.target, test_size=100, random_state=0) 24 | 25 | The data is then put in the form ALP expects: a dictionary with a field 26 | 'X' for the input and a field 'y' for the output. Note that the same is 27 | done for the validation data. 28 | 29 | .. code:: python 30 | 31 | data, data_val = dict(), dict() 32 | data["X"], data["y"] = X_train, y_train 33 | data_val["X"], data_val["y"] = X_val, y_val 34 | 35 | Let us shuffle the data some more. After these lines, 2 more datasets 36 | are created. 37 | 38 | .. code:: python 39 | 40 | more_data, some_more_data = dict(), dict() 41 | more_data["X"], some_more_data["X"], more_data["y"], some_more_data["y"] = train_test_split( 42 | iris.data, iris.target, test_size=75, random_state=1) 43 | 44 | 2 - Expected behaviour with sklearn 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | 2.1 - Defining the experiment and model 48 | +++++++++++++++++++++++++++++++++++++++ 49 | 50 | We then define a first simple sklearn logistic regression. 51 | 52 | .. code:: python 53 | 54 | from alp.appcom.core import Experiment 55 | from sklearn.linear_model import LogisticRegression 56 | 57 | lr = LogisticRegression() 58 | Expe = Experiment(lr) 59 | 60 | 2.2 - Fitting with one data set and one validation 61 | ++++++++++++++++++++++++++++++++++++++++++++++++++ 62 | 63 | Fitting one data set with one validation set is done this way: 64 | 65 | .. code:: python 66 | 67 | Expe.fit([data],[data_val]) 68 | 69 | 70 | .. parsed-literal:: 71 | 72 | ({'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf', 73 | 'metrics': {'iter': nan, 74 | 'score': [0.97999999999999998], 75 | 'val_score': [0.93999999999999995]}, 76 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e', 77 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'}, 78 | None) 79 | 80 | 81 | 82 | Now let's take a look at the results: 83 | 84 | * there is a data\_id field: that is where the data is stored in the appropriate collection. 85 | 86 | * there is a model\_id field: this is where the model architecture is stored.
87 | 88 | * the param\_dump field is the path of the file where the *attributes* of the fitted model are stored. 89 | 90 | * the metrics field is itself a dictionary with several attributes: 91 | * the iter field is here for compatibility with the keras backend. 92 | 93 | * the score field is model specific; you will have to look into sklearn's documentation to see what kind of metric is used. For the logistic regression, it is the accuracy. This field is then the accuracy of the fitted model on the training data. 94 | 95 | * the val\_score is the score on the validation data (it is still the accuracy in this case). 96 | 97 | 98 | You can access the full result of the experiment in the full\_res 99 | attribute of the object. 100 | 101 | .. code:: python 102 | 103 | Expe.full_res 104 | 105 | 106 | .. parsed-literal:: 107 | 108 | {'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf', 109 | 'metrics': {'iter': nan, 110 | 'score': [0.97999999999999998], 111 | 'val_score': [0.93999999999999995]}, 112 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e', 113 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'} 114 | 115 | 116 |
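A single metric can also be pulled out of this dictionary directly (a small illustration based on the output above):

.. code:: python

    # validation accuracy of the last fit
    Expe.full_res['metrics']['val_score'][-1]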
117 | Predicting the "more\_data" on the model fitted on "data" is done this 118 | way. 119 | 120 | .. code:: python 121 | 122 | pred_on_more_data = Expe.predict(more_data["X"]) 123 | 124 | At this point, pred\_on\_more\_data is a vector of predictions. Its 125 | accuracy is obtained as follows: 126 | 127 | .. code:: python 128 | 129 | accuracy_score(pred_on_more_data,more_data["y"]) 130 | 131 | 132 | 133 | 134 | .. parsed-literal:: 135 | 136 | 0.95999999999999996 137 | 138 | 139 | 140 | Now you can check that the full\_res field of the Expe object was not 141 | modified during the predict call. 142 | 143 | .. code:: python 144 | 145 | Expe.full_res 146 | 147 | 148 | 149 | 150 | .. parsed-literal:: 151 | 152 | {'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf', 153 | 'metrics': {'iter': nan, 154 | 'score': [0.97999999999999998], 155 | 'val_score': [0.93999999999999995]}, 156 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e', 157 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'} 158 | 159 | 160 | 161 | 2.3 - Fitting with one data set and no validation 162 | ++++++++++++++++++++++++++++++++++++++++++++++++++ 163 | 164 | If you want to fit an experiment and don't have a validation set, you 165 | need to specify a None in the validation field. Note that all the fields 166 | have changed. Since the data has changed, the data\_id is different. The 167 | model created is a new one, so are the parameters. Finally, the metrics 168 | are different. 169 | 170 | .. code:: python 171 | 172 | Expe.fit([some_more_data],[None]) 173 | 174 | 175 | 176 | 177 | .. parsed-literal:: 178 | 179 | ({'data_id': '3554c1421fd9056e69c3cdf1b0ec8c3f', 180 | 'metrics': {'iter': nan, 'score': [0.95999999999999996], 'val_score': [nan]}, 181 | 'model_id': 'ceb5d5632334515c4ebbd72a256bd421', 182 | 'params_dump': u'/parameters_h5/ceb5d5632334515c4ebbd72a256bd4213554c1421fd9056e69c3cdf1b0ec8c3f.h5'}, 183 | None) 184 | 185 | 186 | 187 | As a result, the model actually stored in the Experiment at this point of 188 | the code execution is not the same as in 2.2. You can check that by 189 | predicting on the more\_data set and verifying that the score is not the 190 | same. 191 | 192 | .. code:: python 193 | 194 | pred_on_more_data = Expe.predict(more_data["X"]) 195 | accuracy_score(pred_on_more_data,more_data["y"]) 196 | 197 | 198 | 199 | 200 | .. parsed-literal:: 201 | 202 | 0.94666666666666666 203 | 204 | 205 | 206 | 2.4 - Fitting several datasets 207 | ++++++++++++++++++++++++++++++++ 208 | 209 | This is an important point since the behavior of sklearn differs from 210 | the keras one: if you feed different datasets to an Experiment with an 211 | sklearn model, ALP proceeds as follows: 212 | 213 | * the first model is fitted, then the score and validation score are computed (on the first validation data, if provided). 214 | 215 | * the second model is fitted, then the score and validation score are computed (on the second validation data, if provided). 216 | 217 | * and so on. 218 | 219 | As a result, the data\_id, model\_id and params\_dump fields in the 220 | full\_res attribute of the Experiment after the following line are those of 221 | the second model. The metrics (score and val\_score) fields have a 222 | length of 2, one for each model. 223 | 224 | Note that you can specify a None as validation set if you don't want to 225 | validate a certain model. 226 | 227 | .. code:: python 228 | 229 | Expe.fit([data,more_data],[None,some_more_data]) 230 | 231 | 232 | .. parsed-literal:: 233 | 234 | ({'data_id': '2767007837282c3da5a86cfe41b57cce', 235 | 'metrics': {'iter': nan, 236 | 'score': [0.97999999999999998, 0.94666666666666666], 237 | 'val_score': [nan, 0.92000000000000004]}, 238 | 'model_id': 'c6f885968087dc779ce47f3f1af86a9b', 239 | 'params_dump': u'/parameters_h5/c6f885968087dc779ce47f3f1af86a9b2767007837282c3da5a86cfe41b57cce.h5'}, 240 | None) 241 |
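To make the bookkeeping explicit, you can verify that the metrics now hold one entry per dataset (a small check based on the output above):

.. code:: python

    metrics = Expe.full_res['metrics']
    assert len(metrics['score']) == 2
    assert len(metrics['val_score']) == 2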
-------------------------------------------------------------------------------- /docs/Tutorials/tuto3.rst: -------------------------------------------------------------------------------- 1 | ==================================================== 2 | Tutorial 3 : Feed more data with Fuel or generators 3 | ==================================================== 4 | 5 | Because we aim at supporting online learning on streamed data, we think that generator support is a good start. 6 | We support Fuel_, a library that helps you to pre-process and yield chunks of data while remaining serializable. 7 | 8 | 1 - Create some data 9 | ~~~~~~~~~~~~~~~~~~~~ 10 | 11 | You can easily use Fuel_ iterators in an Experiment. 12 | We will first create some fake data. 13 | 14 | .. code-block:: python 15 | 16 | import fuel 17 | import numpy as np  # get_test_data and np_utils below come from the Keras utilities 18 | input_dim = 2 19 | nb_hidden = 4 20 | nb_class = 2 21 | batch_size = 5 22 | train_samples = 512 23 | test_samples = 128 24 | (X_tr, y_tr), (X_te, y_te) = get_test_data(nb_train=train_samples, 25 | nb_test=test_samples, 26 | input_shape=(input_dim,), 27 | classification=True, 28 | nb_class=nb_class) 29 | 30 | y_tr = np_utils.to_categorical(y_tr) 31 | y_te = np_utils.to_categorical(y_te) 32 | 33 | data, data_val = dict(), dict() 34 | 35 | X = np.concatenate([X_tr, X_te]) 36 | y = np.concatenate([y_tr, y_te]) 37 | 38 | inputs = [X, X] 39 | outputs = [y] 40 | 41 | 42 | 2 - Transform the data 43 | ~~~~~~~~~~~~~~~~~~~~~~ 44 | 45 | We then import a helper function, :func:`alp.appcom.utils.to_fuel_h5`, that will convert our list of inputs to an HDF5 dataset. 46 | This dataset has a simple structure and we can divide it into multiple sets. 47 | 48 | 49 | .. code-block:: python 50 | 51 | # we save the mean and the scale (inverse of the standard deviation) 52 | # for each channel 53 | scale = 1.0 / inputs[0].std(axis=0) 54 | shift = - scale * inputs[0].mean(axis=0) 55 | 56 | # for 3 sets, we need 3 slices 57 | slices = [0, 256, 512] 58 | 59 | # and 3 names 60 | names = ['train', 'test', 'valid'] 61 | 62 | file_name = 'test_data_' 63 | file_path_f = to_fuel_h5(inputs, outputs, slices, names, file_name, '/data_generator') 64 | 65 | 66 | 3 - Build your generator 67 | ~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | The next step is to build our Fuel data stream from the dataset and a scheme, and to transform the data so it is ready for our model. The :code:`H5PYDataset`, :code:`SequentialScheme`, :code:`DataStream` and :code:`ScaleAndShift` classes come from Fuel (see the imports in Tutorial 4). 70 | 71 | 72 | .. code-block:: python 73 | 74 | train_set = H5PYDataset(file_path_f, 75 | which_sets=('train','test', 'valid')) 76 | 77 | scheme = SequentialScheme(examples=128, batch_size=32) 78 | 79 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme) 80 | 81 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train, 82 | scale=scale, shift=shift, 83 | which_sources=('input_X',)) 84 | 85 | 86 | 4 - Build and wrap your model 87 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 88 | 89 | We finally build our model (using :code:`Input`, :code:`Dense` and :code:`Model` from Keras) and wrap it in an experiment. 90 | 91 | 92 | .. code-block:: python 93 | 94 | inputs = Input(shape=(input_dim,), name='X') 95 | 96 | x = Dense(nb_hidden, activation='relu')(inputs) 97 | x = Dense(nb_hidden, activation='relu')(x) 98 | predictions = Dense(nb_class, activation='softmax')(x) 99 | 100 | model = Model(input=inputs, output=predictions) 101 | 102 | model.compile(loss='categorical_crossentropy', 103 | optimizer='rmsprop', 104 | metrics=['accuracy']) 105 | 106 | expe = Experiment(model) 107 | 108 | 109 | 5 - Train your model 110 | ~~~~~~~~~~~~~~~~~~~~ 111 | 112 | We can finally use the :meth:`alp.appcom.core.Experiment.fit_gen` method with our model and data streams (here :code:`gen` and :code:`val` stand for the training and validation streams built as above). 113 | 114 | 115 | .. code-block:: python 116 | 117 | expe.fit_gen([gen], [val], nb_epoch=2, 118 | model=model, 119 | metrics=metrics, 120 | custom_objects=cust_objects, 121 | samples_per_epoch=128, 122 | nb_val_samples=128) 123 | 124 | You can also use :meth:`alp.appcom.core.Experiment.fit_gen_async` with the same function parameters if you have a worker running. 125 | 126 | .. code-block:: python 127 | 128 | expe.fit_gen_async([gen], [val], nb_epoch=2, 129 | model=model, 130 | metrics=metrics, 131 | custom_objects=cust_objects, 132 | samples_per_epoch=128, 133 | nb_val_samples=128) 134 | 135 | 136 | .. _Fuel: https://github.com/mila-udem/fuel 137 | -------------------------------------------------------------------------------- /docs/Tutorials/tuto4.rst: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | Tutorial 4 : how to use custom layers for Keras with ALP 3 | ======================================================== 4 | 5 | Because serialization of complex Python objects is still a challenge, we will present a way of sending a custom layer to a Keras model with ALP. 6 | 7 | 8 | 1 - Get a dataset 9 | ~~~~~~~~~~~~~~~~~ 10 | 11 | We will work with the CIFAR10 dataset available via Keras. 12 | 13 | ..
code-block:: python 14 | 15 | from keras.datasets import cifar10 16 | from keras.preprocessing.image import ImageDataGenerator 17 | from keras.models import Sequential 18 | from keras.layers import Dense, Dropout, Activation, Flatten 19 | from keras.layers import Convolution2D, MaxPooling2D 20 | from keras.optimizers import SGD 21 | from keras.utils import np_utils 22 | 23 | from fuel.datasets.hdf5 import H5PYDataset 24 | from fuel.schemes import SequentialScheme 25 | from fuel.streams import DataStream 26 | from fuel.transformers import ScaleAndShift 27 | 28 | from alp.appcom.core import Experiment 29 | 30 | from alp.appcom.utils import to_fuel_h5 31 | 32 | import numpy as np 33 | 34 | nb_classes = 10 35 | nb_epoch = 25 36 | 37 | # input image dimensions 38 | img_rows, img_cols = 32, 32 39 | # the CIFAR10 images are RGB 40 | img_channels = 3 41 | 42 | # the data, shuffled and split between train and test sets 43 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 44 | 45 | X_train = X_train.astype('float32') 46 | X_test = X_test.astype('float32') 47 | X_train = X_train/255 48 | X_test = X_test/255 49 | 50 | batch_size = 128 51 | print('X_train shape:', X_train.shape) 52 | print(X_train.shape[0], 'train samples') 53 | print(X_test.shape[0], 'test samples') 54 | 55 | # convert class vectors to binary class matrices 56 | Y_train = np_utils.to_categorical(y_train, nb_classes) 57 | Y_test = np_utils.to_categorical(y_test, nb_classes) 58 | 59 | 60 | 2 - Build the generators 61 | ~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | We build two generators, one for training and one for validation. 64 | 65 | 66 | .. code-block:: python 67 | 68 | def dump_data(): 69 | inputs = [np.concatenate([X_train, X_test])] 70 | outputs = [np.concatenate([Y_train, Y_test])] 71 | 72 | file_name = 'test_data_dropout' 73 | scale = 1.0 / inputs[0].std(axis=0) 74 | shift = - scale * inputs[0].mean(axis=0) 75 | 76 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 50000], 77 | ['train', 'test'], 78 | file_name, 79 | '/data_generator') 80 | return file_path, scale, shift, i_names, o_names 81 | 82 | file_path, scale, shift, i_names, o_names = dump_data() 83 | 84 | 85 | def make_gen(set_to_gen, nb_examples): 86 | file_path_f = file_path 87 | names_select = i_names 88 | train_set = H5PYDataset(file_path_f, 89 | which_sets=set_to_gen) 90 | 91 | scheme = SequentialScheme(examples=nb_examples, batch_size=64) 92 | 93 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme) 94 | 95 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train, 96 | scale=scale, shift=shift, 97 | which_sources=(names_select[-1],)) 98 | return stand_stream_train, train_set, data_stream_train 99 | 100 | train, data_tr, data_stream_tr = make_gen(('train',), 50000) 101 | test, data_te, data_stream_te = make_gen(('test',), 10000) 102 | 103 | 104 | 3 - Build your custom layer 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | Imagine you want to reimplement a dropout layer. We could wrap it in a function that returns the object: 108 | 109 | 110 | .. code-block:: python 111 | 112 | def return_custom(): 113 | import keras.backend as K 114 | import numpy as np 115 | from keras.engine import Layer 116 | class Dropout_cust(Layer): 117 | '''Applies Dropout to the input. 118 | ''' 119 | def __init__(self, p, **kwargs): 120 | self.p = p 121 | if 0. 
< self.p < 1.: 122 | self.uses_learning_phase = True 123 | self.supports_masking = True 124 | super(Dropout_cust, self).__init__(**kwargs) 125 | 126 | def call(self, x, mask=None): 127 | if 0. < self.p < 1.: 128 | x = K.in_train_phase(K.dropout(x, level=self.p), x) 129 | return x 130 | 131 | def get_config(self): 132 | config = {'p': self.p} 133 | base_config = super(Dropout_cust, self).get_config() 134 | return dict(list(base_config.items()) + list(config.items())) 135 | return Dropout_cust 136 | 137 | 138 | 4 - Build your model 139 | ~~~~~~~~~~~~~~~~~~~~ 140 | 141 | We then define our model and call our function to instantiate this custom layer. 142 | 143 | .. code-block:: python 144 | 145 | model = Sequential() 146 | 147 | model.add(Convolution2D(64, 3, 3, border_mode='same', 148 | input_shape=(img_channels, img_rows, img_cols))) 149 | model.add(Activation('relu')) 150 | model.add(Convolution2D(64, 3, 3)) 151 | model.add(Activation('relu')) 152 | model.add(MaxPooling2D(pool_size=(2, 2))) 153 | model.add(Dropout(0.25)) 154 | 155 | model.add(Convolution2D(128, 3, 3, border_mode='same')) 156 | model.add(Activation('relu')) 157 | model.add(Convolution2D(128, 3, 3)) 158 | model.add(Activation('relu')) 159 | model.add(MaxPooling2D(pool_size=(2, 2))) 160 | model.add(Dropout(0.25)) 161 | 162 | model.add(Flatten()) 163 | model.add(Dense(1024)) 164 | model.add(Activation('relu')) 165 | model.add(return_custom()(0.5)) 166 | model.add(Dense(nb_classes)) 167 | model.add(Activation('softmax')) 168 | 169 | sgd = SGD(lr=0.02, decay=1e-7, momentum=0.9, nesterov=True) 170 | model.compile(loss='categorical_crossentropy', 171 | optimizer=sgd, 172 | metrics=['accuracy']) 173 | 174 | 175 | 5 - Fit your model 176 | ~~~~~~~~~~~~~~~~~~ 177 | 178 | We then map the name of the custom object to the function that returns it in a dictionary. 179 | After wrapping the model in an :class:`~alp.appcom.core.Experiment`, we call the :meth:`alp.appcom.core.Experiment.fit_gen_async` method and send the custom_objects. 180 | 181 | .. code-block:: python 182 | 183 | custom_objects = {'Dropout_cust': return_custom} 184 | 185 | expe = Experiment(model) 186 | 187 | results = expe.fit_gen_async([train], [test], nb_epoch=nb_epoch, 188 | model=model, 189 | metrics=['accuracy'], 190 | samples_per_epoch=50000, 191 | nb_val_samples=10000, 192 | verbose=2, 193 | custom_objects=custom_objects) 194 | 195 | .. note:: 196 | 197 | Why do we wrap this class and all the dependencies? 198 | 199 | We use dill to be able to serialize objects but, unfortunately, handling classes with inheritance is not doable. It is also easier to pass the information about all the dependencies of the object. All the dependencies and your custom objects will be instantiated during the evaluation of the function so that they will be available in the :code:`__main__`. This way the information can be sent to the workers without problems. 200 | -------------------------------------------------------------------------------- /docs/Userguide/Experiment.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Experiment 3 | ========== 4 | 5 | Experiment section 6 | -------------------------------------------------------------------------------- /docs/Userguide/Services.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Services 3 | ======== 4 | 5 | In this section we describe the different services (such as the Jupyter Notebook, RabbitMQ, the Models databases ...) 
running in separate Docker containers (respectively the Controller, the Broker, Mongo Models ...). As we tried to separate the services as much as possible, the container is sometimes identified with the service it runs. 6 | 7 | Controller 8 | ~~~~~~~~~~ 9 | 10 | The Controller is the user endpoint of the library. By default, it serves a Jupyter notebook in which the user sends commands (such as `import alp`). You can also use it to run an application using ALP for either training or prediction. 11 | 12 | Mongo Models 13 | ~~~~~~~~~~~~ 14 | 15 | Mongo Models is a container that runs a MongoDB service in which the architecture of the models trained through ALP is saved. 16 | 17 | 18 | Mongo Results 19 | ~~~~~~~~~~~~~ 20 | 21 | Mongo Results is a container that runs a MongoDB service in which the meta information about the tasks is saved. 22 | 23 | Broker 24 | ~~~~~~ 25 | 26 | Also called the scheduler in the architecture, it distributes the tasks and gathers the results. 27 | 28 | Worker(s) 29 | ~~~~~~~~~ 30 | 31 | The workers run the tasks and send the results to the MongoDB services. Each backend needs at least one worker consuming from the right queue. 32 | 33 | Job monitor 34 | ~~~~~~~~~~~ 35 | 36 | You can plug in several containers to monitor jobs. 37 | 38 | 39 | -------------------------------------------------------------------------------- /docs/Userguide/index_userguide.rst: -------------------------------------------------------------------------------- 1 | Userguide 2 | --------- 3 | 4 | In this user guide we explain in more detail how to use the architecture and the main objects available in ALP. 5 | 6 | .. warning:: 7 | 8 | The userguide is currently under construction. Please visit this section in a few days. 9 | 10 | .. include :: Services.rst 11 | .. include :: Experiment.rst 12 | -------------------------------------------------------------------------------- /docs/_static/download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tboquet/python-alp/5c53d4a8bbcb197bbaebb84a3ac16d721de331c5/docs/_static/download.png -------------------------------------------------------------------------------- /docs/_static/last_bouquetin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tboquet/python-alp/5c53d4a8bbcb197bbaebb84a3ac16d721de331c5/docs/_static/last_bouquetin.png -------------------------------------------------------------------------------- /docs/_static/last_bouquetin.svg: -------------------------------------------------------------------------------- (SVG markup omitted) -------------------------------------------------------------------------------- /docs/_templates/last_modified.html: -------------------------------------------------------------------------------- 1 |
    2 | {%- if last_updated %} 3 | {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %} 4 | {%- endif %} 5 |
6 | 7 |
    8 | v: {{ release }} 9 |
10 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | import alabaster 6 | 7 | extensions = [ 8 | 'sphinx.ext.autodoc', 9 | 'sphinx.ext.autosummary', 10 | 'sphinx.ext.coverage', 11 | 'sphinx.ext.doctest', 12 | 'sphinx.ext.extlinks', 13 | 'sphinx.ext.ifconfig', 14 | 'sphinx.ext.napoleon', 15 | 'sphinx.ext.todo', 16 | 'sphinx.ext.viewcode', 17 | 'sphinxcontrib.httpdomain', 18 | 'sphinxcontrib.autohttp.flask', 19 | 'alabaster' 20 | ] 21 | if os.getenv('SPELLCHECK'): 22 | extensions += 'sphinxcontrib.spelling', 23 | spelling_show_suggestions = True 24 | spelling_lang = 'en_US' 25 | 26 | # on_rtd is whether we are on readthedocs.org 27 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 28 | 29 | source_suffix = '.rst' 30 | master_doc = 'index' 31 | project = u'ALP' 32 | year = '2016' 33 | author = u'Thomas Boquet and Paul Lemaître' 34 | copyright = '{0}, {1}'.format(year, author) 35 | version = release = u'0.3.0' 36 | 37 | pygments_style = 'sphinx' 38 | templates_path = ['_templates'] 39 | extlinks = { 40 | 'issue': ('https://github.com/tboquet/python-alp/issues/%s', '#'), 41 | 'pr': ('https://github.com/tboquet/python-alp/pull/%s', 'PR #'), 42 | } 43 | 44 | description = 'Schedule and save your machine learning experiments' 45 | # -- Option for HTML output ----------------------------------------------- 46 | 47 | html_static_path = ['_static'] 48 | html_theme_options = { 49 | 'logo': 'last_bouquetin.svg', 50 | 'logo_name': 'true', 51 | 'description': description, 52 | 'github_button': 'false' 53 | } 54 | 55 | # Custom sidebar templates, maps document names to template names. 56 | html_sidebars = { 57 | '**': [ 58 | 'about.html', 59 | 'navigation.html', 60 | 'relations.html', 61 | 'searchbox.html', 62 | 'donate.html', 63 | 'relations.html', 64 | 'last_modified.html' 65 | ] 66 | } 67 | 68 | html_show_sourcelink = True 69 | 70 | 71 | # Add any paths that contain custom themes here, relative to this directory. 72 | 73 | if not on_rtd: # only set the theme if we're building docs locally 74 | html_theme = 'alabaster' 75 | html_theme_path = [alabaster.get_path()] 76 | 77 | else: 78 | from mock import Mock as MagicMock 79 | import sys 80 | 81 | class Mock(MagicMock): 82 | @classmethod 83 | def __getattr__(cls, name): 84 | return Mock() 85 | 86 | # include the names of your minimal required packages here 87 | MOCK_MODULES = ['h5py'] 88 | sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) 89 | 90 | html_use_smartypants = True 91 | html_last_updated_fmt = '%b %d, %Y' 92 | html_split_index = False 93 | 94 | 95 | html_short_title = '%s-%s' % (project, version) 96 | 97 | napoleon_use_ivar = True 98 | napoleon_use_rtype = False 99 | napoleon_use_param = False 100 | 101 | keep_warnings = True 102 | 103 | add_module_names = False 104 | todo_include_todos = True 105 | 106 | mathjax_path = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' 107 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | =============================== 2 | Welcome on ALP's documentation! 3 | =============================== 4 | 5 | ALP helps you experiment with a lot of machine learning models quickly. 
It provides you with a simple way of scheduling and recording experiments. 6 | This library has been developped to work well with Keras and Scikit-learn but can suit a lot of other frameworks. 7 | 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | FirstSteps/index_first_steps 13 | Setup/index_setup 14 | Tutorials/index_tuto 15 | Userguide/index_userguide 16 | reference/index 17 | projectevolution/index_projectevolution 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/projectevolution/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../AUTHORS.rst -------------------------------------------------------------------------------- /docs/projectevolution/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CHANGELOG.rst -------------------------------------------------------------------------------- /docs/projectevolution/contribute.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CONTRIBUTING.rst -------------------------------------------------------------------------------- /docs/projectevolution/index_projectevolution.rst: -------------------------------------------------------------------------------- 1 | Project evolution 2 | --------------------------- 3 | 4 | .. include:: contribute.rst 5 | .. include:: changelog.rst 6 | .. include:: authors.rst 7 | -------------------------------------------------------------------------------- /docs/reference/alp.appcom.rst: -------------------------------------------------------------------------------- 1 | alp.appcom package 2 | ================== 3 | 4 | .. automodule:: alp.appcom.core 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. automodule:: alp.appcom.ensembles 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | .. automodule:: alp.appcom.utils 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/reference/alp.backend.rst: -------------------------------------------------------------------------------- 1 | alp.backend package 2 | =================== 3 | 4 | .. automodule:: alp.backend.keras_backend 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. automodule:: alp.backend.sklearn_backend 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | -------------------------------------------------------------------------------- /docs/reference/alp.rst: -------------------------------------------------------------------------------- 1 | alp 2 | === 3 | 4 | .. testsetup:: 5 | 6 | from alp import * 7 | 8 | .. automodule:: alp 9 | :members: 10 | 11 | .. 12 | Subpackages 13 | ----------- 14 | 15 | .. toctree:: 16 | 17 | alp.appcom 18 | alp.backend 19 | alp.dbbackend 20 | 21 | alp.celapp module 22 | ------------------ 23 | 24 | .. automodule:: alp.celapp 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | alp.config module 30 | ----------------- 31 | 32 | .. 
automodule:: alp.config 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | alp* 8 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | requests>=2.12.3 3 | keras 4 | pymongo 5 | scikit-learn 6 | dill 7 | mock 8 | celery 9 | sphinx>=1.3 10 | sphinx-py3doc-enhanced-theme 11 | sphinxcontrib-httpdomain>=1.4.0 12 | sphinxcontrib-napoleon>=0.5.0 13 | progressbar2 14 | -e . 15 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | builtin 2 | builtins 3 | classmethod 4 | staticmethod 5 | classmethods 6 | staticmethods 7 | args 8 | kwargs 9 | callstack 10 | Changelog 11 | Indices 12 | -------------------------------------------------------------------------------- /install-prereqs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # launch the mongo model docker container 4 | 5 | echo "Building ..." 6 | echo "Launch the MongoDB models container ..." 7 | docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo 8 | echo -e "\n" 9 | 10 | echo "Launch the MongoDB results container ..." 11 | # launch the mongo results docker container 12 | docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -d --restart=always mongo 13 | echo -e "\n" 14 | 15 | echo "Launch the Rabbitmq broker container ..." 
16 | # start the rabbitmq broker 17 | docker run -d -v /etc/localtime:/etc/localtime:ro \ 18 | -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq \ 19 | --name=rabbitmq_sched -p 8080:15672 -p 5672:5672\ 20 | --restart=always rabbitmq:3-management 21 | echo -e "\n" 22 | 23 | -------------------------------------------------------------------------------- /req/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | numpy 3 | pymongo 4 | -e git+git://github.com/Theano/Theano.git#egg=package 5 | git+git://github.com/fchollet/keras.git 6 | celery 7 | dill 8 | -------------------------------------------------------------------------------- /req/requirements_cli.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | cython 3 | numpy 4 | pymongo 5 | pyyaml 6 | six>=1.10 7 | Click 8 | pandas 9 | docker-py 10 | -------------------------------------------------------------------------------- /req/requirements_first.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | cython 3 | numpy 4 | pymongo 5 | h5py 6 | Pillow 7 | pyyaml 8 | six>=1.10 9 | Click 10 | pandas 11 | docker-py 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | max-line-length = 140 6 | exclude = tests/*,*/migrations/*,*/south_migrations/* 7 | 8 | [tool:pytest] 9 | norecursedirs = 10 | .git 11 | .tox 12 | .env 13 | dist 14 | build 15 | south_migrations 16 | migrations 17 | python_files = 18 | test_*.py 19 | *_test.py 20 | tests.py 21 | addopts = 22 | -rxEfsw 23 | --strict 24 | --doctest-modules 25 | --doctest-glob=\*.rst 26 | --tb=short 27 | 28 | [isort] 29 | force_single_line=True 30 | line_length=120 31 | known_first_party=alp 32 | default_section=THIRDPARTY 33 | forced_separate=test_alp 34 | not_skip = __init__.py 35 | skip = migrations, south_migrations 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import io 7 | import re 8 | from glob import glob 9 | from os.path import basename 10 | from os.path import dirname 11 | from os.path import join 12 | from os.path import splitext 13 | 14 | from setuptools import find_packages 15 | from setuptools import setup 16 | 17 | 18 | def read(*names, **kwargs): 19 | return io.open( 20 | join(dirname(__file__), *names), 21 | encoding=kwargs.get('encoding', 'utf8') 22 | ).read() 23 | 24 | 25 | setup( 26 | name='alp', 27 | version='0.3.0', 28 | license='BSD', 29 | description='Machine learning for teams', 30 | long_description='%s\n%s' % ( 31 | re.compile('^.. start-badges.*^.. 
end-badges', re.M | re.S).sub('', read('README.rst')), 32 | re.sub(':[a-z]+:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst')) 33 | ), 34 | author='Thomas Boquet', 35 | author_email='thomas.boquet@hec.ca', 36 | url='https://github.com/tboquet/python-alp', 37 | packages=find_packages('src'), 38 | package_dir={'': 'src'}, 39 | py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')], 40 | include_package_data=True, 41 | zip_safe=False, 42 | classifiers=[ 43 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers 44 | 'Development Status :: 5 - Production/Stable', 45 | 'Intended Audience :: Developers', 46 | 'License :: OSI Approved :: BSD License', 47 | 'Operating System :: Unix', 48 | 'Operating System :: POSIX', 49 | 'Operating System :: Microsoft :: Windows', 50 | 'Programming Language :: Python', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3', 53 | 'Programming Language :: Python :: 3.3', 54 | 'Programming Language :: Python :: 3.4', 55 | 'Programming Language :: Python :: 3.5', 56 | 'Programming Language :: Python :: Implementation :: CPython', 57 | 'Programming Language :: Python :: Implementation :: PyPy', 58 | # uncomment if you test on these interpreters: 59 | # 'Programming Language :: Python :: Implementation :: IronPython', 60 | # 'Programming Language :: Python :: Implementation :: Jython', 61 | # 'Programming Language :: Python :: Implementation :: Stackless', 62 | 'Topic :: Utilities', 63 | ], 64 | keywords=[ 65 | # eg: 'keyword1', 'keyword2', 'keyword3', 66 | ], 67 | install_requires=[ 68 | # eg: 'aspectlib==1.1.1', 'six>=1.7', 69 | ], 70 | extras_require={ 71 | # eg: 72 | # 'rst': ['docutils>=0.11'], 73 | # ':python_version=="2.6"': ['argparse'], 74 | }, 75 | entry_points={ 76 | 'console_scripts': [ 77 | 'alp = alp.cli:main', 78 | ] 79 | }, 80 | ) 81 | -------------------------------------------------------------------------------- /src/alp/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .appcom import * 3 | 4 | __all__ = ["Experiment"] 5 | 6 | __version__ = "0.3.0" 7 | -------------------------------------------------------------------------------- /src/alp/appcom/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import json 5 | import os 6 | 7 | from .core import * 8 | 9 | _alp_base_dir = os.path.expanduser('~') 10 | if not os.access(_alp_base_dir, os.W_OK): # pragma: no cover 11 | _alp_base_dir = '/tmp' 12 | 13 | 14 | _alp_dir = os.path.join(_alp_base_dir, '.alp') 15 | if not os.path.exists(_alp_dir): # pragma: no cover 16 | os.makedirs(_alp_dir) 17 | 18 | # Defaults 19 | 20 | # App config 21 | _broker = 'amqp://guest:guest@rabbitmq:5672//' 22 | _backend = 'mongodb://mongo_r:27017' 23 | 24 | # Parameters 25 | _path_h5 = '/parameters_h5/' 26 | 27 | if os.getenv("TEST_MODE") == "ON": # pragma: no cover 28 | _backend = 'mongodb://127.0.0.1:27018' 29 | _broker = 'amqp://guest:guest@localhost:5672//' 30 | 31 | elif os.getenv("WORKER") == "TRUE": # pragma: no cover 32 | _backend = 'mongodb://mongo_r:27017' # pragma: no cover 33 | _config_path = os.path.expanduser(os.path.join(_alp_dir, 'alpapp.json')) 34 | 35 | if os.path.exists(_config_path): # pragma: no cover 36 | _config = json.load(open(_config_path)) 37 | _broker = _config.get('broker', 
'amqp://guest:guest@rabbitmq:5672//') 38 | _backend = _config.get('backend', 'mongodb://mongo_r:27017') 39 | _path_h5 = _config.get('path_h5', '/parameters_h5/') 40 | 41 | # save config file 42 | _config = {'broker': _broker, 43 | 'backend': _backend, 44 | 'path_h5': _path_h5} 45 | 46 | with open(_config_path, 'w') as f: 47 | f.write(json.dumps(_config, indent=4)) 48 | 49 | __all__ = ["Experiment"] 50 | -------------------------------------------------------------------------------- /src/alp/appcom/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. codeauthor:: Thomas Boquet thomas.boquet@r2.ca 3 | 4 | A simple module to perform training and prediction of models 5 | ============================================================ 6 | 7 | Using `celery `_, this module helps to schedule 8 | the training of models if the users send enough models in a short 9 | period of time. 10 | 11 | """ 12 | 13 | import copy 14 | import sys 15 | 16 | from six.moves import zip as szip 17 | from ..appcom.utils import background 18 | from ..backend import common as cm 19 | from ..dbbackend import get_models 20 | from .utils import get_nb_chunks 21 | from .utils import init_backend 22 | from .utils import pickle_gen 23 | from .utils import switch_backend 24 | 25 | 26 | class Experiment(object): 27 | """An Experiment trains, predicts, saves and logs a model 28 | 29 | Attributes: 30 | model(model): the model used in the experiment 31 | metrics(list): a list of callables 32 | """ 33 | 34 | def __init__(self, model=None, metrics=None, verbose=0): 35 | self.model = model 36 | self.trained = False 37 | self.verbose = verbose 38 | self.metrics = metrics 39 | if model is not None: 40 | backend, backend_name, backend_version = init_backend(model) 41 | self.backend = backend 42 | self.backend_name = backend_name 43 | self.backend_version = backend_version 44 | self.model_dict = self.backend.to_dict_w_opt(self.model, 45 | self.metrics) 46 | else: 47 | self.backend = None 48 | self.backend_name = None 49 | self.backend_version = None 50 | self.model_dict = None 51 | 52 | @property 53 | def model_dict(self): 54 | return self.__model_dict 55 | 56 | @model_dict.setter 57 | def model_dict(self, model_dict): 58 | if isinstance(model_dict, dict) or model_dict is None: 59 | self.__model_dict = dict() 60 | self.__model_dict['model_arch'] = model_dict 61 | self.mod_id = None 62 | self.params_dump = None 63 | self.data_id = None 64 | else: 65 | self.model = model_dict 66 | backend, backend_name, backend_version = init_backend(model_dict) 67 | self.backend = backend 68 | self.backend_name = backend_name 69 | self.backend_version = backend_version 70 | self.__model_dict['model_arch'] = self.backend.to_dict_w_opt( 71 | self.model, self.metrics) 72 | self.mod_id = None 73 | self.params_dump = None 74 | self.data_id = None 75 | 76 | @property 77 | def params_dump(self): 78 | return self.__params_dump 79 | 80 | @params_dump.setter 81 | def params_dump(self, params_dump): 82 | self.__model_dict['params_dump'] = params_dump 83 | self.__params_dump = params_dump 84 | 85 | @property 86 | def mod_id(self): 87 | return self.__mod_id 88 | 89 | @mod_id.setter 90 | def mod_id(self, mod_id): 91 | self.__model_dict['mod_id'] = mod_id 92 | self.__mod_id = mod_id 93 | 94 | @property 95 | def data_id(self): 96 | return self.__data_id 97 | 98 | @data_id.setter 99 | def data_id(self, data_id): 100 | self.__model_dict['data_id'] = data_id 101 | self.__data_id = data_id 102 | 103 | def fit(self, data, 
data_val, model=None, *args, **kwargs): 104 | """Build and fit a model given data and hyperparameters 105 | 106 | Args: 107 | data(list(dict)): a list of dictionnaries mapping inputs and 108 | outputs names to numpy arrays for training. 109 | data_val(list(dict)): a list of dictionnaries mapping inputs and 110 | outputs names to numpy arrays for validation. 111 | model(model, optionnal): a model from a supported backend 112 | 113 | Returns: 114 | the id of the model in the db, the id of the data in the db and 115 | path to the parameters. 116 | """ 117 | res = self._prepare_fit(model, data, data_val, generator=False, 118 | delay=False, *args, **kwargs) 119 | return res 120 | 121 | def fit_async(self, data, data_val, model=None, 122 | *args, **kwargs): 123 | """Build and fit asynchronously a model given data and hyperparameters 124 | 125 | Args: 126 | data(list(dict)): a list of dictionnaries mapping inputs and 127 | outputs names to numpy arrays for training. 128 | data_val(list(dict)): a list of dictionnaries mapping inputs and 129 | outputs names to numpy arrays for validation. 130 | model(model, optionnal): a model from a supported backend 131 | 132 | Returns: 133 | the id of the model in the db, the id of the data in the db and a 134 | path to the parameters. 135 | """ 136 | res = self._prepare_fit(model, data, data_val, generator=False, 137 | delay=True, *args, **kwargs) 138 | 139 | return res 140 | 141 | def fit_gen(self, gen_train, data_val, 142 | model=None, *args, **kwargs): 143 | """Build and fit asynchronously a model given data and hyperparameters 144 | 145 | Args: 146 | gen_train(list(dict)): a list of generators. 147 | data_val(list(dict)): a list of dictionnaries mapping inputs and 148 | outputs names to numpy arrays or generators for validation. 149 | model(model, optionnal): a model from a supported backend 150 | 151 | Returns: 152 | the id of the model in the db, the id of the data in the db and a 153 | path to the parameters. 154 | """ 155 | res = self._prepare_fit(model, gen_train, data_val, generator=True, 156 | delay=False, *args, **kwargs) 157 | 158 | return res 159 | 160 | def fit_gen_async(self, gen_train, data_val, 161 | model=None, *args, **kwargs): 162 | """Build and fit asynchronously a model given generator(s) and 163 | hyperparameters. 164 | 165 | Args: 166 | gen_train(list(dict)): a list of generators. 167 | data_val(list(dict)): a list of dictionnaries mapping inputs and 168 | outputs names to numpy arrays or generators for validation. 169 | model(model, optionnal): a model from a supported backend 170 | 171 | Returns: 172 | the id of the model in the db, the id of the data in the db and a 173 | path to the parameters. 
174 | """ 175 | res = self._prepare_fit(model, gen_train, data_val, generator=True, 176 | delay=True, *args, **kwargs) 177 | return res 178 | 179 | def load_model(self, mod_id=None, data_id=None): 180 | """Load a model from the database form it's mod_id and data_id 181 | 182 | Args: 183 | mod_id(str): the id of the model in the database 184 | data_id(str): the id of the data in the database""" 185 | if mod_id is None and data_id is None: 186 | mod_id = self.mod_id 187 | data_id = self.data_id 188 | assert mod_id is not None, 'You must provide a model id' 189 | assert data_id is not None, 'You must provide a data id' 190 | models = get_models() 191 | model_db = models.find_one({'mod_id': mod_id, 'data_id': data_id}) 192 | self._switch_backend(model_db) 193 | self.model_dict = model_db['model_arch'] 194 | self.params_dump = model_db['params_dump'] 195 | self.mod_id = model_db['mod_id'] 196 | self.data_id = model_db['data_id'] 197 | self.full_res = None 198 | self.async_res = None 199 | self.trained = True 200 | 201 | return self 202 | 203 | def predict(self, data, *args, **kwargs): 204 | """Make predictions given data 205 | 206 | Args: 207 | data(np.array): 208 | 209 | Returns: 210 | an np.array of predictions""" 211 | return self._predict(data, async=False, *args, **kwargs) 212 | 213 | def predict_async(self, data, *args, **kwargs): 214 | """Make predictions given data 215 | 216 | Args: 217 | data(np.array): 218 | 219 | Returns: 220 | an np.array of predictions""" 221 | return self._predict(data, async=True, *args, **kwargs) 222 | 223 | def _predict(self, data, async, *args, **kwargs): 224 | """Make predictions given data 225 | 226 | Args: 227 | data(np.array): 228 | 229 | Returns: 230 | an np.array of predictions""" 231 | if self.trained: 232 | if async: 233 | return self.backend.predict.delay( 234 | copy.deepcopy(self.model_dict), data, async, *args, 235 | **kwargs) 236 | else: 237 | return self.backend.predict( 238 | copy.deepcopy(self.model_dict), data, async, *args, 239 | **kwargs) 240 | else: 241 | raise Exception("You must have a trained model" 242 | "in order to make predictions") 243 | 244 | def _check_compile(self, model, kwargs_m): 245 | """Check if we have to recompile and reserialize the model 246 | 247 | Args: 248 | model(a supported model): the model sent (could be None). 
249 | kwargs_m(dict): the keyword arguments passed to the wrapper 250 | """ 251 | _recompile = False 252 | if model is not None: 253 | self.model = model 254 | _recompile = True 255 | if "metrics" in kwargs_m: 256 | self.metrics = kwargs_m.pop("metrics") 257 | _recompile = True 258 | 259 | if _recompile is True: 260 | self.model_dict = self.backend.to_dict_w_opt(self.model, 261 | self.metrics) 262 | 263 | if self.model is None: 264 | raise Exception('No model provided') 265 | 266 | def _switch_backend(self, model_db): 267 | """A utility function to switch backend when loading a model 268 | 269 | Args: 270 | model_db(dict): the dictionnary stored in the database 271 | """ 272 | if model_db['backend_name'] != self.backend_name: 273 | backend = switch_backend(model_db['backend_name']) 274 | self.backend_name = backend.__name__ 275 | self.backend_version = None 276 | if self.backend_name == 'keras': 277 | from ..backend import keras_backend 278 | self.backend = keras_backend 279 | elif self.backend_name == 'sklearn': 280 | from ..backend import sklearn_backend 281 | self.backend = sklearn_backend 282 | if hasattr(backend, '__version__'): 283 | check = self.backend_version != backend.__version__ 284 | self.backend_version = backend.__version__ 285 | if check and self.verbose > 0: # pragma: no cover 286 | sys.stderr.write('Warning: the backend versions' 287 | 'do not match.\n') # pragma: no cover 288 | 289 | def _check_serialize(self, kwargs): 290 | """Serialize the object mapped in the kwargs 291 | 292 | Args: 293 | kwargs(dict): keyword arguments 294 | 295 | Returns: 296 | kwargs 297 | """ 298 | for k in kwargs: 299 | if k in self.backend.TO_SERIALIZE: 300 | if isinstance(kwargs[k], dict): 301 | kwargs[k] = {j: self.backend.serialize(kwargs[k][j]) 302 | for j in kwargs[k]} 303 | elif isinstance(kwargs[k], list): 304 | kwargs[k] = [self.backend.serialize(j) 305 | for j in kwargs[k]] 306 | else: 307 | raise TypeError('Your iterable should be a dict or a list') 308 | return kwargs 309 | 310 | def _prepare_message(self, model, data, data_val, kwargs, generator=False): 311 | """Prepare the elements to be passed to the backend 312 | 313 | Args: 314 | model(supported model): the model to be prepared 315 | data(list): the list of dicts or generators used for training 316 | data_val(list): the list of dicts or generator used for validation 317 | 318 | Returns: 319 | the transformed data object, the transformed validation data object, 320 | the data_hash 321 | """ 322 | self._check_compile(model, kwargs) 323 | kwargs = self._check_serialize(kwargs) 324 | gen_setup = [] 325 | 326 | if generator: 327 | nb_data_chunks = [get_nb_chunks(d) for d in data] 328 | nb_data_val_chunks = [get_nb_chunks(dv) for dv in data_val] 329 | for d_c, dv_c in szip(nb_data_chunks, nb_data_val_chunks): 330 | is_val_one = dv_c == 1 331 | is_train_one = d_c == 1 332 | 333 | if dv_c is not None: 334 | # many to one 335 | if d_c > dv_c and is_val_one: 336 | gen_setup.append(1) 337 | 338 | # one to many 339 | elif d_c < dv_c and is_train_one: 340 | gen_setup.append(2) 341 | 342 | # equal 343 | elif d_c == dv_c: 344 | gen_setup.append(3) 345 | 346 | else: # pragma: no cover 347 | Exception('Nb batches in train generator and' 348 | 'validation generator not compatible') 349 | 350 | data_hash = cm.create_gen_hash(data) 351 | data, data_val = pickle_gen(data, data_val) 352 | else: 353 | data_hash = cm.create_data_hash(data) 354 | 355 | return data, data_val, data_hash, gen_setup 356 | 357 | def _prepare_fit(self, model, data, data_val, 
358 | generator=False, delay=False, 359 | *args, **kwargs): 360 | """Prepare the model and the datasets and fit the model 361 | 362 | Args: 363 | model(a supported model): the model to send 364 | data(dict or generator): the training data 365 | data_val(dict or generator): the validation data 366 | generator(bool): if True, transforms the generators 367 | delay(bool): if True, fits the model in asynchronous mode 368 | """ 369 | 370 | data, data_val, data_hash, size_gen = self._prepare_message(model, 371 | data, 372 | data_val, 373 | kwargs, 374 | generator) 375 | 376 | f = self.backend.fit 377 | if delay: 378 | f = self.backend.fit.delay 379 | res = f(self.backend_name, 380 | self.backend_version, 381 | copy.deepcopy(self.model_dict), 382 | data, data_hash, data_val, 383 | size_gen=size_gen, 384 | generator=generator, 385 | *args, **kwargs) 386 | return self._handle_results(res, delay) 387 | 388 | def _handle_results(self, res, delay): 389 | """Modify the Experiment given the results received from the worker 390 | 391 | Args: 392 | res(celery result or dict): the results returned by the model 393 | delay(bool): if True the result is an async celery result 394 | 395 | Returns: 396 | the results and the thread used to handle the results""" 397 | if delay: 398 | thread = self._get_results(res) 399 | else: 400 | self.mod_id = res['model_id'] 401 | self.data_id = res['data_id'] 402 | self.params_dump = res['params_dump'] 403 | 404 | self.trained = True 405 | self.full_res = res 406 | thread = None 407 | return res, thread 408 | 409 | @background 410 | def _get_results(self, res): 411 | """Handle the results of an asynchronous task 412 | 413 | Args: 414 | res(async result): result of an asynchronous task""" 415 | self.async_res = res 416 | self.full_res = res.wait() # pragma: no cover 417 | self.trained = True # pragma: no cover 418 | self.mod_id = self.full_res['model_id'] # pragma: no cover 419 | self.data_id = self.full_res['data_id'] # pragma: no cover 420 | self.params_dump = self.full_res['params_dump'] # pragma: no cover 421 | if self.verbose > 0: # pragma: no cover 422 | print("Result {} | {} ready".format( 423 | self.mod_id, self.data_id)) # pragma: no cover 424 | -------------------------------------------------------------------------------- /src/alp/appcom/ensembles.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ensembles module 3 | ================ 4 | """ 5 | 6 | import warnings 7 | from time import time 8 | 9 | import numpy as np 10 | import pandas as pd 11 | from progressbar import ETA 12 | from progressbar import Bar 13 | from progressbar import DynamicMessage 14 | from progressbar import FormatLabel 15 | from progressbar import Percentage 16 | from progressbar import ProgressBar 17 | from progressbar import SimpleProgress 18 | 19 | 20 | def get_best(experiments, metric, op, partial=False): 21 | """Helper function for manipulation of a list of experiments 22 | 23 | In case of equality in the metric, the behaviour of op_arg determines the 24 | result. 25 | 26 | Args: 27 | experiments(list): a list of experiments 28 | metric(str): the name of a metric used in the experiments 29 | op (function): operation to perform with the metric (optional) 30 | partial(bool): if True will pass an experiment without result. Raise 31 | an error otherwise. 
32 | """ 33 | best_perf_expes = [] 34 | list_experiments = [] 35 | list_keys = [] 36 | not_ready = False 37 | for k, expe in experiments.items(): 38 | if not hasattr(expe, 'full_res'): # pragma: no cover 39 | if not partial: 40 | raise Exception('Results are not ready') 41 | else: 42 | not_ready = True 43 | else: 44 | best_perf_expes.append(op(expe.full_res['metrics'][metric])) 45 | list_experiments.append(expe) 46 | list_keys.append(k) 47 | 48 | if not_ready is True: # pragma: no cover 49 | warnings.warn('Some results are not ready: Using the best available' 50 | ' model.') 51 | 52 | if len(list_experiments) == 0: 53 | raise Exception('No result is ready yet') 54 | 55 | ar_expes = np.array(list_experiments) 56 | ar_keys = np.array(list_keys) 57 | perf_array = np.array(best_perf_expes) 58 | perf_nans = np.isnan(perf_array) 59 | if (1 - perf_nans).sum() == 0: 60 | raise Exception('The selected metric evaluations are all nans') 61 | 62 | best_perf_expes = perf_array[perf_nans == False] # NOQA 63 | bool_choice = op(best_perf_expes) == np.array(best_perf_expes) 64 | best = ar_expes[bool_choice] # NOQA 65 | best_key = ar_keys[bool_choice] 66 | return best[0], best_key[0] 67 | 68 | 69 | widgets = [Percentage(), ' ', 70 | SimpleProgress(), ' ', 71 | Bar(marker='=', left='[', right=']'), 72 | ' ', FormatLabel('in: %(elapsed)s'), ' ', 73 | ETA(), ' | ', 'job/', DynamicMessage('s')] 74 | 75 | 76 | class Ensemble(object): 77 | 78 | """Base class to build experiments containers able to execute batch 79 | sequences of action. Must implement the `fit`, `fit_gen`, `fit_async` 80 | `fit_gen_async` methods 81 | 82 | Args: 83 | experiments(dict or list): experiments to be wrapped. If a dictionnary 84 | is passed, it should map experiment names to experiments. 85 | """ 86 | def __init__(self, experiments): 87 | if isinstance(experiments, list): 88 | experiments = {i: v for i, v in enumerate(experiments)} 89 | if not isinstance(experiments, dict): # pragma: no cover 90 | raise TypeError('You must pass either an experiments dict or list') 91 | self.experiments = experiments 92 | 93 | def fit(self, data, data_val, *args, **kwargs): 94 | raise NotImplementedError 95 | 96 | def fit_gen(self, data, data_val, *args, **kwargs): 97 | raise NotImplementedError 98 | 99 | def fit_async(self, data, data_val, *args, **kwargs): 100 | raise NotImplementedError 101 | 102 | def fit_gen_async(self, data, data_val, *args, **kwargs): 103 | raise NotImplementedError 104 | 105 | def predict(self, data, data_val, *args, **kwargs): 106 | raise NotImplementedError 107 | 108 | def summary(self, metrics, verbose=False): 109 | raise NotImplementedError 110 | 111 | def plt_summary(self): 112 | raise NotImplementedError 113 | 114 | 115 | class HParamsSearch(Ensemble): 116 | """Hyper parameters search class 117 | 118 | Train several experiments with different hyperparameters and save results. 119 | Wraps the training process so that it's possible to access results easily. 120 | 121 | Args: 122 | experiments(dict or list): experiments to be wrapped. 
If a dictionnary 123 | is passed, it should map experiment names to experiments 124 | hyperparams(dict): a dict of hyperparameters 125 | metric(str): the name of a metric used in the experiments 126 | op(str): an operator to select a model 127 | 128 | """ 129 | def __init__(self, experiments, hyperparams=None, metric=None, op=None): 130 | super(HParamsSearch, self).__init__(experiments=experiments) 131 | self.hyperparams = hyperparams 132 | self.metric = metric 133 | self.op = op 134 | self.results = dict() 135 | 136 | def fit(self, data, data_val, *args, **kwargs): 137 | """Apply the fit method to all the experiments 138 | 139 | Args: 140 | see `alp.core.Experiment.fit` 141 | 142 | Returns: 143 | a list of results""" 144 | self._fit_cm(data, data_val, gen=False, async=False, *args, **kwargs) 145 | return self.results 146 | 147 | def fit_gen(self, data, data_val, *args, **kwargs): 148 | """Apply the fit_gen method to all the experiments 149 | 150 | Args: 151 | see :meth:`alp.appcom.core.Experiment.fit_gen` 152 | 153 | Returns: 154 | a list of results""" 155 | self._fit_cm(data, data_val, gen=True, async=False, *args, **kwargs) 156 | return self.results 157 | 158 | def fit_gen_async(self, data, data_val, *args, **kwargs): 159 | """Apply the fit_gen_async method to all the experiments 160 | 161 | Args: 162 | see :meth:`alp.appcom.core.Experiment.fit_gen_async` 163 | 164 | Returns: 165 | a list of results""" 166 | self._fit_cm(data, data_val, gen=True, async=True, *args, **kwargs) 167 | return self.results 168 | 169 | def fit_async(self, data, data_val, *args, **kwargs): 170 | """Apply the fit_async method to all the experiments 171 | 172 | Args: 173 | see :meth:`alp.appcom.core.Experiment.fit_async` 174 | 175 | Returns: 176 | a list of results""" 177 | self._fit_cm(data, data_val, gen=False, async=True, *args, **kwargs) 178 | return self.results 179 | 180 | def _fit_cm(self, data, data_val, gen, async, *args, **kwargs): 181 | with ProgressBar(max_value=len(self.experiments), 182 | redirect_stdout=True, 183 | widgets=widgets, term_width=80) as progress: 184 | for i, kv in enumerate(self.experiments.items()): 185 | k, expe = kv 186 | b = time() 187 | if gen and async: 188 | res = expe.fit_gen_async(data, data_val, *args, **kwargs) 189 | elif gen and not async: 190 | res = expe.fit_gen(data, data_val, *args, **kwargs) 191 | elif not gen and async: 192 | res = expe.fit_async(data, data_val, *args, **kwargs) 193 | else: 194 | res = expe.fit(data, data_val, *args, **kwargs) 195 | 196 | self.results[k] = res 197 | if i == 0: 198 | spent = time() - b 199 | to_print = spent 200 | else: 201 | spent += time() - b 202 | to_print = spent / (i + 1) 203 | progress.update(i, s=float(1 / to_print)) 204 | if expe.backend_name == 'keras' and async: # pragma: no cover 205 | import keras.backend as K 206 | if K.backend() == 'tensorflow': 207 | K.clear_session() 208 | return self.results 209 | 210 | def predict(self, data, metric=None, op=None, partial=False, 211 | *args, **kwargs): 212 | """Apply the predict method to all the experiments 213 | 214 | Args: 215 | see :meth:`alp.appcom.core.Experiment.predict` 216 | metric(str): the name of the metric to use 217 | op(function): an operator returning the value to select an 218 | experiment 219 | 220 | Returns: 221 | an array of results""" 222 | if not metric: 223 | metric = self.metric 224 | if not op: 225 | op = self.op 226 | 227 | if metric is None or op is None: 228 | raise Exception('You should provide a metric along with an op') 229 | best_exp, best_key = 
get_best(self.experiments, metric, op, partial) 230 | return best_key, best_exp.predict(data, *args, **kwargs) 231 | 232 | def summary(self, metrics, verbose=False): 233 | """Build a results table using individual results from models 234 | 235 | Args: 236 | verbose(bool): if True, print a description of the results 237 | metrics(dict): a dictionnary mapping metric's names to ops. 238 | 239 | Returns: 240 | a pandas DataFrame of results""" 241 | # build results table 242 | res_dict = dict() 243 | expes = self.experiments 244 | for kv in self.results.items(): 245 | k, res = kv 246 | res, t = res 247 | if t is not None: 248 | t.join() 249 | for kr, v in expes[k].full_res['metrics'].items(): 250 | if isinstance(v, list): 251 | if kr in metrics: 252 | op = metrics[kr] 253 | if kr in res_dict: 254 | res_dict[kr] += [op(v)] 255 | else: 256 | res_dict[kr] = [] 257 | res_dict[kr] += [op(v)] 258 | res_table = pd.DataFrame(res_dict) 259 | if verbose is True: 260 | print(res_table.describe()) 261 | return res_table 262 | -------------------------------------------------------------------------------- /src/alp/appcom/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for the appcom module 3 | ======================================= 4 | """ 5 | 6 | import functools 7 | import pickle 8 | import threading 9 | from itertools import islice 10 | 11 | from six.moves import zip as szip 12 | 13 | 14 | def _get_backend_attributes(ABE): 15 | """Gets the backend attributes. 16 | 17 | Args: 18 | ABE(module): the module to get attributes from. 19 | 20 | Returns: 21 | the backend, the backend name and the backend version 22 | 23 | """ 24 | backend_m = ABE.get_backend() 25 | backend_name = backend_m.__name__ 26 | if hasattr(backend_m, '__version__'): 27 | backend_version = backend_m.__version__ 28 | else: # pragma: no cover 29 | backend_version = None 30 | 31 | return ABE, backend_name, backend_version 32 | 33 | 34 | def init_backend(model): 35 | """Initialization of the backend 36 | 37 | Args: 38 | backend(str): only 'keras' or 'sklearn' at the moment 39 | 40 | Returns: 41 | the backend, the backend name and the backend version 42 | """ 43 | if 'keras' in repr(model): 44 | from ..backend import keras_backend as ABE 45 | elif 'sklearn' in repr(type(model)): 46 | from ..backend import sklearn_backend as ABE 47 | else: 48 | raise NotImplementedError( 49 | "this backend is not supported: {}".format( 50 | model)) # pragma: no cover 51 | 52 | return _get_backend_attributes(ABE) 53 | 54 | 55 | def switch_backend(backend_name): 56 | """Switch the backend based on it's name 57 | 58 | Args: 59 | backend_name(str): the name of the backend to import 60 | 61 | Return: 62 | the backend asked""" 63 | if backend_name == 'keras': 64 | from ..backend.keras_backend import get_backend 65 | elif backend_name == 'sklearn': 66 | from ..backend.sklearn_backend import get_backend 67 | else: 68 | raise NotImplementedError 69 | return get_backend() 70 | 71 | 72 | def list_to_dict(list_to_transform): 73 | """Transform a list of object to a dict 74 | 75 | Args: 76 | list_to_transform(list): the list to transform 77 | 78 | Returns: 79 | a dictionnary mapping names of the objects to objects""" 80 | return {el.__name__: el for el in list_to_transform} 81 | 82 | 83 | def background(f): 84 | ''' 85 | a threading decorator 86 | use @background above the function you want to run in the background 87 | ''' 88 | @functools.wraps(f) 89 | def bg_f(*a, **kw): 90 | t = 
threading.Thread(target=f, args=a, kwargs=kw) 91 | t.start() 92 | return t 93 | return bg_f 94 | 95 | 96 | def imports(packages=None): 97 | """A decorator to import packages only once when a function is serialized 98 | 99 | Args: 100 | packages(list or dict): a list or dict of packages to import. If the 101 | object is a dict, the name of the import is the key and the value 102 | is the module. If the object is a list, it's transformed to a dict 103 | mapping the name of the module to the imported module. 104 | """ 105 | if packages is None: 106 | packages = dict() 107 | 108 | def dec(wrapped): 109 | @functools.wraps(wrapped) 110 | def inner(*args, **kwargs): 111 | packs = packages 112 | if isinstance(packages, list): 113 | packs = list_to_dict(packages) 114 | for name, pack in packs.items(): 115 | if name not in wrapped.__globals__: 116 | wrapped.__globals__[name] = pack 117 | return wrapped(*args, **kwargs) 118 | return inner 119 | return dec 120 | 121 | 122 | def norm_iterator(iterable): 123 | """returns a normalized iterable of tuples""" 124 | if isinstance(iterable, list): 125 | names = ['list_' + str(i) for i, j in enumerate(iterable)] 126 | return szip(names, iterable) 127 | else: 128 | raise NotImplementedError('Iterables other than lists ' 129 | 'cannot be passed to this function') 130 | 131 | 132 | def window(seq, n=2): 133 | """Returns a sliding window (of width n) over data from the iterable""" 134 | it = iter(seq) 135 | result = tuple(islice(it, n)) 136 | if len(result) == n: # pragma: no cover 137 | yield result 138 | for elem in it: 139 | result = result[1:] + (elem,) 140 | yield result 141 | 142 | 143 | def to_fuel_h5(inputs, outputs, slices, names, 144 | file_name, file_path=''): 145 | """Transforms list of numpy arrays to a structured hdf5 file 146 | 147 | 148 | Args: 149 | inputs(list): a list of inputs(numpy.arrays) 150 | outputs(list): a list of outputs(numpy.arrays) 151 | slices(list): a list of int representing the end of a slice and the 152 | begining of another slice. The last slice is automatically added 153 | if missing (maximum length of the inputs). 154 | names(list): a list of names for the datasets 155 | file_name(str): the name of the file to save. 156 | file_path(str): the path where the file is located 157 | 158 | Returns: 159 | The file full path 160 | """ 161 | import h5py 162 | import os 163 | from fuel.datasets.hdf5 import H5PYDataset 164 | 165 | suffix = 'hdf5' 166 | 167 | inp = 'input_' 168 | out = 'output_' 169 | 170 | full_path = os.path.join(file_path, file_name.lower() + '.' + suffix) 171 | f = h5py.File(full_path, mode='w') 172 | 173 | dict_data_set = dict() 174 | split_dict = dict() 175 | for name in names: 176 | split_dict[name] = dict() 177 | 178 | slices.append(max_v_len(inputs)) 179 | 180 | def insert_info_h5(iterable, suf): 181 | names_out = [] 182 | for k, v in norm_iterator(iterable): 183 | dict_data_set[suf + k] = f.create_dataset(suf + k, v.shape, 184 | v.dtype) 185 | dict_data_set[suf + k][...] 
= v 186 | for sl, name in zip(window(slices, 2), names): 187 | split_dict[name][suf + k] = sl 188 | names_out.append(suf + str(k)) 189 | return names_out 190 | 191 | inputs_names = insert_info_h5(inputs, inp) 192 | outputs_names = insert_info_h5(outputs, out) 193 | 194 | f.attrs['split'] = H5PYDataset.create_split_array(split_dict) 195 | f.flush() 196 | f.close() 197 | return full_path, inputs_names, outputs_names 198 | 199 | 200 | def max_v_len(iterable_to_check): 201 | """Returns the max length of a list of iterable""" 202 | max_v = 0 203 | for _, v in norm_iterator(iterable_to_check): 204 | if len(v) > max_v: # pragma: no cover 205 | max_v = len(v) 206 | return max_v 207 | 208 | 209 | def pickle_gen(gen_train, data_val): 210 | """Check and serialize the validation data object and serialize the 211 | training data generator. 212 | 213 | Args: 214 | gen_train(generator): the training data generator 215 | data_val(dict or generator): the training data object 216 | 217 | Returns: 218 | normalized datasets""" 219 | gen_train = [pickle.dumps(g).decode('raw_unicode_escape') 220 | for g in gen_train] 221 | 222 | val_gen = check_gen(data_val) 223 | 224 | if val_gen: 225 | data_val = [pickle.dumps(g).decode('raw_unicode_escape') 226 | for g in data_val] 227 | return gen_train, data_val 228 | 229 | 230 | def check_gen(iterable): 231 | """Check if the last object of the iterable is an iterator 232 | 233 | Args: 234 | iterable(list): a list containing data. 235 | 236 | Returns: 237 | True if the last object is a generator, False otherwise. 238 | """ 239 | is_gen = (hasattr(iterable[-1], 'next') or 240 | hasattr(iterable[-1], '__next__')) 241 | is_gen += 'fuel' in repr(iterable[-1]) 242 | 243 | return is_gen 244 | 245 | 246 | def get_nb_chunks(generator): 247 | """Get the number of chunks that yields a generator 248 | 249 | Args: 250 | generator: a Fuel generator 251 | 252 | Returns: 253 | number of chunks (int)""" 254 | if hasattr(generator, 'iteration_scheme'): 255 | if generator.iteration_scheme is not None: 256 | batch_size = generator.iteration_scheme.batch_size 257 | nb_examples = len(generator.iteration_scheme.indices) 258 | return nb_examples // batch_size 259 | else: 260 | if hasattr(generator, 'data_stream'): 261 | return get_nb_chunks(generator.data_stream) 262 | else: 263 | raise Exception('No data stream in the generator') 264 | -------------------------------------------------------------------------------- /src/alp/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tboquet/python-alp/5c53d4a8bbcb197bbaebb84a3ac16d721de331c5/src/alp/backend/__init__.py -------------------------------------------------------------------------------- /src/alp/backend/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions used in every backend 3 | =============================== 4 | """ 5 | 6 | import copy 7 | import hashlib 8 | import json 9 | import os 10 | import pickle 11 | from datetime import datetime 12 | 13 | import numpy as np 14 | 15 | 16 | def clean_model(model): 17 | """Clean a dict of a model of uncessary elements 18 | 19 | Args: 20 | model(dict): a dictionnary of the model 21 | 22 | Returns: 23 | a new cleaned dict""" 24 | model_c = copy.deepcopy(model) 25 | if 'ser_metrics' in model_c['model_arch']: 26 | model_c['model_arch'].pop('ser_metrics') 27 | if 'metrics' in model_c['model_arch']: 28 | model_c['model_arch'].pop('metrics') 29 | return model_c 
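
# Illustrative sketch of ``clean_model``: ``dummy`` below is a made-up
# stand-in for a serialized model dict, not a real backend dump. Only the
# metric entries are stripped, and the input dict is left untouched because
# the function works on a deep copy.
#
#     >>> dummy = {'model_arch': {'config': [], 'metrics': ['accuracy'],
#     ...                         'ser_metrics': ['<serialized>']}}
#     >>> sorted(clean_model(dummy)['model_arch'])
#     ['config']
#     >>> sorted(dummy['model_arch'])
#     ['config', 'metrics', 'ser_metrics']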
30 | 31 | 32 | def create_model_hash(model, batch_size): 33 | """Creates a hash based on the dict of a model and the batch size 34 | 35 | Args: 36 | model(dict): a dictionnary of the model 37 | batch_size(int): the batch size 38 | 39 | Returns: 40 | a md5 hash of the model""" 41 | # convert dict to json string 42 | model_str = json.dumps(model) 43 | 44 | # create the model hash from the stringified json 45 | mh = hashlib.md5() 46 | str_concat_m = str(model_str) + str(batch_size) 47 | mh.update(str_concat_m.encode('utf-8')) 48 | return mh.hexdigest() 49 | 50 | 51 | def create_data_hash(data): 52 | """Creates a hash based on the data passed 53 | 54 | The unique descriptors are based on the mean of the arrays passed and the 55 | sum of all the elements of the first lines of the first axis. 56 | 57 | Args: 58 | data(list): a dictionnary of the model 59 | 60 | Returns: 61 | a md5 hash of the data""" 62 | un_data_m = 0 63 | un_data_f = 0 64 | for i, _ in enumerate(data): 65 | for key in data[i]: 66 | un_data_m += data[i][key].mean() 67 | un_data_f += data[i][key][0].sum() 68 | 69 | dh = hashlib.md5() 70 | str_concat_d = str(un_data_m) + str(un_data_f) 71 | dh.update(str_concat_d.encode('utf-8')) 72 | return dh.hexdigest() 73 | 74 | 75 | def create_gen_hash(gen): 76 | """Creates a hash based on the data passed 77 | 78 | The unique descriptors are based on the mean of the arrays passed and the 79 | sum of all the elements of the first lines of the first axis. 80 | 81 | Args: 82 | data(list): a dictionnary of the model 83 | 84 | Returns: 85 | a md5 hash of the data""" 86 | pickle_gen = pickle.dumps(gen) 87 | dh = hashlib.md5() 88 | str_concat_g = str(pickle_gen) 89 | dh.update(str_concat_g.encode('utf-8')) 90 | return dh.hexdigest() 91 | 92 | 93 | def create_param_dump(_path_h5, hexdi_m, hexdi_d): 94 | """Create a the path where to dump the params 95 | 96 | Args: 97 | _path_h5(str): the base path 98 | hexdi_m(str): the model hash 99 | hexdi_d(str): the data hash 100 | 101 | Returns: 102 | the full path where to dump the params""" 103 | return os.path.join(os.path.sep, _path_h5, hexdi_m + hexdi_d + '.h5') 104 | 105 | 106 | def make_all_hash(model_c, batch_size, data_hash, _path_h5): 107 | """Generate a hash for the model and the name of the file where 108 | the parameters are dumped""" 109 | hexdi_m = create_model_hash(model_c, batch_size) 110 | params_dump = create_param_dump(_path_h5, hexdi_m, data_hash) 111 | return hexdi_m, params_dump 112 | 113 | 114 | def open_dataset_gen(generator): 115 | """Open a fuel dataset given a fuel pipeline 116 | 117 | This function is recursive and search for the data_set attribute.""" 118 | if hasattr(generator, 'data_stream'): 119 | data_stream = generator.data_stream 120 | if hasattr(data_stream, 'dataset'): 121 | data_stream.dataset.open() 122 | else: # pragma: no cover 123 | open_dataset_gen(data_stream) 124 | elif hasattr(generator, 'dataset'): 125 | generator.dataset.open() 126 | else: 127 | raise NotImplementedError('not able to open the dataset') 128 | 129 | 130 | def transform_gen(gen_train, mod_name): 131 | """Transform generators of tupple to generators of dicts 132 | 133 | Args: 134 | gen_train(Fuel data stream): a fuel training data generator 135 | gen_val(Fuel data stream): a fuel validation data generator 136 | 137 | Yield: 138 | a dictionnary mapping training and testing data to numpy arrays if 139 | the model is a graph, a tupple (inputs, ouputs) instead.""" 140 | names_dict = gen_train.sources 141 | 142 | inp = 'input_' 143 | out = 'output_' 144 | 
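    # Source names written by ``to_fuel_h5`` look like ``input_list_0`` /
    # ``output_list_0`` (see ``norm_iterator`` and ``insert_info_h5`` in
    # appcom.utils), so the prefixes above and the ``list`` marker below are
    # enough to route each yielded array to the inputs or outputs list.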
145 | li = 'list' 146 | 147 | open_dataset_gen(gen_train) 148 | 149 | while 1: 150 | for d in gen_train.get_epoch_iterator(): 151 | data = zip(d, names_dict) 152 | inputs_list = [] 153 | outputs_list = [] 154 | for arr, name in data: 155 | if inp in name: 156 | if li in name: 157 | inputs_list.append(arr) 158 | elif out in name: 159 | if li in name: 160 | outputs_list.append(arr) 161 | elif 'index' in name: # pragma: no cover 162 | pass 163 | else: # pragma: no cover 164 | raise("Not input nor output, please check your generator") 165 | data_out = (inputs_list, outputs_list) 166 | yield data_out 167 | 168 | 169 | def train_pipe(train_f, save_f, model, data, data_val, generator, size_gen, 170 | params_dump, data_hash, hexdi_m, 171 | *args, **kwargs): 172 | """Common function to train models for all backends 173 | 174 | Args: 175 | train_f(function): the train function to use 176 | save_f(function): the function used to save parameters""" 177 | results, model = train_f(model['model_arch'], data, 178 | data_val, size_gen, 179 | generator=generator, 180 | *args, **kwargs) 181 | res_dict = { 182 | 'iter_stopped': results['metrics']['iter'], 183 | 'trained': 1, 184 | 'date_finished_training': datetime.now()} 185 | for metric in results['metrics']: 186 | res_dict[metric] = results['metrics'][metric] 187 | if metric in ['loss', 'val_loss']: 188 | res_dict[metric] = np.min(results['metrics'][metric]) 189 | 190 | save_f(model, params_dump) 191 | results['model_id'] = hexdi_m 192 | results['data_id'] = data_hash 193 | results['params_dump'] = params_dump 194 | return results, res_dict 195 | 196 | 197 | def on_worker(): 198 | return os.getenv("ON_WORKER") == "TRUE" 199 | -------------------------------------------------------------------------------- /src/alp/celapp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Celery config 3 | ============= 4 | """ 5 | 6 | from celery import Celery 7 | from . import appcom as apc 8 | 9 | 10 | RESULT_SERIALIZER = 'json' 11 | 12 | app = Celery(broker=apc._broker, 13 | backend=apc._backend) 14 | 15 | app.conf.update(task_serializer='pickle', 16 | result_serializer=RESULT_SERIALIZER, 17 | accept_content=['pickle', 'json']) 18 | -------------------------------------------------------------------------------- /src/alp/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | CLI to launch ALP services 3 | ========================== 4 | """ 5 | 6 | import os 7 | import click 8 | import pandas as pd 9 | from docker import Client 10 | from . 
import __version__ 11 | from .cli_utils import a_text 12 | from .cli_utils import action_config 13 | from .cli_utils import banner 14 | from .cli_utils import col_info 15 | from .cli_utils import col_warn 16 | from .cli_utils import gen_all_configs 17 | from .cli_utils import get_config_names 18 | from .cli_utils import open_config 19 | from .cli_utils import pass_config 20 | from .cli_utils import pull_config 21 | 22 | 23 | @click.group() 24 | @click.option('--verbose', is_flag=True) 25 | @pass_config 26 | def main(conf, verbose): 27 | """ 28 | The alp command provide you with a number of options to manage alp services 29 | """ 30 | docker_client = Client('unix://var/run/docker.sock') 31 | kernel_version = docker_client.info()['ServerVersion'] 32 | click.echo(click.style(banner, fg=col_info, bold=True)) 33 | click.echo(click.style('Version: {}'.format(__version__), 34 | fg=col_info, bold=True)) 35 | click.echo(click.style('Running with Docker version: {}'.format( 36 | kernel_version), fg=col_info, bold=True)) 37 | click.echo(click.style('\n')) 38 | conf.verbose = verbose 39 | return 0 40 | 41 | 42 | @main.command() 43 | @click.option('--force', is_flag=True) 44 | @click.option('--dry_run', is_flag=True) 45 | @click.argument('action', type=click.STRING, required=True) 46 | @click.argument('config', type=click.Path(exists=True), required=True) 47 | @pass_config 48 | def service(conf, force, dry_run, action, config): 49 | """Subcommand to take action on services""" 50 | config = open_config(config, conf.verbose) 51 | if action == 'start': 52 | results = action_config(config, 'run', conf.verbose, force=force, 53 | dry_run=dry_run) 54 | elif action == 'stop': 55 | results = action_config(config, 'stop', conf.verbose, force=force, 56 | dry_run=dry_run) 57 | elif action == 'restart': 58 | results = action_config(config, 'restart', conf.verbose, force=force, 59 | dry_run=dry_run) 60 | elif action == 'rm': 61 | results = action_config(config, 'rm', conf.verbose, force=force, 62 | dry_run=dry_run) 63 | else: 64 | raise Exception('Action must be in start, stop, restart, rm') 65 | return results 66 | 67 | 68 | @main.command() 69 | @click.argument('config', type=click.Path(exists=True), required=True) 70 | @pass_config 71 | def status(conf, config): 72 | """Get the status of the running containers""" 73 | config = open_config(config) 74 | docker_client = Client('unix://var/run/docker.sock') 75 | all_containers = docker_client.containers(all=True) 76 | running_containers = [] 77 | running_ids = dict() 78 | 79 | names = get_config_names(config) 80 | for container in all_containers: 81 | name = container['Names'][0].replace('/', '') 82 | if name in names: 83 | print_cont = dict() 84 | print_cont['name'] = name 85 | print_cont['status'] = container['Status'] 86 | print_cont['image'] = container['Image'] 87 | print_cont['image_id'] = container['ImageID'] 88 | running_ids[container['ImageID']] = print_cont['image'] 89 | print_cont['ports'] = [] 90 | if 'Ports' in container: # pragma: no cover 91 | for port in container['Ports']: 92 | pub_port = None 93 | priv_port = None 94 | if 'PublicPort' in port: 95 | pub_port = port['PublicPort'] 96 | if 'PrivatePort' in port: 97 | priv_port = port['PrivatePort'] 98 | if pub_port: 99 | print_cont['ports'] += ['{}:{}'.format(pub_port, 100 | priv_port)] 101 | running_containers.append(print_cont) 102 | else: # pragma: no cover 103 | click.echo(click.style( 104 | a_text('{}'.format(name), 'not in the config'), 105 | fg=col_warn)) 106 | 107 | 
click.echo(click.style('Running containers'.center(80, '='), 108 | fg=col_info, bold=True)) 109 | click.echo() 110 | for container in running_containers: 111 | click.echo(click.style('{}'.format(container['name']).center(80, '-'), 112 | fg=col_info, bold=True)) 113 | for k in container: 114 | if isinstance(container[k], list): 115 | container[k] = ' '.join(container[k]) 116 | if len(container[k]) > 40: 117 | cut = len(container[k]) - 40 118 | container[k] = container[k][:cut - 3] + '...' 119 | click.echo(click.style(a_text(k, container[k]), 120 | fg=col_info)) 121 | click.echo('\n') 122 | images = docker_client.images() 123 | 124 | click.echo(click.style('Images from the config'.center(80, '='), 125 | fg=col_info, bold=True)) 126 | click.echo() 127 | for image in images: 128 | if image['Id'] in running_ids: 129 | print_im = dict() 130 | print_im['name'] = '{}'.format(running_ids[image['Id']]) 131 | print_im['created'] = pd.to_datetime(image['Created'] * 1e9) 132 | print_im['created'] = print_im['created'].strftime( 133 | '%Y-%m-%d %H:%M') 134 | print_im['size'] = '{:.2f}'.format(image['Size'] / 1000000000.) 135 | 136 | click.echo(click.style( 137 | '{}'.format(print_im['name']).center(80, '-'), 138 | fg=col_info, bold=True)) 139 | for k in print_im: 140 | if isinstance(print_im[k], list): # pragma: no cover 141 | container[k] = ' '.join(print_im[k]) 142 | if len(print_im[k]) > 40: # pragma: no cover 143 | cut = len(print_im[k]) - 40 144 | container[k] = print_im[k][:cut - 3] + '...' 145 | click.echo(click.style(a_text(k, print_im[k]), 146 | fg=col_info)) 147 | click.echo('\n') 148 | return 0 149 | 150 | 151 | @main.command() 152 | @click.option('--force', is_flag=True) 153 | @click.argument('config', type=click.Path(exists=True), required=True) 154 | @pass_config 155 | def update(conf, config, force): 156 | """Pull, stop, remove and rerun all containers""" 157 | config = open_config(config) 158 | pull_config(config, conf.verbose) 159 | res_stop = action_config(config, 'stop', conf.verbose, force=force) 160 | res_rm = action_config(config, 'rm', conf.verbose, force=force) 161 | res_run = action_config(config, 'run', conf.verbose, force=force) 162 | succeeded = all([res_stop, res_rm, res_run]) 163 | return succeeded 164 | 165 | 166 | @main.command() 167 | @click.argument('config', type=click.Path(exists=True), required=True) 168 | @pass_config 169 | def pull(conf, config): 170 | """Pull containers""" 171 | config = open_config(config) 172 | res = pull_config(config, conf.verbose) 173 | return res 174 | 175 | 176 | @main.command() 177 | @click.option('--outdir', type=click.Path(exists=True)) 178 | @click.option('--namesuf', type=click.STRING, default='') 179 | @click.option('--portshift', type=click.INT, default=0) 180 | @click.option('--rootfolder', type=click.Path(exists=True)) 181 | @click.option('--controlers', type=click.INT, default=1) 182 | @click.option('--skworkers', type=click.INT, default=1) 183 | @click.option('--kworkers', type=click.INT, default=1) 184 | @click.option('--cpu', is_flag=True) 185 | @pass_config 186 | def genconfig(conf, outdir, namesuf, portshift, rootfolder, controlers, 187 | skworkers, kworkers, cpu): 188 | """Generates and writes configurations files in .alp""" 189 | 190 | if outdir is None: 191 | outdir = os.path.expanduser('~') 192 | if not os.access(outdir, os.W_OK): # pragma: no cover 193 | outdir = '/tmp' 194 | outdir = os.path.join(outdir, '.alp') 195 | else: 196 | if not os.access(outdir, os.W_OK): # pragma: no cover 197 | raise IOError('Cannot access 
directory') 198 | outdir = os.path.join(outdir, '.alp') 199 | if not os.path.exists(outdir): # pragma: no cover 200 | os.makedirs(outdir) 201 | 202 | alpapp, alpdb, containers = gen_all_configs(outdir, namesuf, portshift, 203 | rootfolder, controlers, 204 | skworkers, kworkers, cpu) 205 | 206 | if conf.verbose: 207 | click.echo(click.style('Auto generated configuration:', fg=col_info)) 208 | click.echo(click.style(a_text(' Controlers', str(controlers)), 209 | fg=col_info)) 210 | click.echo(click.style(a_text(' Sklearn workers', str(skworkers)), 211 | fg=col_info)) 212 | click.echo(click.style(a_text(' Keras workers', str(kworkers)), 213 | fg=col_info)) 214 | click.echo() 215 | 216 | # dump configs in .alp 217 | 218 | alpapp_json = os.path.join(outdir, 'alpapp.json') 219 | alpdb_json = os.path.join(outdir, 'alpdb.json') 220 | containers_json = os.path.join(outdir, 'containers.json') 221 | 222 | with open(alpapp_json, 'w') as f: 223 | f.write(alpapp) 224 | 225 | with open(alpdb_json, 'w') as f: 226 | f.write(alpdb) 227 | 228 | with open(containers_json, 'w') as f: 229 | f.write(containers) 230 | 231 | return 0 232 | -------------------------------------------------------------------------------- /src/alp/dbbackend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import json 5 | import os 6 | 7 | 8 | _alp_base_dir = os.path.expanduser('~') 9 | if not os.access(_alp_base_dir, os.W_OK): # pragma: no cover 10 | _alp_base_dir = '/tmp' 11 | 12 | 13 | _alp_dir = os.path.join(_alp_base_dir, '.alp') 14 | if not os.path.exists(_alp_dir): # pragma: no cover 15 | os.makedirs(_alp_dir) 16 | 17 | _db_engine = 'mongodb' 18 | _host_adress = 'mongo_m' 19 | _host_port = 27017 20 | _db_name = 'modelization' 21 | _models_collection = 'models' 22 | _generators_collection = 'generators' 23 | 24 | if os.getenv("TEST_MODE") == "ON": # pragma: no cover 25 | _host_adress = '127.0.0.1' 26 | 27 | # note: we have to be able to accept other structures 28 | 29 | _config_path = os.path.expanduser(os.path.join(_alp_dir, 'alpdb.json')) 30 | if os.path.exists(_config_path): # pragma: no cover 31 | _config = json.load(open(_config_path)) 32 | _db_engine = _config.get('db_engine', 'mongodb') 33 | assert _db_engine in {'mongodb'} 34 | _host_adress = _config.get('host_adress', 'mongo_m') 35 | _host_port = _config.get('host_port', 27017) 36 | _db_name = _config.get('db_name', 'modelization') 37 | _models_collection = _config.get('_models_collection', 'models') 38 | _generators_collection = _config.get('_generators_collection', 'models') 39 | 40 | # save config file 41 | _config = {'db_engine': _db_engine, 42 | 'host_adress': _host_adress, 43 | 'host_port': _host_port, 44 | 'db_name': _db_name, 45 | 'models_collection': _models_collection, 46 | 'generators_collection': _generators_collection} 47 | 48 | with open(_config_path, 'w') as f: 49 | f.write(json.dumps(_config, indent=4)) 50 | 51 | # import backend 52 | if _db_engine == 'mongodb': 53 | from ..dbbackend.mongo_backend import * # NOQA 54 | else: 55 | raise Exception('Unknown backend: ' + str(_db_engine)) 56 | -------------------------------------------------------------------------------- /src/alp/dbbackend/mongo_backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model database setup 3 | ==================== 4 | """ 5 | 6 | from pymongo import DESCENDING 7 | from pymongo import 
MongoClient 8 | from pymongo import ReturnDocument 9 | from ..dbbackend import _db_name 10 | from ..dbbackend import _generators_collection 11 | from ..dbbackend import _host_adress 12 | from ..dbbackend import _host_port 13 | from ..dbbackend import _models_collection 14 | 15 | 16 | def get_models(): 17 | """Utility function to retrieve the collection of models 18 | 19 | Returns: 20 | the collection of models""" 21 | client = MongoClient(_host_adress, _host_port) 22 | modelization = client[_db_name] 23 | return modelization[_models_collection] 24 | 25 | 26 | def get_generators(): 27 | """Utility function to retrieve the collection of generators 28 | 29 | Returns: 30 | the collection of generators""" 31 | client = MongoClient(_host_adress, _host_port) 32 | modelization = client[_db_name] 33 | return modelization[_generators_collection] 34 | 35 | 36 | def insert(full_json, collection, upsert=False): 37 | """Insert an observation in the db 38 | 39 | Args: 40 | full_json(dict): a dictionnary mapping variable names to 41 | carateristics of object. This dictionnary must have the 42 | mod_data_id key. 43 | 44 | Returns: 45 | the id of the inserted object in the db""" 46 | filter_db = dict() 47 | filter_db['mod_data_id'] = full_json['mod_data_id'] 48 | doc_id = collection.find_one(filter_db) 49 | if doc_id is not None: 50 | doc_id = doc_id['_id'] 51 | if upsert is True: 52 | inserted = collection.find_one_and_update( 53 | filter_db, {'$set': full_json}, upsert=upsert, 54 | return_document=ReturnDocument.AFTER) 55 | inserted = inserted['_id'] 56 | else: 57 | inserted = collection.insert_one(full_json).inserted_id 58 | return inserted 59 | 60 | 61 | def update(inserted_id, json_changes): 62 | """Update an observation in the db 63 | 64 | Args: 65 | insert_id(int): the id of the observation 66 | json_changes(dict): the changes to do in the db""" 67 | models = get_models() 68 | updated = models.update_one(inserted_id, json_changes) 69 | return updated 70 | 71 | 72 | def create_db(drop=True): 73 | """Delete (and optionnaly drop) the modelization database and collection""" 74 | client = MongoClient(_host_adress, _host_port) 75 | modelization = client[_db_name] 76 | if drop: 77 | modelization.drop_collection(_models_collection) 78 | models = modelization['models'] 79 | return models.create_index([('mod_data_id', DESCENDING)], 80 | unique=True) 81 | -------------------------------------------------------------------------------- /src/alp/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tboquet/python-alp/5c53d4a8bbcb197bbaebb84a3ac16d721de331c5/src/alp/utils/__init__.py -------------------------------------------------------------------------------- /src/alp/utils/utils_tests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from fuel.datasets.hdf5 import H5PYDataset 3 | from fuel.schemes import SequentialScheme 4 | from fuel.streams import DataStream 5 | from fuel.transformers import ScaleAndShift 6 | from keras.layers import Dense 7 | from keras.layers import Dropout 8 | from keras.layers import Input 9 | from keras.models import Model 10 | from keras.models import Sequential 11 | from keras.utils import np_utils 12 | from keras.utils.test_utils import get_test_data 13 | 14 | from alp.appcom.utils import to_fuel_h5 15 | 16 | 17 | input_dim = 2 18 | nb_hidden = 4 19 | nb_class = 2 20 | batch_size = 4 21 | train_samples = 256 22 | test_samples = 128 23 | 24 | 
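
# Convenience helper for the tests: closes the ScaleAndShift stream, the
# underlying H5PYDataset and the raw DataStream returned by ``make_gen``.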
25 | def close_gens(gen, data, data_stream): 26 | gen.close() 27 | data.close(None) 28 | data_stream.close() 29 | 30 | 31 | def make_data(train_samples, test_samples): 32 | (X_tr, y_tr), (X_te, y_te) = get_test_data(nb_train=train_samples, 33 | nb_test=test_samples, 34 | input_shape=(input_dim,), 35 | classification=True, 36 | nb_class=nb_class) 37 | 38 | y_tr = np_utils.to_categorical(y_tr) 39 | y_te = np_utils.to_categorical(y_te) 40 | 41 | data, data_val = dict(), dict() 42 | 43 | data["X"] = X_tr 44 | data["y"] = y_tr 45 | 46 | data_val["X"] = X_te 47 | data_val["y"] = y_te 48 | return data, data_val 49 | 50 | 51 | def dump_data(train_samples, test_samples): 52 | data, data_val = make_data(train_samples, test_samples) 53 | inputs = [np.concatenate([data['X'], data_val['X']])] 54 | outputs = [np.concatenate([data['y'], data_val['y']])] 55 | 56 | file_name = 'test_data' 57 | scale = 1.0 / inputs[0].std(axis=0) 58 | shift = - scale * inputs[0].mean(axis=0) 59 | 60 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 256], 61 | ['train', 'test'], 62 | file_name, 63 | '/data_generator') 64 | return file_path, scale, shift, i_names, o_names 65 | 66 | 67 | file_path, scale, shift, i_names, o_names = dump_data(train_samples, test_samples) 68 | 69 | 70 | def make_gen(batch_size, examples=4): 71 | file_path_f = file_path 72 | names_select = i_names 73 | train_set = H5PYDataset(file_path_f, 74 | which_sets=('train', 'test')) 75 | 76 | scheme = SequentialScheme(examples=examples, batch_size=batch_size) 77 | 78 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme) 79 | 80 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train, 81 | scale=scale, shift=shift, 82 | which_sources=(names_select[-1],)) 83 | return stand_stream_train, train_set, data_stream_train 84 | 85 | 86 | def sequential(custom=False, nb_hidden=4): 87 | model = Sequential() 88 | model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) 89 | model.add(Dense(nb_class, activation='softmax')) 90 | model.add(Dropout(0.5)) 91 | if custom: 92 | model.add(return_custom()(0.5)) 93 | return model 94 | 95 | 96 | def model(custom=False): 97 | inputs = Input(shape=(input_dim,), name='X') 98 | 99 | x = Dense(nb_hidden, activation='relu')(inputs) 100 | x = Dense(nb_hidden, activation='relu')(x) 101 | predictions = Dense(nb_class, 102 | activation='softmax', 103 | name='main_loss')(x) 104 | 105 | model = Model(input=inputs, output=predictions) 106 | return model 107 | 108 | 109 | def return_custom(): 110 | import keras.backend as K 111 | from keras.engine import Layer 112 | 113 | class Dropout_cust(Layer): # pragma: no cover 114 | '''Applies Dropout to the input. 115 | ''' 116 | 117 | def __init__(self, p, **kwargs): 118 | self.p = p 119 | if 0. < self.p < 1.: 120 | self.uses_learning_phase = True 121 | self.supports_masking = True 122 | super(Dropout_cust, self).__init__(**kwargs) 123 | 124 | def call(self, x, mask=None): 125 | if 0. 
< self.p < 1.: 126 | x = K.in_train_phase(K.dropout(x, level=self.p), x) 127 | return x 128 | 129 | def get_config(self): 130 | config = {'p': self.p} 131 | base_config = super(Dropout_cust, self).get_config() 132 | return dict(list(base_config.items()) + list(config.items())) 133 | 134 | return Dropout_cust 135 | -------------------------------------------------------------------------------- /tests/backend/test_common.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from alp.appcom.utils import imports 3 | 4 | 5 | def test_imports(): 6 | import numpy 7 | 8 | @imports() 9 | def dummy(): 10 | return 0 11 | 12 | assert dummy() == 0 13 | 14 | @imports([numpy]) 15 | def ones_check(): 16 | return numpy.ones((1)) 17 | 18 | assert ones_check().sum() == 1 19 | 20 | 21 | if __name__ == "__main__": 22 | pytest.main([__file__]) 23 | -------------------------------------------------------------------------------- /tests/backend/test_sklearn_backend.py: -------------------------------------------------------------------------------- 1 | """Tests for the sklearn backend""" 2 | 3 | import numpy as np 4 | import pytest 5 | import sklearn 6 | 7 | from fuel.datasets.hdf5 import H5PYDataset 8 | from fuel.schemes import SequentialScheme 9 | from fuel.streams import DataStream 10 | from fuel.transformers import ScaleAndShift 11 | 12 | from six.moves import zip as szip 13 | 14 | from sklearn import cross_validation as cv 15 | from sklearn import datasets 16 | 17 | from alp.appcom.core import Experiment 18 | from alp.appcom.utils import to_fuel_h5 19 | from alp.backend import sklearn_backend as SKB 20 | from alp.backend.sklearn_backend import getname 21 | from alp.utils.utils_tests import close_gens 22 | 23 | 24 | np.random.seed(1336) 25 | NAME = sklearn.__name__ 26 | VERSION = sklearn.__version__ 27 | CLASSIF = ['sklearn.linear_model.logistic.LogisticRegression', 28 | 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis', 29 | 'sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis'] 30 | 31 | 32 | def generate_data(classif=False): 33 | data, data_val = dict(), dict() 34 | if classif: 35 | datas = datasets.load_iris() 36 | Xs = datas.data 37 | Ys = datas.target 38 | else: 39 | Xs = np.linspace(0, 12.3, num=150, endpoint=False).reshape(1, -1).T 40 | Ys = (Xs * np.sin(Xs)).ravel() 41 | 42 | data["X"], data_val["X"], data["y"], data_val["y"] = cv.train_test_split( 43 | Xs, Ys, test_size=20, random_state=0) 44 | 45 | return data, data_val 46 | 47 | 48 | def dump_data(data, data_val, classif=False): 49 | ''' 50 | The sklearn version differs from the keras version 51 | in the following points: 52 | no local import of np 53 | no graph model 54 | validation cut at index 130 55 | classification or regression data will dump different files 56 | ''' 57 | suffix = '_R' 58 | if classif: 59 | suffix = '_C' 60 | 61 | inputs = [np.concatenate([data['X'], data_val['X']])] 62 | outputs = [np.concatenate([data['y'], data_val['y']])] 63 | 64 | file_name = 'test_data' + suffix 65 | scale = 1.0 / inputs[0].std(axis=0) 66 | shift = scale * inputs[0].mean(axis=0) 67 | 68 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 130], 69 | ['train', 'test'], 70 | file_name, 71 | '/data_generator') 72 | return file_path, scale, shift, i_names, o_names 73 | 74 | 75 | data_R, data_val_R = generate_data(False) 76 | data_C, data_val_C = generate_data(True) 77 | file_path_R, scale_R, shift_R, i_names_R, o_names_R = dump_data( 78 | data_R, data_val_R, False) 79 | 
file_path_C, scale_C, shift_C, i_names_C, o_names_C = dump_data( 80 | data_C, data_val_C, True) 81 | 82 | 83 | def make_gen(Nchunks=True, classif=False, train=True): 84 | ''' 85 | Nchunks==True : 10 chunks in the generator 86 | Nchunks == False : 1 chunk in the generator 87 | Makes the distinction between classification/regression 88 | Makes the distinction between test/train 89 | ''' 90 | 91 | file_path_f = file_path_R 92 | shift_f = shift_R 93 | scale_f = scale_R 94 | if classif: 95 | file_path_f = file_path_C 96 | shift_f = shift_C 97 | scale_f = scale_C 98 | 99 | if Nchunks: 100 | batch_size = 13 101 | else: 102 | batch_size = 130 103 | t_scheme = SequentialScheme(examples=130, batch_size=batch_size) 104 | t_source = 'train' 105 | if not train: 106 | if Nchunks: 107 | batch_size = 2 108 | else: 109 | batch_size = 20 110 | t_source = 'test' 111 | t_scheme = SequentialScheme(examples=20, batch_size=batch_size) 112 | 113 | t_set = H5PYDataset(file_path_f, which_sets=[t_source]) 114 | data_stream_t = DataStream(dataset=t_set, iteration_scheme=t_scheme) 115 | 116 | stand_stream_t = ScaleAndShift(data_stream=data_stream_t, 117 | scale=scale_f, shift=shift_f, 118 | which_sources=t_source) 119 | 120 | return stand_stream_t, t_set, data_stream_t 121 | 122 | 123 | keyval = dict() 124 | for m in SKB.SUPPORTED: 125 | keyval[getname(m)] = m() 126 | 127 | 128 | @pytest.fixture(scope='module', params=['no_metric', 'accuracy and mse']) 129 | def get_metric(request): 130 | if request.param == 'no_metric': 131 | return(None) 132 | elif request.param == 'accuracy and mse': 133 | return(['accuracy_score', 'mean_squared_error']) 134 | 135 | 136 | @pytest.fixture(scope='module', params=list(keyval.keys())) 137 | def get_model(request): 138 | model = keyval[request.param] 139 | return(model) 140 | 141 | 142 | @pytest.fixture 143 | def get_model_data_expe(get_model, get_metric): 144 | model, metric = get_model, get_metric 145 | 146 | expe = Experiment(model) 147 | 148 | data, data_val = data_R, data_val_R 149 | is_classif = False 150 | if getname(model, False) in CLASSIF: 151 | data, data_val = data_C, data_val_C 152 | is_classif = True 153 | else: # if regression model, remove accuracy 154 | if metric: 155 | if "accuracy_score" in metric: 156 | metric.remove("accuracy_score") 157 | 158 | return data, data_val, is_classif, model, metric, expe 159 | 160 | 161 | class TestExperiment: 162 | 163 | def test_experiment_instance_utils(self, get_model_data_expe): 164 | _, _, _, model, _, expe = get_model_data_expe 165 | expe.model_dict = model 166 | expe.backend_name = 'another_backend' 167 | expe.model_dict = model 168 | print(self) 169 | 170 | assert expe.backend is not None 171 | 172 | def test_experiment_fit(self, get_model_data_expe): 173 | data, data_val, _, model, metric, expe = get_model_data_expe 174 | 175 | for mod in [None, model]: 176 | for data_val_loc in [None, data_val]: 177 | expe.fit([data], [data_val_loc], model=mod, 178 | overwrite=True, metrics=metric) 179 | 180 | expe.backend_name = 'another_backend' 181 | expe.load_model() 182 | expe.load_model(expe.mod_id, expe.data_id) 183 | 184 | assert expe.data_id is not None 185 | assert expe.mod_id is not None 186 | assert expe.params_dump is not None 187 | print(self) 188 | 189 | def test_experiment_fit_gen_nogenval(self, get_model_data_expe): 190 | ''' 191 | Main case: generator on train 192 | 193 | Subcases: 194 | 10 chunks on train B with and without val 195 | 1 chunk on train B with and without val 196 | ''' 197 | data, data_val, is_classif, model, 
metric, expe = get_model_data_expe 198 | 199 | for Nchunks_gen, expected_value in szip([True, False], [10, 1]): 200 | gen_train, data_train, data_stream_train = make_gen( 201 | Nchunks_gen, is_classif, train=True) 202 | 203 | for data_val_loc in [None, data_val]: 204 | expe.fit_gen([gen_train], [data_val_loc], 205 | model=model, 206 | overwrite=True, metrics=metric) 207 | 208 | assert len(expe.full_res['metrics'][ 209 | 'score']) == expected_value 210 | assert len(expe.full_res['metrics'][ 211 | 'val_score']) == expected_value 212 | 213 | if data_val_loc is not None: 214 | assert None not in expe.full_res['metrics'][ 215 | 'val_score'] 216 | else: 217 | assert np.all([np.isnan(v) for v in expe.full_res[ 218 | 'metrics']['val_score']]) 219 | 220 | assert expe.data_id is not None 221 | assert expe.mod_id is not None 222 | assert expe.params_dump is not None 223 | assert expe 224 | 225 | close_gens(gen_train, data_train, data_stream_train) 226 | 227 | print(self) 228 | 229 | def test_experiment_fit_gen_withgenval(self, get_model_data_expe): 230 | ''' 231 | Main case: gen on train, gen on val 232 | Subcases: 233 | 10 chunks on train / 10 chunks on val C3 234 | 10 chunks on train / 1 chunk on val C1 235 | 1 chunk on train / 10 chunks on val C2 236 | ''' 237 | data, data_val, is_classif, model, metric, expe = get_model_data_expe 238 | 239 | for Nchunks_gen, Nchunks_val in szip([True, True, False], 240 | [True, False, True]): 241 | gen_train, data_train, data_stream_train = make_gen( 242 | Nchunks_gen, is_classif, train=True) 243 | gen_test, data_test, data_stream_test = make_gen( 244 | Nchunks_val, is_classif, train=False) 245 | 246 | expe.fit_gen([gen_train], [gen_test], 247 | overwrite=True, metrics=metric) 248 | 249 | expected_value_gen = 10 250 | if not Nchunks_gen: 251 | expected_value_gen = 1 252 | 253 | assert len(expe.full_res['metrics'][ 254 | 'score']) == expected_value_gen 255 | assert len(expe.full_res['metrics'][ 256 | 'val_score']) == 10 257 | assert expe.data_id is not None 258 | assert expe.mod_id is not None 259 | assert expe.params_dump is not None 260 | assert expe 261 | 262 | close_gens(gen_train, data_train, data_stream_train) 263 | close_gens(gen_test, data_test, data_stream_test) 264 | 265 | print(self) 266 | 267 | def test_experiment_fit_async(self, get_model_data_expe): 268 | data, data_val, _, model, metric, expe = get_model_data_expe 269 | 270 | for mod in [None, model]: 271 | for data_val_loc in [None, data_val]: 272 | _, thread = expe.fit_async([data], [data_val_loc], 273 | model=mod, overwrite=True, 274 | metrics=metric) 275 | thread.join() 276 | assert expe.data_id is not None 277 | assert expe.mod_id is not None 278 | assert expe.params_dump is not None 279 | assert expe 280 | 281 | # TODO: check consistency of results 282 | print(self) 283 | 284 | def test_experiment_fit_gen_async_nogenval(self, get_model_data_expe): 285 | ''' 286 | Main case: gen on train, data on val 287 | Subcases: 288 | 10 chunks on train 289 | 1 chunk on train 290 | ''' 291 | data, data_val, is_classif, model, metric, expe = get_model_data_expe 292 | 293 | for Nchunks_gen, expected_value in szip([True, False], [10, 1]): 294 | gen_train, data_train, data_stream_train = make_gen( 295 | Nchunks_gen, is_classif, train=True) 296 | 297 | for data_val_loc in [None, data_val]: 298 | 299 | _, thread = expe.fit_gen_async([gen_train], [data_val_loc], 300 | model=model, 301 | overwrite=True, metrics=metric) 302 | thread.join() 303 | 304 | assert len(expe.full_res['metrics'][ 305 | 'score']) == 
expected_value 306 | assert len(expe.full_res['metrics'][ 307 | 'val_score']) == expected_value 308 | 309 | if data_val_loc is not None: 310 | assert None not in expe.full_res['metrics'][ 311 | 'val_score'] 312 | else: 313 | assert np.all([np.isnan(v) for v in expe.full_res[ 314 | 'metrics']['val_score']]) 315 | 316 | assert expe.data_id is not None 317 | assert expe.mod_id is not None 318 | assert expe.params_dump is not None 319 | assert expe 320 | 321 | close_gens(gen_train, data_train, data_stream_train) 322 | 323 | print(self) 324 | 325 | def test_experiment_fit_gen_async_withgenval(self, get_model_data_expe): 326 | ''' 327 | Main case: gen on train, gen on val 328 | Subcases: 329 | 10 chunks on train / 10 chunks on val 330 | 10 chunks on train / 1 chunk on val 331 | 1 chunk on train / 10 chunks on val 332 | ''' 333 | data, data_val, is_classif, model, metric, expe = get_model_data_expe 334 | 335 | for Nchunks_gen, Nchunks_val in szip([True, True, False], 336 | [True, False, True]): 337 | 338 | gen_train, data_train, data_stream_train = make_gen( 339 | Nchunks_gen, is_classif, train=True) 340 | gen_test, data_test, data_stream_test = make_gen( 341 | Nchunks_val, is_classif, train=False) 342 | 343 | _, thread = expe.fit_gen_async( 344 | [gen_train], [gen_test], overwrite=True, metrics=metric) 345 | thread.join() 346 | 347 | expected_value_gen = 10 348 | if not Nchunks_gen: 349 | expected_value_gen = 1 350 | 351 | assert len(expe.full_res['metrics'][ 352 | 'score']) == expected_value_gen 353 | assert len(expe.full_res['metrics'][ 354 | 'val_score']) == 10 355 | assert expe.data_id is not None 356 | assert expe.mod_id is not None 357 | assert expe.params_dump is not None 358 | assert expe 359 | 360 | close_gens(gen_train, data_train, data_stream_train) 361 | 362 | print(self) 363 | 364 | def test_experiment_predict(self, get_model_data_expe): 365 | data, data_val, _, model, metric, expe = get_model_data_expe 366 | model._test_ = 'test' 367 | 368 | for mod in [None, model]: 369 | expe.fit([data], [data_val], model=mod, custom_objects={}, 370 | overwrite=True, metrics=metric) 371 | expe.load_model() 372 | alp_pred = expe.predict(data['X']) 373 | alp_pred_async = expe.predict_async(data['X']) 374 | 375 | alp_pred_async = alp_pred_async.wait() 376 | 377 | model.fit(data['X'], data['y']) 378 | sklearn_pred = model.predict(data['X']) 379 | assert(np.allclose(alp_pred, sklearn_pred)) 380 | assert(np.allclose(np.array(alp_pred_async), sklearn_pred)) 381 | print(self) 382 | 383 | def test_experiment_predict_async(self, get_model_data_expe): 384 | data, data_val, _, model, metric, expe = get_model_data_expe 385 | model._test_ = 'test' 386 | 387 | for mod in [None, model]: 388 | expe.fit([data], [data_val], model=mod, custom_objects={}, 389 | overwrite=True, metrics=metric) 390 | expe.load_model() 391 | alp_pred = expe.predict(data['X']) 392 | 393 | model.fit(data['X'], data['y']) 394 | sklearn_pred = model.predict(data['X']) 395 | assert(np.allclose(alp_pred, sklearn_pred)) 396 | print(self) 397 | 398 | 399 | 400 | def test_utils(): 401 | objects = [list(), 402 | [1, 2], 403 | [1., 2.], 404 | list(np.array([1, 2], dtype=np.integer)), 405 | list(np.array([1., 2.], dtype=np.float)), 406 | list(np.array([np.ones((1))]))] 407 | for el in objects: 408 | SKB.typeconversion(el) 409 | 410 | 411 | if __name__ == "__main__": 412 | pytest.main([__file__]) 413 | -------------------------------------------------------------------------------- /tests/core/test_hpoptim.py: 
--------------------------------------------------------------------------------
1 | """Tests Hyper parameter search"""
2 |
3 | import keras
4 | import numpy as np
5 | import pytest
6 |
7 | from fuel.datasets.hdf5 import H5PYDataset
8 | from fuel.schemes import SequentialScheme
9 | from fuel.streams import DataStream
10 | from fuel.transformers import ScaleAndShift
11 | from keras.layers import Dense
12 | from keras.layers import Dropout
13 | from keras.models import Sequential
14 | from keras.utils import np_utils
15 | from keras.utils.test_utils import get_test_data
16 | from sklearn.linear_model import LogisticRegression
17 |
18 | from alp.appcom.core import Experiment
19 | from alp.appcom.ensembles import HParamsSearch
20 | from alp.appcom.utils import to_fuel_h5
21 | from alp.utils.utils_tests import batch_size
22 | from alp.utils.utils_tests import close_gens
23 | from alp.utils.utils_tests import make_data
24 | from alp.utils.utils_tests import make_gen
25 | from alp.utils.utils_tests import sequential
26 | from alp.utils.utils_tests import test_samples
27 | from alp.utils.utils_tests import train_samples
28 |
29 |
30 | def make_experiments(dict_exp=False):
31 |     experiments = []
32 |     if dict_exp:
33 |         experiments = dict()
34 |     nb_hiddens = [8, 16, 32]
35 |     for i in range(3):
36 |         nb_hidden = nb_hiddens[i]
37 |
38 |         # model
39 |         model = sequential(False, nb_hidden)
40 |
41 |         model.compile(loss='categorical_crossentropy',
42 |                       optimizer='adam',
43 |                       metrics=['accuracy'])
44 |
45 |         expe = Experiment(model, metrics=['accuracy'])
46 |         if dict_exp:
47 |             experiments[str(nb_hidden)] = expe
48 |         else:
49 |             experiments.append(expe)
50 |     return experiments
51 |
52 |
53 | def make_sklearn_experiments():
54 |     experiments = []
55 |     C_list = [1.0, 0.8, 0.5]
56 |     for C in C_list:
57 |         model = LogisticRegression(C=C)
58 |         expe = Experiment(model)
59 |         experiments.append(expe)
60 |     return experiments
61 |
62 |
63 | class TestHParamsSearch:
64 |     def test_fit(self):
65 |         data, data_val = make_data(train_samples, test_samples)
66 |         experiments = make_experiments()
67 |
68 |         param_search = HParamsSearch(experiments, metric='loss', op=np.min)
69 |         param_search.fit([data], [data_val], nb_epoch=2,
70 |                          batch_size=batch_size, verbose=2,
71 |                          overwrite=True)
72 |         print(self)
73 |
74 |     def test_fit_async(self):
75 |         data, data_val = make_data(train_samples, test_samples)
76 |         experiments = make_experiments()
77 |
78 |         param_search = HParamsSearch(experiments, metric='loss', op=np.min)
79 |         param_search.fit_async([data], [data_val], nb_epoch=2,
80 |                                batch_size=batch_size, verbose=2,
81 |                                overwrite=True)
82 |         param_search.summary(metrics={'val_loss': np.min})
83 |         print(self)
84 |
85 |     def test_fit_gen(self):
86 |         gen, data, data_stream = make_gen(batch_size)
87 |         val, data_2, data_stream_2 = make_gen(batch_size)
88 |         experiments = make_experiments()
89 |
90 |         param_search = HParamsSearch(experiments, metric='loss', op=np.min)
91 |         param_search.fit_gen([gen], [val], nb_epoch=2,
92 |                              verbose=2,
93 |                              nb_val_samples=128,
94 |                              samples_per_epoch=64,
95 |                              overwrite=True)
96 |         param_search.summary(verbose=True, metrics={'val_loss': np.min})
97 |         close_gens(gen, data, data_stream)
98 |         close_gens(val, data_2, data_stream_2)
99 |         print(self)
100 |
101 |     def test_fit_gen_async(self):
102 |         gen, data, data_stream = make_gen(batch_size)
103 |         val, data_2, data_stream_2 = make_gen(batch_size)
104 |         experiments = make_experiments()
105 |         param_search = HParamsSearch(experiments, metric='loss', op=np.min)
106 |         param_search.fit_gen_async([gen], [val], nb_epoch=2,
107 |                                    verbose=2,
108 |                                    nb_val_samples=128,
109 |                                    samples_per_epoch=64,
110 |                                    overwrite=True)
111 |         param_search.summary(verbose=True, metrics={'val_loss': np.min})
112 |         close_gens(gen, data, data_stream)
113 |         close_gens(val, data_2, data_stream_2)
114 |         print(self)
115 |
116 |     def test_predict(self):
117 |         data, data_val = make_data(train_samples, test_samples)
118 |         experiments = make_experiments()
119 |
120 |         param_search = HParamsSearch(experiments, metric='acc', op=np.min)
121 |         min_x = np.min(data['X'])
122 |         data['X'] = (data['X'] - min_x) / (np.max(data['X']) - min_x)
123 |         param_search.fit([data], [data_val], nb_epoch=2,
124 |                          batch_size=batch_size, verbose=2,
125 |                          overwrite=True)
126 |
127 |         param_search.predict(data['X'], metric='val_acc', op=np.max)
128 |
129 |         experiments = make_experiments(dict_exp=True)
130 |         param_search = HParamsSearch(experiments)
131 |         param_search.fit([data], [data_val], nb_epoch=2,
132 |                          batch_size=batch_size, verbose=2,
133 |                          overwrite=True)
134 |
135 |         param_search.predict(data['X'], metric='acc', op=np.min, partial=True)
136 |         print(self)
137 |
138 |     def test_predict_sklearn(self):
139 |         data, data_val = make_data(train_samples, test_samples)
140 |         experiments = make_sklearn_experiments()
141 |
142 |         param_search = HParamsSearch(experiments, metric='score', op=np.max)
143 |         data['y'] = np.argmax(data['y'], axis=1).ravel()
144 |         data_val['y'] = np.argmax(data_val['y'], axis=1).ravel()
145 |         param_search.fit([data], [data_val], overwrite=True)
146 |
147 |         param_search.predict(data['X'])
148 |         print(self)
149 |
150 |
151 | if __name__ == "__main__":
152 |     pytest.main([__file__])
153 |
--------------------------------------------------------------------------------
/tests/dbbackend/test_mongodb.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from alp.dbbackend import mongo_backend as mgb
4 |
5 |
6 | def test_create_db():
7 |     mgb.create_db(True)
8 |     mgb.create_db(False)
9 |
10 |
11 | if __name__ == "__main__":
12 |     pytest.main([__file__])
13 |
--------------------------------------------------------------------------------
/tests/test_alp.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from click.testing import CliRunner
4 |
5 | from alp.cli import main
6 |
7 |
8 | def init_test_config():
9 |     config_path = '/root/.alp/containers_test.json'
10 |     if os.getenv('TEST_MODE') == 'ON':  # pragma: no cover
11 |         config_path = 'containers_test.json'
12 |     if not os.path.exists(config_path):  # pragma: no cover
13 |         config = dict()
14 |         config['broker'] = {
15 |             "volumes": ["/opt/data2/rabbitmq/dev/log:/dev/log",
16 |                         "/opt/data2/rabbitmq:/var/lib/rabbitmq"],
17 |             "ports": ["8080:15672", "5672:5672"],
18 |             "name": "rabbitmq_sched",
19 |             "container_name": "rabbitmq:3-management",
20 |             "mode": "-d"
21 |         }
22 |         config['result_db'] = {
23 |             "volumes": ["/opt/data2/mongo_data/results:/data/db"],
24 |             "name": "mongo_results_test",
25 |             "container_name": "mongo",
26 |             "mode": "-d"
27 |         }
28 |         config['model_gen_db'] = {
29 |             "volumes": ["/opt/data2/mongo_data/models:/data/db"],
30 |             "name": "mongo_models_test",
31 |             "container_name": "mongo",
32 |             "mode": "-d",
33 |         }
34 |         config['workers'] = []
35 |         config['controlers'] = []
36 |
37 |         with open(config_path, 'w') as f:
38 |             f.write(json.dumps(config, indent=4))
39 |     return config_path
40 |
41 |
42 | config_path = init_test_config()
43 |
44 |
45 | def test_status():
46 |     runner = CliRunner()
47 |     result = runner.invoke(main, ['status', config_path])
48 |     assert result.exit_code == 0
49 |     result = runner.invoke(main, ['--verbose', 'status', config_path])
50 |     assert result.exit_code == 0
51 |
52 |
53 | def test_service():
54 |     runner = CliRunner()
55 |     for cm in ['stop', 'rm', 'start', 'restart']:
56 |         result = runner.invoke(main, ['service', cm, config_path])
57 |         assert result.exit_code == 0
58 |     for cm in ['stop', 'rm', 'start', 'restart']:
59 |         result = runner.invoke(main, ['--verbose', 'service', cm, config_path])
60 |         assert result.exit_code == 0
61 |     for cm in ['stop', 'rm', 'start', 'restart']:
62 |         result = runner.invoke(main, ['--verbose', 'service', '--dry_run',
63 |                                       cm, config_path])
64 |         assert result.exit_code == 0
65 |
66 | def test_update():
67 |     runner = CliRunner()
68 |     result = runner.invoke(main, ['--verbose', 'update', config_path])
69 |     assert result.exit_code == 0
70 |
71 |
72 | def test_pull():
73 |     runner = CliRunner()
74 |     result = runner.invoke(main, ['pull', config_path])
75 |     assert result.exit_code == 0
76 |     result = runner.invoke(main, ['--verbose', 'pull', config_path])
77 |     assert result.exit_code == 0
78 |
79 |
80 | def test_gen():
81 |     user_path = os.path.expanduser('~')
82 |     gen_dir = os.path.join(user_path, '.alp')
83 |     if not os.path.exists(gen_dir):
84 |         os.makedirs(gen_dir)
85 |     runner = CliRunner()
86 |     result = runner.invoke(main, ['genconfig'])
87 |     result = runner.invoke(main, ['genconfig', '--namesuf=test'])
88 |     result = runner.invoke(main, ['genconfig', '--cpu'])
89 |     assert result.exit_code == 0
90 |     result = runner.invoke(main, ['--verbose', 'genconfig',
91 |                                   '--outdir={}'.format(gen_dir)])
92 |     assert result.exit_code == 0
93 |     result = runner.invoke(main, ['--verbose', 'genconfig',
94 |                                   '--rootfolder={}'.format(gen_dir)])
95 |     assert result.exit_code == 0
96 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | ; a generative tox configuration, see: https://testrun.org/tox/latest/config.html#generative-envlist
2 |
3 | [tox]
4 | envlist =
5 |     clean,
6 |     check,
7 |     {py27,py34,py35},
8 |     report,
9 |     docs,
10 |     docsbuild
11 | indexserver =
12 |     g1 = https://pypi.python.org/simple
13 |     g2 = https://pypi.python.org/simple
14 |     g3 = https://pypi.python.org/simple
15 |     g4 = https://pypi.python.org/simple
16 |
17 | [testenv]
18 | basepython =
19 |     {py27,docs,docsbuild,spell}: {env:TOXPYTHON:python2.7}
20 |     py34: {env:TOXPYTHON:python3.4}
21 |     py35: {env:TOXPYTHON:python3.5}
22 |     {clean,check,report,coveralls,codecov}: python3.5
23 |     bootstrap: python
24 | setenv =
25 |     PYTHONPATH={toxinidir}/tests
26 |     PYTHONUNBUFFERED=yes
27 | passenv =
28 |     *
29 | usedevelop = false
30 | sitepackages = true
31 | deps =
32 |     :g1: -r{toxinidir}/req/requirements_first.txt
33 |     :g2: git+git://github.com/mila-udem/fuel.git
34 |     :g3: -r{toxinidir}/req/requirements.txt
35 |     :g3: pymongo
36 |     :g3: pytest
37 |     :g3: pytest-travis-fold
38 |     :g3: pytest-cov
39 |     :g3: coveralls
40 |     :g4: scikit-learn
41 |
42 | [testenv:py27]
43 | deps =
44 |     :g1: -r{toxinidir}/req/requirements_first.txt
45 |     :g2: git+git://github.com/mila-udem/fuel.git
46 |     :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
47 |     :g3: -r{toxinidir}/req/requirements.txt
48 |     :g3: pymongo
49 |     :g3: pytest
50 |     :g3: pytest-travis-fold
51 |     :g3: pytest-cov
52 |     :g3: coveralls
53 |     :g3: scipy==0.16.1
54 |     :g4: scikit-learn
55 | commands =
56 |     {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
57 |
58 | [testenv:py34]
59 | deps =
60 |     :g1: -r{toxinidir}/req/requirements_first.txt
61 |     :g2: git+git://github.com/mila-udem/fuel.git
62 |     :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
63 |     :g3: -r{toxinidir}/req/requirements.txt
64 |     :g3: pymongo
65 |     :g3: pytest
66 |     :g3: pytest-travis-fold
67 |     :g3: pytest-cov
68 |     :g3: coveralls
69 |     :g3: scipy==0.16.1
70 |     :g4: scikit-learn
71 | commands =
72 |     {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
73 |
74 | [testenv:py35]
75 | deps =
76 |     :g1: -r{toxinidir}/req/requirements_first.txt
77 |     :g2: git+git://github.com/mila-udem/fuel.git
78 |     :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
79 |     :g3: -r{toxinidir}/req/requirements.txt
80 |     :g3: pymongo
81 |     :g3: pytest
82 |     :g3: pytest-travis-fold
83 |     :g3: pytest-cov
84 |     :g3: coveralls
85 |     :g3: scipy==0.16.1
86 |     :g4: scikit-learn
87 | commands =
88 |     {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
89 |
90 | [testenv:bootstrap]
91 | deps =
92 |     jinja2
93 |     matrix
94 | skip_install = true
95 | commands =
96 |     python ci/bootstrap.py
97 | passenv =
98 |     *
99 |
100 | [testenv:spell]
101 | setenv =
102 |     SPELLCHECK=1
103 | commands =
104 |     sphinx-build -b spelling docs dist/docs
105 | skip_install = true
106 | deps =
107 |     :g1: setuptools
108 |     mock
109 |     -r{toxinidir}/docs/requirements.txt
110 |     sphinxcontrib-spelling
111 |     pyenchant
112 |
113 | [testenv:docs]
114 | deps =
115 |     :g1: scipy
116 |     :g1: setuptools>=28.8
117 |     :g1: six>=1.6
118 |     :g2: -r{toxinidir}/docs/requirements.txt
119 |     :g2: celery
120 |     :g2: mock
121 |     :g2: git+git://github.com/fchollet/keras.git
122 |
123 |
124 | commands =
125 |     sphinx-build {posargs:-E} -b doctest docs dist/docs
126 |     sphinx-build {posargs:-E} -b html docs dist/docs
127 |     sphinx-build -b linkcheck docs dist/docs
128 |
129 | [testenv:docsbuild]
130 | deps =
131 |     :g1: setuptools>=28.8
132 |     :g1: six>=1.6
133 |     :g2: -r{toxinidir}/docs/requirements.txt
134 |     :g3: mock
135 |     :g3: celery
136 |     :g3: git+git://github.com/fchollet/keras.git
137 | commands =
138 |     sphinx-build {posargs:-E} -b doctest docs dist/docs
139 |     sphinx-build {posargs:-E} -b html docs dist/docs
140 |
141 |
142 | [testenv:check]
143 | deps =
144 |     :g1: -r{toxinidir}/req/requirements_first.txt
145 |     docutils
146 |     check-manifest
147 |     flake8
148 |     readme-renderer
149 |     pygments
150 |     isort
151 | skip_install = true
152 | commands =
153 |     python setup.py check --strict --metadata --restructuredtext
154 |     check-manifest {toxinidir}
155 |     flake8 src tests setup.py --ignore=F403
156 |     isort --verbose --check-only --diff --recursive src tests setup.py
157 |
158 | [testenv:coveralls]
159 | deps =
160 |     coverage==4.1
161 |     coveralls
162 | skip_install = true
163 | commands =
164 |     coverage combine
165 |     coverage report
166 |     coveralls []
167 |
168 | [testenv:codecov]
169 | deps =
170 |     coverage==4.1
171 |     codecov
172 | skip_install = true
173 | commands =
174 |     coverage combine
175 |     coverage report
176 |     coverage xml --ignore-errors
177 |     codecov []
178 |
179 | [testenv:report]
180 | deps = coverage==4.1
181 | skip_install = true
182 | commands =
183 |     coverage combine
184 |     coverage report
185 |     coverage html
186 |
187 | [testenv:clean]
188 | commands = coverage erase
189 | skip_install = true
190 | deps = coverage
--------------------------------------------------------------------------------