├── src
│   └── alp
│       ├── backend
│       │   ├── __init__.py
│       │   └── common.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── utils_tests.py
│       ├── __init__.py
│       ├── celapp.py
│       ├── appcom
│       │   ├── __init__.py
│       │   ├── utils.py
│       │   ├── ensembles.py
│       │   └── core.py
│       ├── dbbackend
│       │   ├── __init__.py
│       │   └── mongo_backend.py
│       └── cli.py
├── docs
│   ├── projectevolution
│   │   ├── authors.rst
│   │   ├── changelog.rst
│   │   ├── contribute.rst
│   │   └── index_projectevolution.rst
│   ├── Userguide
│   │   ├── Experiment.rst
│   │   ├── index_userguide.rst
│   │   └── Services.rst
│   ├── _static
│   │   ├── download.png
│   │   ├── last_bouquetin.png
│   │   └── last_bouquetin.svg
│   ├── reference
│   │   ├── index.rst
│   │   ├── alp.backend.rst
│   │   ├── alp.appcom.rst
│   │   └── alp.rst
│   ├── FirstSteps
│   │   ├── index_first_steps.rst
│   │   └── why-alp.rst
│   ├── Setup
│   │   ├── how_does_it_work.rst
│   │   ├── index_setup.rst
│   │   ├── requirements.rst
│   │   └── config_CLI_launch.rst
│   ├── spelling_wordlist.txt
│   ├── Tutorials
│   │   ├── index_tuto.rst
│   │   ├── tuto3.rst
│   │   ├── tuto1.rst
│   │   ├── tuto4.rst
│   │   ├── tuto2.rst
│   │   └── tuto0.rst
│   ├── _templates
│   │   └── last_modified.html
│   ├── requirements.txt
│   ├── index.rst
│   └── conf.py
├── AUTHORS.rst
├── req
│   ├── requirements_cli.txt
│   ├── requirements_first.txt
│   └── requirements.txt
├── .bumpversion.cfg
├── tests
│   ├── dbbackend
│   │   └── test_mongodb.py
│   ├── backend
│   │   ├── test_common.py
│   │   └── test_sklearn_backend.py
│   ├── test_alp.py
│   └── core
│       └── test_hpoptim.py
├── .editorconfig
├── .coveragerc
├── MANIFEST.in
├── CHANGELOG.rst
├── setup.cfg
├── LICENSE
├── .gitignore
├── install-prereqs.sh
├── ci
│   ├── templates
│   │   ├── .travis.yml
│   │   └── appveyor.yml
│   ├── appveyor-with-compiler.cmd
│   ├── bootstrap.py
│   ├── appveyor-download.py
│   └── appveyor-bootstrap.py
├── .cookiecutterrc
├── appveyor.yml
├── setup.py
├── CONTRIBUTING.rst
├── .travis.yml
├── tox.ini
└── README.rst
/src/alp/backend/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/alp/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/projectevolution/authors.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../AUTHORS.rst
--------------------------------------------------------------------------------
/docs/projectevolution/changelog.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../CHANGELOG.rst
--------------------------------------------------------------------------------
/docs/projectevolution/contribute.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../CONTRIBUTING.rst
--------------------------------------------------------------------------------
/docs/Userguide/Experiment.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | Experiment
3 | ==========
4 |
5 | Experiment section
6 |
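7 | The :code:`Experiment` object exported by :code:`alp` (defined in :code:`alp.appcom.core`) is the entry point used to schedule and record a model training run. Below is a purely hypothetical sketch of how it could be used; the constructor and the :code:`fit` call are assumptions made for illustration, not the documented API, so refer to the reference section for the real interface.
8 |
9 | .. code-block:: python
10 |
11 |     # Hypothetical sketch -- the Experiment constructor and fit signature
12 |     # below are assumptions for illustration, not the documented ALP API.
13 |     import numpy as np
14 |     from sklearn.linear_model import LogisticRegression
15 |
16 |     from alp.appcom.core import Experiment
17 |
18 |     x_train = np.random.rand(20, 3)
19 |     y_train = np.random.randint(0, 2, 20)
20 |
21 |     model = LogisticRegression()
22 |     expe = Experiment(model)            # assumed constructor
23 |     expe.fit([x_train], [y_train])      # assumed fit signature
24 |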
--------------------------------------------------------------------------------
/docs/_static/download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tboquet/python-alp/HEAD/docs/_static/download.png
--------------------------------------------------------------------------------
/docs/reference/index.rst:
--------------------------------------------------------------------------------
1 | Reference
2 | =========
3 |
4 | .. toctree::
5 | :glob:
6 |
7 | alp*
8 |
--------------------------------------------------------------------------------
/docs/_static/last_bouquetin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tboquet/python-alp/HEAD/docs/_static/last_bouquetin.png
--------------------------------------------------------------------------------
/AUTHORS.rst:
--------------------------------------------------------------------------------
1 | =======
2 | Authors
3 | =======
4 |
5 | * Thomas Boquet - https://github.com/tboquet
6 | * Paul Lemaître
7 |
8 |
--------------------------------------------------------------------------------
/docs/FirstSteps/index_first_steps.rst:
--------------------------------------------------------------------------------
1 | First steps with ALP
2 | --------------------
3 |
4 | .. include :: why-alp.rst
5 |
--------------------------------------------------------------------------------
/req/requirements_cli.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | cython
3 | numpy
4 | pymongo
5 | pyyaml
6 | six>=1.10
7 | Click
8 | pandas
9 | docker-py
10 |
--------------------------------------------------------------------------------
/docs/Setup/how_does_it_work.rst:
--------------------------------------------------------------------------------
1 | ===================
2 | How does ALP work?
3 | ===================
4 |
5 | It seems complicated, but fear not.
6 |
--------------------------------------------------------------------------------
/src/alp/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .appcom import *
3 |
4 | __all__ = ["Experiment"]
5 |
6 | __version__ = "0.3.0"
7 |
--------------------------------------------------------------------------------
/docs/Setup/index_setup.rst:
--------------------------------------------------------------------------------
1 | How to setup ALP?
2 | -----------------
3 |
4 | .. include :: requirements.rst
5 | .. include :: config_CLI_launch.rst
6 |
--------------------------------------------------------------------------------
/req/requirements_first.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | cython
3 | numpy
4 | pymongo
5 | h5py
6 | Pillow
7 | pyyaml
8 | six>=1.10
9 | Click
10 | pandas
11 | docker-py
12 |
--------------------------------------------------------------------------------
/req/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | numpy
3 | pymongo
4 | -e git+git://github.com/Theano/Theano.git#egg=package
5 | git+git://github.com/fchollet/keras.git
6 | celery
7 | dill
8 |
--------------------------------------------------------------------------------
/docs/spelling_wordlist.txt:
--------------------------------------------------------------------------------
1 | builtin
2 | builtins
3 | classmethod
4 | staticmethod
5 | classmethods
6 | staticmethods
7 | args
8 | kwargs
9 | callstack
10 | Changelog
11 | Indices
12 |
--------------------------------------------------------------------------------
/docs/projectevolution/index_projectevolution.rst:
--------------------------------------------------------------------------------
1 | Project evolution
2 | ---------------------------
3 |
4 | .. include:: contribute.rst
5 | .. include:: changelog.rst
6 | .. include:: authors.rst
7 |
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 0.3.0
3 | commit = True
4 | tag = True
5 |
6 | [bumpversion:file:setup.py]
7 |
8 | [bumpversion:file:docs/conf.py]
9 |
10 | [bumpversion:file:src/alp/__init__.py]
11 |
12 |
--------------------------------------------------------------------------------
/docs/Tutorials/index_tuto.rst:
--------------------------------------------------------------------------------
1 | Some tutorials and use cases
2 | ----------------------------
3 |
4 | .. include:: tuto0.rst
5 | .. include:: tuto1.rst
6 | .. include:: tuto2.rst
7 | .. include:: tuto3.rst
8 | .. include:: tuto4.rst
--------------------------------------------------------------------------------
/docs/_templates/last_modified.html:
--------------------------------------------------------------------------------
1 |
2 | {%- if last_updated %}
3 | {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
4 | {%- endif %}
5 |
6 |
7 |
10 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | requests>=2.12.3
3 | keras
4 | pymongo
5 | scikit-learn
6 | dill
7 | mock
8 | celery
9 | sphinx>=1.3
10 | sphinx-py3doc-enhanced-theme
11 | sphinxcontrib-httpdomain>=1.4.0
12 | sphinxcontrib-napoleon>=0.5.0
13 | progressbar2
14 | -e .
15 |
--------------------------------------------------------------------------------
/tests/dbbackend/test_mongodb.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from alp.dbbackend import mongo_backend as mgb
4 |
5 |
6 | def test_create_db():
7 | mgb.create_db(True)
8 | mgb.create_db(False)
9 |
10 |
11 | if __name__ == "__main__":
12 | pytest.main([__file__])
13 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # see http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | end_of_line = lf
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 | indent_style = space
9 | indent_size = 4
10 | charset = utf-8
11 |
12 | [*.{bat,cmd,ps1}]
13 | end_of_line = crlf
14 |
--------------------------------------------------------------------------------
/docs/reference/alp.backend.rst:
--------------------------------------------------------------------------------
1 | alp.backend package
2 | ===================
3 |
4 | .. automodule:: alp.backend.keras_backend
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 |
10 | .. automodule:: alp.backend.sklearn_backend
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
--------------------------------------------------------------------------------
/docs/Userguide/index_userguide.rst:
--------------------------------------------------------------------------------
1 | Userguide
2 | ---------
3 |
4 | In this user guide we explain in more detail how to use the architecture and the main objects available in ALP.
5 |
6 | .. warning::
7 |
8 |     The user guide is currently under construction. Please visit this section again in a few days.
9 |
10 | .. include :: Services.rst
11 | .. include :: Experiment.rst
12 |
--------------------------------------------------------------------------------
/docs/reference/alp.appcom.rst:
--------------------------------------------------------------------------------
1 | alp.appcom package
2 | ==================
3 |
4 | .. automodule:: alp.appcom.core
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. automodule:: alp.appcom.ensembles
10 | :members:
11 | :undoc-members:
12 | :show-inheritance:
13 |
14 | .. automodule:: alp.appcom.utils
15 | :members:
16 | :undoc-members:
17 | :show-inheritance:
18 |
--------------------------------------------------------------------------------
/src/alp/celapp.py:
--------------------------------------------------------------------------------
1 | """
2 | Celery config
3 | =============
4 | """
5 |
6 | from celery import Celery
7 | from . import appcom as apc
8 |
9 |
10 | RESULT_SERIALIZER = 'json'
11 |
12 | app = Celery(broker=apc._broker,
13 | backend=apc._backend)
14 |
15 | app.conf.update(task_serializer='pickle',
16 | result_serializer=RESULT_SERIALIZER,
17 | accept_content=['pickle', 'json'])
18 |
--------------------------------------------------------------------------------
/tests/backend/test_common.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from alp.appcom.utils import imports
3 |
4 |
5 | def test_imports():
6 | import numpy
7 |
8 | @imports()
9 | def dummy():
10 | return 0
11 |
12 | assert dummy() == 0
13 |
14 | @imports([numpy])
15 | def ones_check():
16 | return numpy.ones((1))
17 |
18 | assert ones_check().sum() == 1
19 |
20 |
21 | if __name__ == "__main__":
22 | pytest.main([__file__])
23 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [paths]
2 | source =
3 | src/alp
4 | */site-packages/alp
5 |
6 | [run]
7 | branch = True
8 | source = alp
9 | parallel = true
10 |
11 | [report]
12 | show_missing = true
13 | precision = 2
14 | omit = *migrations*
15 | exclude_lines =
16 | pragma: no cover
17 | raise Exception
18 | except Exception as e:
19 | except Exception:
20 | raise NotImplementedError
21 | except MemoryError
22 | raise TypeError
23 | except KeyboardInterrupt
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | graft docs
2 | graft examples
3 | graft src
4 | graft ci
5 | graft tests
6 |
7 | include .bumpversion.cfg
8 | include .coveragerc
9 | include .cookiecutterrc
10 | include .editorconfig
11 | include .isort.cfg
12 | include *.sh
13 | include req/requirements.txt
14 | include req/requirements_first.txt
15 | include req/requirements_cli.txt
16 |
17 | include AUTHORS.rst
18 | include CHANGELOG.rst
19 | include CONTRIBUTING.rst
20 | include LICENSE
21 | include README.rst
22 |
23 | include tox.ini .travis.yml appveyor.yml
24 |
25 | global-exclude *.py[cod] __pycache__ *.so *.dylib
26 |
--------------------------------------------------------------------------------
/docs/reference/alp.rst:
--------------------------------------------------------------------------------
1 | alp
2 | ===
3 |
4 | .. testsetup::
5 |
6 | from alp import *
7 |
8 | .. automodule:: alp
9 | :members:
10 |
11 | ..
12 | Subpackages
13 | -----------
14 |
15 | .. toctree::
16 |
17 | alp.appcom
18 | alp.backend
19 | alp.dbbackend
20 |
21 | alp.celapp module
22 | ------------------
23 |
24 | .. automodule:: alp.celapp
25 | :members:
26 | :undoc-members:
27 | :show-inheritance:
28 |
29 | alp.config module
30 | -----------------
31 |
32 | .. automodule:: alp.config
33 | :members:
34 | :undoc-members:
35 | :show-inheritance:
36 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Changelog
3 | =========
4 |
5 |
6 | 0.3.0 (2017-01-17)
7 | ==================
8 |
9 | * Command Line Interface to launch services.
10 | * sklearn backend is stable with 12 models supported and all sklearn metrics
11 | * Keras backend supports custom objects
12 | * asynchronous fit is stable for all backends
13 | * fuel generators are supported as training data and validation data source
14 | * Ensemble class in core (abstraction for many models)
15 | * Basic HyperParameter optimisation
16 | * Better documentation
17 |
18 |
19 | 0.2.0 (2016-04-21)
20 | ==================
21 |
22 | * Keras backend is stable
23 |
24 |
25 | 0.1.0 (2016-04-12)
26 | ==================
27 |
28 | * First release
29 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [flake8]
5 | max-line-length = 140
6 | exclude = tests/*,*/migrations/*,*/south_migrations/*
7 |
8 | [tool:pytest]
9 | norecursedirs =
10 | .git
11 | .tox
12 | .env
13 | dist
14 | build
15 | south_migrations
16 | migrations
17 | python_files =
18 | test_*.py
19 | *_test.py
20 | tests.py
21 | addopts =
22 | -rxEfsw
23 | --strict
24 | --doctest-modules
25 | --doctest-glob=\*.rst
26 | --tb=short
27 |
28 | [isort]
29 | force_single_line=True
30 | line_length=120
31 | known_first_party=alp
32 | default_section=THIRDPARTY
33 | forced_separate=test_alp
34 | not_skip = __init__.py
35 | skip = migrations, south_migrations
36 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | ===============================
2 | Welcome to ALP's documentation!
3 | ===============================
4 |
5 | ALP helps you quickly experiment with many machine learning models. It provides you with a simple way of scheduling and recording experiments.
6 | This library has been developed to work well with Keras and Scikit-learn but can accommodate many other frameworks.
7 |
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 |
12 | FirstSteps/index_first_steps
13 | Setup/index_setup
14 | Tutorials/index_tuto
15 | Userguide/index_userguide
16 | reference/index
17 | projectevolution/index_projectevolution
18 |
19 |
20 | Indices and tables
21 | ==================
22 |
23 | * :ref:`genindex`
24 | * :ref:`modindex`
25 | * :ref:`search`
26 |
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | All contributions by Thomas Boquet:
2 | Copyright (c) 2016 Thomas Boquet
3 |
4 | All contributions by R2000 inc.:
5 | Copyright (c) 2016 R2000 inc.
6 |
7 | All other contributions:
8 | Copyright (c) 2016, the respective contributors.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | .eggs
13 | parts
14 | bin
15 | var
16 | sdist
17 | develop-eggs
18 | .installed.cfg
19 | lib
20 | lib64
21 | venv*/
22 | pyvenv*/
23 |
24 | # Installer logs
25 | pip-log.txt
26 |
27 | # Unit test / coverage reports
28 | .coverage
29 | .tox
30 | .coverage.*
31 | nosetests.xml
32 | coverage.xml
33 | htmlcov
34 |
35 | # Translations
36 | *.mo
37 |
38 | # Mr Developer
39 | .mr.developer.cfg
40 | .project
41 | .pydevproject
42 | .idea
43 | *.iml
44 | *.komodoproject
45 |
46 | # Complexity
47 | output/*.html
48 | output/*/index.html
49 |
50 | # Sphinx
51 | docs/_build
52 |
53 | .DS_Store
54 | *~
55 | .*.sw[po]
56 | .build
57 | .ve
58 | .env
59 | .cache
60 | .pytest
61 | .bootstrap
62 | .appveyor.token
63 | *.bak
64 |
--------------------------------------------------------------------------------
/install-prereqs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # launch the mongo model docker container
4 |
5 | echo "Building ..."
6 | echo "Launch the MongoDB models container ..."
7 | docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo
8 | echo -e "\n"
9 |
10 | echo "Launch the MongoDB results container ..."
11 | # launch the mongo results docker container
12 | docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -d --restart=always mongo
13 | echo -e "\n"
14 |
15 | echo "Launch the Rabbitmq broker container ..."
16 | # start the rabbitmq broker
17 | docker run -d -v /etc/localtime:/etc/localtime:ro \
18 | -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq \
19 | --name=rabbitmq_sched -p 8080:15672 -p 5672:5672\
20 | --restart=always rabbitmq:3-management
21 | echo -e "\n"
22 |
23 |
--------------------------------------------------------------------------------
/ci/templates/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python: '3.5'
3 | sudo: false
4 | env:
5 | global:
6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
7 | - SEGFAULT_SIGNALS=all
8 | matrix:
9 | - TOXENV=check
10 | - TOXENV=docs
11 | {% for env in tox_environments %}{{ '' }}
12 | - TOXENV={{ env }},coveralls,codecov
13 | {% endfor %}
14 |
15 | before_install:
16 | - python --version
17 | - uname -a
18 | - lsb_release -a
19 | install:
20 | - pip install tox
21 | - virtualenv --version
22 | - easy_install --version
23 | - pip --version
24 | - tox --version
25 | script:
26 | - tox -v
27 | after_failure:
28 | - more .tox/log/* | cat
29 | - more .tox/*/log/* | cat
30 | before_cache:
31 | - rm -rf $HOME/.cache/pip/log
32 | cache:
33 | directories:
34 | - $HOME/.cache/pip
35 | notifications:
36 | email:
37 | on_success: never
38 | on_failure: always
39 |
--------------------------------------------------------------------------------
/docs/Setup/requirements.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Requirements
3 | ============
4 |
5 | Because the whole architecture has many components, we use Docker_ to manage the platform and isolate the services.
6 |
7 | ALP has been developed to run on Ubuntu and has not been tested on other operating systems.
8 |
9 | You should first `install Docker`_ and `install nvidia-docker`_, then play a bit with Docker (check that you can access your GPU with nvidia-docker). Then, you should be ready to install ALP.
10 |
11 | You can then get ALP via pip:
12 |
13 | .. code-block:: bash
14 |
15 | pip install git+git://github.com/tboquet/python-alp
16 |
17 |
18 | That will install ALP on your machine, and you will be able to launch it via the Command Line Interface.
19 |
20 | .. _Docker: https://www.docker.com/
21 | .. _`nvidia-docker`: https://github.com/NVIDIA/nvidia-docker
22 | .. _`install Docker`: https://docs.docker.com/engine/installation/linux/ubuntulinux/
23 | .. _`install nvidia-docker`: https://github.com/NVIDIA/nvidia-docker/wiki/Installation
24 |
25 |
--------------------------------------------------------------------------------
/docs/Userguide/Services.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Services
3 | ========
4 |
5 | In this section we describe the different services (such as the Jupyter Notebook, RabbitMQ, the models databases, ...) running in separate Docker containers (respectively the Controller, the Broker, Mongo Models, ...). Since we tried to separate the services as much as possible, a container is sometimes identified with the service it runs.
6 |
7 | Controller
8 | ~~~~~~~~~~
9 |
10 | The Controller is the user endpoint of the library. By default, it serves a Jupyter notebook in which the user sends commands (such as `import alp`). You can also use it to run an application using ALP for either training or prediction.
11 |
12 | Mongo Models
13 | ~~~~~~~~~~~~
14 |
15 | Mongo Models is a container that runs a MongoDB service in which the architectures of the models trained through ALP are saved.
16 |
17 |
18 | Mongo Results
19 | ~~~~~~~~~~~~~
20 |
21 | Mongo Results is a container that runs a MongoDB service in which the meta information about tasks is saved.
22 |
23 | Broker
24 | ~~~~~~
25 |
26 | Also called the scheduler in the architecture, it distributes the tasks and gathers the results.
27 |
28 | Worker(s)
29 | ~~~~~~~~~
30 |
31 | The workers run the tasks and send results to the MongoDB services. Each backend needs at least one worker consuming from the right queue.
32 |
33 | Job monitor
34 | ~~~~~~~~~~~
35 |
36 | You can plug in several containers to monitor jobs.
37 |
38 |
39 |
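40 | To make the wiring between these services concrete, the snippet below condenses how the client side connects to the Broker and to Mongo Results; it is taken from :code:`src/alp/celapp.py` together with the default URLs defined in :code:`src/alp/appcom/__init__.py` (the host names :code:`rabbitmq` and :code:`mongo_r` are the defaults and may differ in your configuration):
41 |
42 | .. code-block:: python
43 |
44 |     # Condensed from src/alp/celapp.py: the Celery app sends tasks to the
45 |     # RabbitMQ broker and stores task results in the Mongo Results service.
46 |     from celery import Celery
47 |
48 |     broker = 'amqp://guest:guest@rabbitmq:5672//'   # default broker URL
49 |     backend = 'mongodb://mongo_r:27017'             # default results backend
50 |
51 |     app = Celery(broker=broker, backend=backend)
52 |     app.conf.update(task_serializer='pickle',
53 |                     result_serializer='json',
54 |                     accept_content=['pickle', 'json'])
55 |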
--------------------------------------------------------------------------------
/src/alp/appcom/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 |
4 | import json
5 | import os
6 |
7 | from .core import *
8 |
9 | _alp_base_dir = os.path.expanduser('~')
10 | if not os.access(_alp_base_dir, os.W_OK): # pragma: no cover
11 | _alp_base_dir = '/tmp'
12 |
13 |
14 | _alp_dir = os.path.join(_alp_base_dir, '.alp')
15 | if not os.path.exists(_alp_dir): # pragma: no cover
16 | os.makedirs(_alp_dir)
17 |
18 | # Defaults
19 |
20 | # App config
21 | _broker = 'amqp://guest:guest@rabbitmq:5672//'
22 | _backend = 'mongodb://mongo_r:27017'
23 |
24 | # Parameters
25 | _path_h5 = '/parameters_h5/'
26 |
27 | if os.getenv("TEST_MODE") == "ON": # pragma: no cover
28 | _backend = 'mongodb://127.0.0.1:27018'
29 | _broker = 'amqp://guest:guest@localhost:5672//'
30 |
31 | elif os.getenv("WORKER") == "TRUE": # pragma: no cover
32 | _backend = 'mongodb://mongo_r:27017' # pragma: no cover
33 | _config_path = os.path.expanduser(os.path.join(_alp_dir, 'alpapp.json'))
34 |
35 | if os.path.exists(_config_path): # pragma: no cover
36 | _config = json.load(open(_config_path))
37 | _broker = _config.get('broker', 'amqp://guest:guest@rabbitmq:5672//')
38 | _backend = _config.get('backend', 'mongodb://mongo_r:27017')
39 | _path_h5 = _config.get('path_h5', '/parameters_h5/')
40 |
41 | # save config file
42 | _config = {'broker': _broker,
43 | 'backend': _backend,
44 | 'path_h5': _path_h5}
45 |
46 | with open(_config_path, 'w') as f:
47 | f.write(json.dumps(_config, indent=4))
48 |
49 | __all__ = ["Experiment"]
50 |
--------------------------------------------------------------------------------
/ci/appveyor-with-compiler.cmd:
--------------------------------------------------------------------------------
1 | :: To build extensions for 64 bit Python 3, we need to configure environment
2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
4 | ::
5 | :: To build extensions for 64 bit Python 2, we need to configure environment
6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
8 | ::
9 | :: 32 bit builds do not require specific environment configurations.
10 | ::
11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the
12 | :: cmd interpreter, at least for (SDK v7.0)
13 | ::
14 | :: More details at:
15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
16 | :: http://stackoverflow.com/a/13751649/163740
17 | ::
18 | :: Author: Olivier Grisel
19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
20 | SET COMMAND_TO_RUN=%*
21 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
22 | SET WIN_WDK="c:\Program Files (x86)\Windows Kits\10\Include\wdf"
23 | ECHO SDK: %WINDOWS_SDK_VERSION% ARCH: %PYTHON_ARCH%
24 |
25 |
26 | IF "%PYTHON_VERSION%"=="3.5" (
27 | IF EXIST %WIN_WDK% (
28 | REM See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/
29 | REN %WIN_WDK% 0wdf
30 | )
31 | GOTO main
32 | )
33 |
34 | IF "%PYTHON_ARCH%"=="32" (
35 | GOTO main
36 | )
37 |
38 | SET DISTUTILS_USE_SDK=1
39 | SET MSSdk=1
40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
41 | CALL "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
42 |
43 | :main
44 |
45 | ECHO Executing: %COMMAND_TO_RUN%
46 | CALL %COMMAND_TO_RUN% || EXIT 1
47 |
--------------------------------------------------------------------------------
/docs/Setup/config_CLI_launch.rst:
--------------------------------------------------------------------------------
1 | ==========================
2 | Launching ALP with the CLI
3 | ==========================
4 |
5 | To begin, we can generate a base configuration using the ALP CLI. We choose to write the configuration files on the host machine so that they can easily be customized afterwards.
6 |
7 |
8 | .. code-block:: bash
9 |
10 | alp --help
11 |
12 | This command will provide you with some help on the command line interface.
13 |
14 | Generating a new configuration is as easy as:
15 |
16 | .. code-block:: bash
17 |
18 | alp --verbose genconfig --outdir=/path/to/a/directory
19 |
20 |
21 | The command will generate a base configuration with one controller, one scikit-learn worker and one Keras worker.
22 | We specify the output directory where we want to write the three configuration files. The first file, :code:`alpdb.json`, defines the connection between the database of models and the other containers. The second file, :code:`alpapp.json`, defines the connections between the broker, its database and the other containers. The third file, :code:`containers.json`, defines all the containers of the architecture. The linking is done automatically and ALP will use the newly created files to launch a new instance. A sketch of a :code:`containers.json` entry is shown at the end of this page.
23 |
24 | In any case, verify that the ports you want to use are free, so that the broker can communicate with the monitoring containers and the Jupyter notebooks (if any) can run.
25 |
26 | To start all the services you can use :code:`alp service start`:
27 |
28 | .. code-block:: bash
29 |
30 | alp --verbose service start /path/to/a/directory
31 |
32 | You can then take a look at the status of the containers:
33 |
34 | .. code-block:: bash
35 |
36 | alp --verbose status /path/to/a/directory
37 |
38 |
39 | You should be able to access the Jupyter notebook on port :code:`440` of the machine where you launched the services.
40 |
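41 | For reference, the shape of a container entry in :code:`containers.json` can be seen in the test suite (:code:`tests/test_alp.py`). The sketch below mirrors the broker entry used there; it is an assumption that the file generated by :code:`genconfig` follows exactly the same fields, and the paths, ports and names will differ on your machine.
42 |
43 | .. code-block:: python
44 |
45 |     # Sketch of a containers.json broker entry, mirroring tests/test_alp.py;
46 |     # the values produced by `alp genconfig` may differ on your machine.
47 |     import json
48 |
49 |     broker_entry = {
50 |         "volumes": ["/opt/data/rabbitmq/dev/log:/dev/log",
51 |                     "/opt/data/rabbitmq:/var/lib/rabbitmq"],
52 |         "ports": ["8080:15672", "5672:5672"],
53 |         "name": "rabbitmq_sched",
54 |         "container_name": "rabbitmq:3-management",
55 |         "mode": "-d",
56 |     }
57 |
58 |     with open("containers.json", "w") as f:
59 |         json.dump({"broker": broker_entry, "workers": [], "controlers": []},
60 |                   f, indent=4)
61 |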
--------------------------------------------------------------------------------
/ci/templates/appveyor.yml:
--------------------------------------------------------------------------------
1 | version: '{branch}-{build}'
2 | build: off
3 | cache:
4 | - '%LOCALAPPDATA%\pip\Cache'
5 | environment:
6 | global:
7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd'
8 | matrix:
9 | - TOXENV: check
10 | PYTHON_HOME: C:\Python27
11 | PYTHON_VERSION: '2.7'
12 | PYTHON_ARCH: '32'
13 |
14 | {% for env in tox_environments %}{% if env.startswith(('py27', 'py34', 'py35')) %}
15 | - TOXENV: '{{ env }},codecov'
16 | TOXPYTHON: C:\Python{{ env[2:4] }}\python.exe
17 | PYTHON_HOME: C:\Python{{ env[2:4] }}
18 | PYTHON_VERSION: '{{ env[2] }}.{{ env[3] }}'
19 | PYTHON_ARCH: '32'
20 |
21 | - TOXENV: '{{ env }},codecov'
22 | TOXPYTHON: C:\Python{{ env[2:4] }}-x64\python.exe
23 | {%- if env.startswith(('py2', 'py33', 'py34')) %}
24 |
25 | WINDOWS_SDK_VERSION: v7.{{ '1' if env.startswith('py3') else '0' }}
26 | {%- endif %}
27 |
28 | PYTHON_HOME: C:\Python{{ env[2:4] }}-x64
29 | PYTHON_VERSION: '{{ env[2] }}.{{ env[3] }}'
30 | PYTHON_ARCH: '64'
31 |
32 | {% endif %}{% endfor %}
33 | init:
34 | - ps: echo $env:TOXENV
35 | - ps: ls C:\Python*
36 | install:
37 | - python -u ci\appveyor-bootstrap.py
38 | - '%PYTHON_HOME%\Scripts\virtualenv --version'
39 | - '%PYTHON_HOME%\Scripts\easy_install --version'
40 | - '%PYTHON_HOME%\Scripts\pip --version'
41 | - '%PYTHON_HOME%\Scripts\tox --version'
42 | test_script:
43 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox'
44 |
45 | on_failure:
46 | - ps: dir "env:"
47 | - ps: get-content .tox\*\log\*
48 | artifacts:
49 | - path: dist\*
50 |
51 | ### To enable remote debugging uncomment this (also, see: http://www.appveyor.com/docs/how-to/rdp-to-build-worker):
52 | # on_finish:
53 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
54 |
--------------------------------------------------------------------------------
/src/alp/dbbackend/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 |
4 | import json
5 | import os
6 |
7 |
8 | _alp_base_dir = os.path.expanduser('~')
9 | if not os.access(_alp_base_dir, os.W_OK): # pragma: no cover
10 | _alp_base_dir = '/tmp'
11 |
12 |
13 | _alp_dir = os.path.join(_alp_base_dir, '.alp')
14 | if not os.path.exists(_alp_dir): # pragma: no cover
15 | os.makedirs(_alp_dir)
16 |
17 | _db_engine = 'mongodb'
18 | _host_adress = 'mongo_m'
19 | _host_port = 27017
20 | _db_name = 'modelization'
21 | _models_collection = 'models'
22 | _generators_collection = 'generators'
23 |
24 | if os.getenv("TEST_MODE") == "ON": # pragma: no cover
25 | _host_adress = '127.0.0.1'
26 |
27 | # note: we have to be able to accept other structures
28 |
29 | _config_path = os.path.expanduser(os.path.join(_alp_dir, 'alpdb.json'))
30 | if os.path.exists(_config_path): # pragma: no cover
31 | _config = json.load(open(_config_path))
32 | _db_engine = _config.get('db_engine', 'mongodb')
33 | assert _db_engine in {'mongodb'}
34 | _host_adress = _config.get('host_adress', 'mongo_m')
35 | _host_port = _config.get('host_port', 27017)
36 | _db_name = _config.get('db_name', 'modelization')
37 |     _models_collection = _config.get('models_collection', 'models')
38 |     _generators_collection = _config.get('generators_collection', 'generators')
39 |
40 | # save config file
41 | _config = {'db_engine': _db_engine,
42 | 'host_adress': _host_adress,
43 | 'host_port': _host_port,
44 | 'db_name': _db_name,
45 | 'models_collection': _models_collection,
46 | 'generators_collection': _generators_collection}
47 |
48 | with open(_config_path, 'w') as f:
49 | f.write(json.dumps(_config, indent=4))
50 |
51 | # import backend
52 | if _db_engine == 'mongodb':
53 | from ..dbbackend.mongo_backend import * # NOQA
54 | else:
55 | raise Exception('Unknown backend: ' + str(_db_engine))
56 |
--------------------------------------------------------------------------------
/.cookiecutterrc:
--------------------------------------------------------------------------------
1 | # This file exists so you can easily regenerate your project.
2 | #
3 | # `cookiepatcher` is a convenient shim around `cookiecutter`
4 | # for regenerating projects (it will generate a .cookiecutterrc
5 | # automatically for any template). To use it:
6 | #
7 | # pip install cookiepatcher
8 | # cookiepatcher gh:ionelmc/cookiecutter-pylibrary project-path
9 | #
10 | # See:
11 | # https://pypi.python.org/pypi/cookiecutter
12 | #
13 | # Alternatively, you can run:
14 | #
15 | # cookiecutter --overwrite-if-exists --config-file=project-path/.cookiecutterrc gh:ionelmc/cookiecutter-pylibrary
16 |
17 | default_context:
18 |
19 | appveyor: 'yes'
20 | c_extension_cython: 'no'
21 | c_extension_optional: 'no'
22 | c_extension_support: 'no'
23 | codacy: 'yes'
24 | codeclimate: 'yes'
25 | codecov: 'yes'
26 | command_line_interface: 'plain'
27 | command_line_interface_bin_name: 'alp'
28 | coveralls: 'yes'
29 | distribution_name: 'alp'
30 | email: 'thomas.boquet@hec.ca'
31 | full_name: 'Thomas Boquet'
32 | github_username: 'tboquet'
33 | landscape: 'no'
34 | package_name: 'alp'
35 | project_name: 'ALP'
36 | project_short_description: 'Machine learning for teams'
37 | release_date: 'today'
38 | repo_name: 'python-alp'
39 | requiresio: 'yes'
40 | scrutinizer: 'no'
41 | sphinx_doctest: 'yes'
42 | sphinx_theme: 'sphinx-py3doc-enhanced-theme'
43 | test_matrix_configurator: 'no'
44 | test_matrix_separate_coverage: 'no'
45 | test_runner: 'pytest'
46 | travis: 'yes'
47 | version: '0.1.0'
48 | website: 'https://tboquet.github.io'
49 | year: 'now'
50 |
--------------------------------------------------------------------------------
/ci/bootstrap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from __future__ import absolute_import, print_function, unicode_literals
4 |
5 | import os
6 | import sys
7 | from os.path import exists
8 | from os.path import join
9 | from os.path import dirname
10 | from os.path import abspath
11 |
12 |
13 | if __name__ == "__main__":
14 | base_path = dirname(dirname(abspath(__file__)))
15 | print("Project path: {0}".format(base_path))
16 | env_path = join(base_path, ".tox", "bootstrap")
17 | if sys.platform == "win32":
18 | bin_path = join(env_path, "Scripts")
19 | else:
20 | bin_path = join(env_path, "bin")
21 | if not exists(env_path):
22 | import subprocess
23 | print("Making bootstrap env in: {0} ...".format(env_path))
24 | try:
25 | subprocess.check_call(["virtualenv", env_path])
26 | except Exception:
27 | subprocess.check_call([sys.executable, "-m", "virtualenv", env_path])
28 | print("Installing `jinja2` into bootstrap environment ...")
29 | subprocess.check_call([join(bin_path, "pip"), "install", "jinja2"])
30 | activate = join(bin_path, "activate_this.py")
31 | exec(compile(open(activate, "rb").read(), activate, "exec"), dict(__file__=activate))
32 |
33 | import jinja2
34 |
35 | import subprocess
36 |
37 |
38 | jinja = jinja2.Environment(
39 | loader=jinja2.FileSystemLoader(join(base_path, "ci", "templates")),
40 | trim_blocks=True,
41 | lstrip_blocks=True,
42 | keep_trailing_newline=True
43 | )
44 |
45 | tox_environments = [
46 | line.strip()
47 | for line in subprocess.check_output(['tox', '--listenvs'], universal_newlines=True).splitlines()
48 | ]
49 | tox_environments = [line for line in tox_environments if line not in ['clean', 'report', 'docs', 'check']]
50 |
51 |
52 | for name in os.listdir(join("ci", "templates")):
53 | with open(join(base_path, name), "w") as fh:
54 | fh.write(jinja.get_template(name).render(tox_environments=tox_environments))
55 | print("Wrote {}".format(name))
56 | print("DONE.")
57 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | version: '{branch}-{build}'
2 | build: off
3 | cache:
4 | - '%LOCALAPPDATA%\pip\Cache'
5 | environment:
6 | global:
7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd'
8 | matrix:
9 | - TOXENV: check
10 | PYTHON_HOME: C:\Python27
11 | PYTHON_VERSION: '2.7'
12 | PYTHON_ARCH: '32'
13 |
14 | - TOXENV: 'py27,codecov'
15 | TOXPYTHON: C:\Python27\python.exe
16 | PYTHON_HOME: C:\Python27
17 | PYTHON_VERSION: '2.7'
18 | PYTHON_ARCH: '32'
19 |
20 | - TOXENV: 'py27,codecov'
21 | TOXPYTHON: C:\Python27-x64\python.exe
22 | WINDOWS_SDK_VERSION: v7.0
23 | PYTHON_HOME: C:\Python27-x64
24 | PYTHON_VERSION: '2.7'
25 | PYTHON_ARCH: '64'
26 |
27 | - TOXENV: 'py34,codecov'
28 | TOXPYTHON: C:\Python34\python.exe
29 | PYTHON_HOME: C:\Python34
30 | PYTHON_VERSION: '3.4'
31 | PYTHON_ARCH: '32'
32 |
33 | - TOXENV: 'py34,codecov'
34 | TOXPYTHON: C:\Python34-x64\python.exe
35 | WINDOWS_SDK_VERSION: v7.1
36 | PYTHON_HOME: C:\Python34-x64
37 | PYTHON_VERSION: '3.4'
38 | PYTHON_ARCH: '64'
39 |
40 | - TOXENV: 'py35,codecov'
41 | TOXPYTHON: C:\Python35\python.exe
42 | PYTHON_HOME: C:\Python35
43 | PYTHON_VERSION: '3.5'
44 | PYTHON_ARCH: '32'
45 |
46 | - TOXENV: 'py35,codecov'
47 | TOXPYTHON: C:\Python35-x64\python.exe
48 | PYTHON_HOME: C:\Python35-x64
49 | PYTHON_VERSION: '3.5'
50 | PYTHON_ARCH: '64'
51 |
52 | init:
53 | - ps: echo $env:TOXENV
54 | - ps: ls C:\Python*
55 | install:
56 | - python -u ci\appveyor-bootstrap.py
57 | - '%PYTHON_HOME%\Scripts\virtualenv --version'
58 | - '%PYTHON_HOME%\Scripts\easy_install --version'
59 | - '%PYTHON_HOME%\Scripts\pip --version'
60 | - '%PYTHON_HOME%\Scripts\tox --version'
61 | test_script:
62 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox'
63 |
64 | on_failure:
65 | - ps: dir "env:"
66 | - ps: get-content .tox\*\log\*
67 | artifacts:
68 | - path: dist\*
69 |
70 | ### To enable remote debugging uncomment this (also, see: http://www.appveyor.com/docs/how-to/rdp-to-build-worker):
71 | # on_finish:
72 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
73 |
--------------------------------------------------------------------------------
/src/alp/dbbackend/mongo_backend.py:
--------------------------------------------------------------------------------
1 | """
2 | Model database setup
3 | ====================
4 | """
5 |
6 | from pymongo import DESCENDING
7 | from pymongo import MongoClient
8 | from pymongo import ReturnDocument
9 | from ..dbbackend import _db_name
10 | from ..dbbackend import _generators_collection
11 | from ..dbbackend import _host_adress
12 | from ..dbbackend import _host_port
13 | from ..dbbackend import _models_collection
14 |
15 |
16 | def get_models():
17 | """Utility function to retrieve the collection of models
18 |
19 | Returns:
20 | the collection of models"""
21 | client = MongoClient(_host_adress, _host_port)
22 | modelization = client[_db_name]
23 | return modelization[_models_collection]
24 |
25 |
26 | def get_generators():
27 | """Utility function to retrieve the collection of generators
28 |
29 | Returns:
30 | the collection of generators"""
31 | client = MongoClient(_host_adress, _host_port)
32 | modelization = client[_db_name]
33 | return modelization[_generators_collection]
34 |
35 |
36 | def insert(full_json, collection, upsert=False):
37 | """Insert an observation in the db
38 |
39 | Args:
40 |         full_json(dict): a dictionary mapping variable names to
41 |             characteristics of the object. This dictionary must
42 |             have the mod_data_id key.
43 |
44 | Returns:
45 | the id of the inserted object in the db"""
46 | filter_db = dict()
47 | filter_db['mod_data_id'] = full_json['mod_data_id']
48 | doc_id = collection.find_one(filter_db)
49 | if doc_id is not None:
50 | doc_id = doc_id['_id']
51 | if upsert is True:
52 | inserted = collection.find_one_and_update(
53 | filter_db, {'$set': full_json}, upsert=upsert,
54 | return_document=ReturnDocument.AFTER)
55 | inserted = inserted['_id']
56 | else:
57 | inserted = collection.insert_one(full_json).inserted_id
58 | return inserted
59 |
60 |
61 | def update(inserted_id, json_changes):
62 | """Update an observation in the db
63 |
64 | Args:
65 |         inserted_id: the id of the observation
66 | json_changes(dict): the changes to do in the db"""
67 | models = get_models()
68 | updated = models.update_one(inserted_id, json_changes)
69 | return updated
70 |
71 |
72 | def create_db(drop=True):
73 |     """Create the models collection index, optionally dropping the existing collection first"""
74 | client = MongoClient(_host_adress, _host_port)
75 | modelization = client[_db_name]
76 | if drop:
77 | modelization.drop_collection(_models_collection)
78 | models = modelization['models']
79 | return models.create_index([('mod_data_id', DESCENDING)],
80 | unique=True)
81 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 |
6 | import io
7 | import re
8 | from glob import glob
9 | from os.path import basename
10 | from os.path import dirname
11 | from os.path import join
12 | from os.path import splitext
13 |
14 | from setuptools import find_packages
15 | from setuptools import setup
16 |
17 |
18 | def read(*names, **kwargs):
19 | return io.open(
20 | join(dirname(__file__), *names),
21 | encoding=kwargs.get('encoding', 'utf8')
22 | ).read()
23 |
24 |
25 | setup(
26 | name='alp',
27 | version='0.3.0',
28 | license='BSD',
29 | description='Machine learning for teams',
30 | long_description='%s\n%s' % (
31 | re.compile('^.. start-badges.*^.. end-badges', re.M | re.S).sub('', read('README.rst')),
32 | re.sub(':[a-z]+:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst'))
33 | ),
34 | author='Thomas Boquet',
35 | author_email='thomas.boquet@hec.ca',
36 | url='https://github.com/tboquet/python-alp',
37 | packages=find_packages('src'),
38 | package_dir={'': 'src'},
39 | py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
40 | include_package_data=True,
41 | zip_safe=False,
42 | classifiers=[
43 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers
44 | 'Development Status :: 5 - Production/Stable',
45 | 'Intended Audience :: Developers',
46 | 'License :: OSI Approved :: BSD License',
47 | 'Operating System :: Unix',
48 | 'Operating System :: POSIX',
49 | 'Operating System :: Microsoft :: Windows',
50 | 'Programming Language :: Python',
51 | 'Programming Language :: Python :: 2.7',
52 | 'Programming Language :: Python :: 3',
53 | 'Programming Language :: Python :: 3.3',
54 | 'Programming Language :: Python :: 3.4',
55 | 'Programming Language :: Python :: 3.5',
56 | 'Programming Language :: Python :: Implementation :: CPython',
57 | 'Programming Language :: Python :: Implementation :: PyPy',
58 | # uncomment if you test on these interpreters:
59 | # 'Programming Language :: Python :: Implementation :: IronPython',
60 | # 'Programming Language :: Python :: Implementation :: Jython',
61 | # 'Programming Language :: Python :: Implementation :: Stackless',
62 | 'Topic :: Utilities',
63 | ],
64 | keywords=[
65 | # eg: 'keyword1', 'keyword2', 'keyword3',
66 | ],
67 | install_requires=[
68 | # eg: 'aspectlib==1.1.1', 'six>=1.7',
69 | ],
70 | extras_require={
71 | # eg:
72 | # 'rst': ['docutils>=0.11'],
73 | # ':python_version=="2.6"': ['argparse'],
74 | },
75 | entry_points={
76 | 'console_scripts': [
77 | 'alp = alp.cli:main',
78 | ]
79 | },
80 | )
81 |
--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Contributing
3 | ============
4 |
5 | Contributions are welcome, and they are greatly appreciated! Every
6 | little bit helps, and credit will always be given.
7 |
8 | Bug reports
9 | ===========
10 |
11 | When `reporting a bug `_, please include:
12 |
13 | * Your operating system name and version.
14 | * Any details about your local setup that might be helpful in troubleshooting.
15 | * Detailed steps to reproduce the bug.
16 |
17 | Documentation improvements
18 | ==========================
19 |
20 | ALP could always use more documentation, whether as part of the
21 | official ALP docs, in docstrings, or even on the web in blog posts,
22 | articles, and such.
23 |
24 | Feature requests and feedback
25 | =============================
26 |
27 | The best way to send feedback is to file an issue at https://github.com/tboquet/python-alp/issues.
28 |
29 | If you are proposing a feature:
30 |
31 | * Explain in detail how it would work.
32 | * Keep the scope as narrow as possible, to make it easier to implement.
33 | * Remember that this is a volunteer-driven project, and that code contributions are welcome :)
34 |
35 | Development
36 | ===========
37 |
38 | To set up `python-alp` for local development:
39 |
40 | 1. Fork `python-alp `_
41 | (look for the "Fork" button).
42 | 2. Clone your fork locally::
43 |
44 | git clone git@github.com:your_name_here/python-alp.git
45 |
46 | 3. Create a branch for local development::
47 |
48 | git checkout -b name-of-your-bugfix-or-feature
49 |
50 | Now you can make your changes locally.
51 |
52 | 4. When you're done making changes, run all the checks, doc builder and spell checker with `tox `_ in one command::
53 |
54 | tox
55 |
56 | 5. Commit your changes and push your branch to GitHub::
57 |
58 | git add .
59 | git commit -m "Your detailed description of your changes."
60 | git push origin name-of-your-bugfix-or-feature
61 |
62 | 6. Submit a pull request through the GitHub website.
63 |
64 | Pull Request Guidelines
65 | =======================
66 |
67 | If you need some code review or feedback while you're developing the code, just make the pull request.
68 |
69 | For merging, you should:
70 |
71 | 1. Include passing tests (run ``tox``) [1]_.
72 | 2. Update documentation when there's new API, functionality etc.
73 | 3. Add a note to ``CHANGELOG.rst`` about the changes.
74 | 4. Add yourself to ``AUTHORS.rst``.
75 |
76 | .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will
77 | `run the tests `_ for each change you add in the pull request.
78 |
79 | It will be slower though ...
80 |
81 | Tips
82 | ====
83 |
84 | To run a subset of tests::
85 |
86 | tox -e envname -- py.test -k test_myfeature
87 |
88 | To run all the test environments in *parallel* (you need to ``pip install detox``)::
89 |
90 | detox
91 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import unicode_literals
3 |
4 | import os
5 | import alabaster
6 |
7 | extensions = [
8 | 'sphinx.ext.autodoc',
9 | 'sphinx.ext.autosummary',
10 | 'sphinx.ext.coverage',
11 | 'sphinx.ext.doctest',
12 | 'sphinx.ext.extlinks',
13 | 'sphinx.ext.ifconfig',
14 | 'sphinx.ext.napoleon',
15 | 'sphinx.ext.todo',
16 | 'sphinx.ext.viewcode',
17 | 'sphinxcontrib.httpdomain',
18 | 'sphinxcontrib.autohttp.flask',
19 | 'alabaster'
20 | ]
21 | if os.getenv('SPELLCHECK'):
22 | extensions += 'sphinxcontrib.spelling',
23 | spelling_show_suggestions = True
24 | spelling_lang = 'en_US'
25 |
26 | # on_rtd is whether we are on readthedocs.org
27 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
28 |
29 | source_suffix = '.rst'
30 | master_doc = 'index'
31 | project = u'ALP'
32 | year = '2016'
33 | author = u'Thomas Boquet and Paul Lemaître'
34 | copyright = '{0}, {1}'.format(year, author)
35 | version = release = u'0.3.0'
36 |
37 | pygments_style = 'sphinx'
38 | templates_path = ['_templates']
39 | extlinks = {
40 | 'issue': ('https://github.com/tboquet/python-alp/issues/%s', '#'),
41 | 'pr': ('https://github.com/tboquet/python-alp/pull/%s', 'PR #'),
42 | }
43 |
44 | description = 'Schedule and save your machine learning experiments'
45 | # -- Option for HTML output -----------------------------------------------
46 |
47 | html_static_path = ['_static']
48 | html_theme_options = {
49 | 'logo': 'last_bouquetin.svg',
50 | 'logo_name': 'true',
51 | 'description': description,
52 | 'github_button': 'false'
53 | }
54 |
55 | # Custom sidebar templates, maps document names to template names.
56 | html_sidebars = {
57 | '**': [
58 | 'about.html',
59 | 'navigation.html',
60 | 'relations.html',
61 | 'searchbox.html',
62 | 'donate.html',
63 | 'relations.html',
64 | 'last_modified.html'
65 | ]
66 | }
67 |
68 | html_show_sourcelink = True
69 |
70 |
71 | # Add any paths that contain custom themes here, relative to this directory.
72 |
73 | if not on_rtd: # only set the theme if we're building docs locally
74 | html_theme = 'alabaster'
75 | html_theme_path = [alabaster.get_path()]
76 |
77 | else:
78 | from mock import Mock as MagicMock
79 | import sys
80 |
81 | class Mock(MagicMock):
82 | @classmethod
83 | def __getattr__(cls, name):
84 | return Mock()
85 |
86 | # include the names of your minimal required packages here
87 | MOCK_MODULES = ['h5py']
88 | sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
89 |
90 | html_use_smartypants = True
91 | html_last_updated_fmt = '%b %d, %Y'
92 | html_split_index = False
93 |
94 |
95 | html_short_title = '%s-%s' % (project, version)
96 |
97 | napoleon_use_ivar = True
98 | napoleon_use_rtype = False
99 | napoleon_use_param = False
100 |
101 | keep_warnings = True
102 |
103 | add_module_names = False
104 | todo_include_todos = True
105 |
106 | mathjax_path = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'
107 |
--------------------------------------------------------------------------------
/docs/FirstSteps/why-alp.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Why ALP?
3 | ========
4 |
5 | We noticed that, when dealing with a machine learning problem, we sometimes spend more time building a model, testing different architectures and comparing results than actually working on the ideas that will solve our problem. To help with that process, we developed an Asynchronous Learning Platform (ALP) that uses the hardware (CPU+GPU) at a convenient capacity. The platform relies on independent services running in Docker containers. To make the platform easy to use, we built a convenient command line interface from which you can easily launch, stop, remove, update and monitor a configuration.
6 |
7 | The whole system runs in the background so that the end user does not directly interact with the databases or the broker and just runs code in a usual Jupyter Notebook or from an application. You can also launch monitoring containers and access different dashboards to supervise all of your experiments. Moreover, it is possible to easily retrieve one of the trained models along with its parameters at test time.
8 |
9 | ================================
10 | What kind of models can you use?
11 | ================================
12 |
13 | So far, the whole Keras_ neural network library is supported, as well as several models from the `scikit-learn`_ library.
14 |
15 |
16 | ==============================================
17 | What do I need to run ALP? What is inside ALP?
18 | ==============================================
19 |
20 | You need to use a machine running Linux to use ALP [1]_.
21 | ALP relies on Docker, RabbitMQ, Celery, MongoDB and nvidia-docker. It also supports interfacing with Fuel and thus depends on Theano. It is implemented in Python. However, since all services run in Docker containers, your OS only needs Docker (and nvidia-docker if you want to use an NVIDIA GPU).
22 |
23 | All of these concepts and dependencies are explained later in the Setup and Userguide sections.
24 |
25 |
26 | ======================
27 | How could ALP help me?
28 | ======================
29 |
30 | We believe it might be useful for several applications such as:
31 |
32 | - **hyperparameter tuning**: for instance, if you want to test several architectures for your neural network model, ALP can help you deal with the tedious task of logging all the architectures, parameters and results. They are all automatically stored in the databases and you just have to select the best model given the validation(s) you specified.
33 | - **fitting several models on several data streams**: if you have data streams coming from a source and you want to fit a lot of online models, it is easy with ALP. With the support of Fuel generators, you can transform your data on the fly. The learning is then done using the resources of the host and the parameters of the models are stored. You could even code an API that returns predictions to your data service.
34 | - **post analysis**: extract and explore the parameters of models given their scores on several data blocks. Sometimes it can be helpful to visualise the successful sets of parameters.
35 |
36 | - **model deployment in production**: when a model is trained, you can load it and deploy it instantly in production.
37 |
38 |
39 |
40 | .. [1] Unfortunately, at the time of development, running MongoDB in a Windows Docker container was not possible, but we will look into that soon.
41 |
42 |
43 | .. _Keras: http://keras.io/
44 | .. _`scikit-learn`: http://scikit-learn.org/stable/
45 |
--------------------------------------------------------------------------------
/tests/test_alp.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from click.testing import CliRunner
4 |
5 | from alp.cli import main
6 |
7 |
8 | def init_test_config():
9 | config_path = '/root/.alp/containers_test.json'
10 | if os.getenv('TEST_MODE') == 'ON': # pragma: no cover
11 | config_path = 'containers_test.json'
12 | if not os.path.exists(config_path): # pragma: no cover
13 | config = dict()
14 | config['broker'] = {
15 | "volumes": ["/opt/data2/rabbitmq/dev/log:/dev/log",
16 | "/opt/data2/rabbitmq:/var/lib/rabbitmq"],
17 | "ports": ["8080:15672", "5672:5672"],
18 | "name": "rabbitmq_sched",
19 | "container_name": "rabbitmq:3-management",
20 | "mode": "-d"
21 | }
22 | config['result_db'] = {
23 | "volumes": ["/opt/data2/mongo_data/results:/data/db"],
24 | "name": "mongo_results_test",
25 | "container_name": "mongo",
26 | "mode": "-d"
27 | }
28 | config['model_gen_db'] = {
29 | "volumes": ["/opt/data2/mongo_data/models:/data/db"],
30 | "name": "mongo_models_test",
31 | "container_name": "mongo",
32 | "mode": "-d",
33 | }
34 | config['workers'] = []
35 | config['controlers'] = []
36 |
37 | with open(config_path, 'w') as f:
38 | f.write(json.dumps(config, indent=4))
39 | return config_path
40 |
41 |
42 | config_path = init_test_config()
43 |
44 |
45 | def test_status():
46 | runner = CliRunner()
47 | result = runner.invoke(main, ['status', config_path])
48 | assert result.exit_code == 0
49 | result = runner.invoke(main, ['--verbose', 'status', config_path])
50 | assert result.exit_code == 0
51 |
52 |
53 | def test_service():
54 | runner = CliRunner()
55 | for cm in ['stop', 'rm', 'start', 'restart']:
56 | result = runner.invoke(main, ['service', cm, config_path])
57 | assert result.exit_code == 0
58 | for cm in ['stop', 'rm', 'start', 'restart']:
59 | result = runner.invoke(main, ['--verbose', 'service', cm, config_path])
60 | assert result.exit_code == 0
61 | for cm in ['stop', 'rm', 'start', 'restart']:
62 | result = runner.invoke(main, ['--verbose', 'service', '--dry_run',
63 | cm, config_path])
64 | assert result.exit_code == 0
65 |
66 | def test_update():
67 | runner = CliRunner()
68 | result = runner.invoke(main, ['--verbose', 'update', config_path])
69 | assert result.exit_code == 0
70 |
71 |
72 | def test_pull():
73 | runner = CliRunner()
74 | result = runner.invoke(main, ['pull', config_path])
75 | assert result.exit_code == 0
76 | result = runner.invoke(main, ['--verbose', 'pull', config_path])
77 | assert result.exit_code == 0
78 |
79 |
80 | def test_gen():
81 | user_path = os.path.expanduser('~')
82 | gen_dir = os.path.join(user_path, '.alp')
83 | if not os.path.exists(gen_dir):
84 | os.makedirs(gen_dir)
85 | runner = CliRunner()
86 | result = runner.invoke(main, ['genconfig'])
87 | result = runner.invoke(main, ['genconfig', '--namesuf=test'])
88 | result = runner.invoke(main, ['genconfig', '--cpu'])
89 | assert result.exit_code == 0
90 | result = runner.invoke(main, ['--verbose', 'genconfig',
91 | '--outdir={}'.format(gen_dir)])
92 | assert result.exit_code == 0
93 | result = runner.invoke(main, ['--verbose', 'genconfig',
94 | '--rootfolder={}'.format(gen_dir)])
95 | assert result.exit_code == 0
96 |
--------------------------------------------------------------------------------
/ci/appveyor-download.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Use the AppVeyor API to download Windows artifacts.
4 |
5 | Taken from: https://bitbucket.org/ned/coveragepy/src/tip/ci/download_appveyor.py
6 | # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
7 | # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
8 | """
9 | from __future__ import unicode_literals
10 |
11 | import argparse
12 | import os
13 | import requests
14 | import zipfile
15 |
16 |
17 | def make_auth_headers():
18 | """Make the authentication headers needed to use the Appveyor API."""
19 | path = os.path.expanduser("~/.appveyor.token")
20 | if not os.path.exists(path):
21 | raise RuntimeError(
22 | "Please create a file named `.appveyor.token` in your home directory. "
23 | "You can get the token from https://ci.appveyor.com/api-token"
24 | )
25 | with open(path) as f:
26 | token = f.read().strip()
27 |
28 | headers = {
29 | 'Authorization': 'Bearer {}'.format(token),
30 | }
31 | return headers
32 |
33 |
34 | def download_latest_artifacts(account_project, build_id):
35 | """Download all the artifacts from the latest build."""
36 | if build_id is None:
37 | url = "https://ci.appveyor.com/api/projects/{}".format(account_project)
38 | else:
39 | url = "https://ci.appveyor.com/api/projects/{}/build/{}".format(account_project, build_id)
40 | build = requests.get(url, headers=make_auth_headers()).json()
41 | jobs = build['build']['jobs']
42 | print(u"Build {0[build][version]}, {1} jobs: {0[build][message]}".format(build, len(jobs)))
43 |
44 | for job in jobs:
45 | name = job['name']
46 | print(u" {0}: {1[status]}, {1[artifactsCount]} artifacts".format(name, job))
47 |
48 | url = "https://ci.appveyor.com/api/buildjobs/{}/artifacts".format(job['jobId'])
49 | response = requests.get(url, headers=make_auth_headers())
50 | artifacts = response.json()
51 |
52 | for artifact in artifacts:
53 | is_zip = artifact['type'] == "Zip"
54 | filename = artifact['fileName']
55 | print(u" {0}, {1} bytes".format(filename, artifact['size']))
56 |
57 | url = "https://ci.appveyor.com/api/buildjobs/{}/artifacts/{}".format(job['jobId'], filename)
58 | download_url(url, filename, make_auth_headers())
59 |
60 | if is_zip:
61 | unpack_zipfile(filename)
62 | os.remove(filename)
63 |
64 |
65 | def ensure_dirs(filename):
66 | """Make sure the directories exist for `filename`."""
67 | dirname, _ = os.path.split(filename)
68 | if dirname and not os.path.exists(dirname):
69 | os.makedirs(dirname)
70 |
71 |
72 | def download_url(url, filename, headers):
73 | """Download a file from `url` to `filename`."""
74 | ensure_dirs(filename)
75 | response = requests.get(url, headers=headers, stream=True)
76 | if response.status_code == 200:
77 | with open(filename, 'wb') as f:
78 | for chunk in response.iter_content(16 * 1024):
79 | f.write(chunk)
80 | else:
81 | print(u" Error downloading {}: {}".format(url, response))
82 |
83 |
84 | def unpack_zipfile(filename):
85 | """Unpack a zipfile, using the names in the zip."""
86 | with open(filename, 'rb') as fzip:
87 | z = zipfile.ZipFile(fzip)
88 | for name in z.namelist():
89 | print(u" extracting {}".format(name))
90 | ensure_dirs(name)
91 | z.extract(name)
92 |
93 | parser = argparse.ArgumentParser(description='Download artifacts from AppVeyor.')
94 | parser.add_argument('--id',
95 | metavar='PROJECT_ID',
96 | default='tboquet/python-alp',
97 | help='Project ID in AppVeyor.')
98 | parser.add_argument('build',
99 | nargs='?',
100 | metavar='BUILD_ID',
101 | help='Build ID in AppVeyor. Eg: master-123')
102 |
103 | if __name__ == "__main__":
104 | # import logging
105 | # logging.basicConfig(level="DEBUG")
106 | args = parser.parse_args()
107 | download_latest_artifacts(args.id, args.build)
108 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python: '3.5'
3 | sudo: required
4 | dist: trusty
5 | services:
6 | - mongodb
7 | - docker
8 | env:
9 | global:
10 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
11 | - SEGFAULT_SIGNALS=all
12 | - TEST_MODE=ON
13 | matrix:
14 | - TOXENV=check
15 | - TOXENV=docs
16 |
17 | - TOXENV=py27,coveralls,codecov
18 | - TOXENV=py34,coveralls,codecov
19 | - TOXENV=py35,coveralls,codecov
20 | addons:
21 | code_climate:
22 | repo_token: dfe5865d21322900ba6972d30da44e2859d3533fead6c26cdf217ebb540000ce
23 | apt:
24 | packages:
25 | - libhdf5-dev
26 | before_install:
27 | - echo $TRAVIS_PULL_REQUEST_BRANCH
28 | - echo $TRAVIS_BRANCH
29 | - SUFFIX=''
30 | - if [[ "$TRAVIS_BRANCH" =~ dev ]]; then SUFFIX=dev; fi
31 | - if [[ "$TRAVIS_PULL_REQUEST_BRANCH" =~ dev ]]; then SUFFIX=dev; fi
32 | - echo $SUFFIX
33 | - sudo mkdir /parameters_h5
34 | - sudo chmod 777 /parameters_h5
35 | - sudo mkdir /data_generator
36 | - sudo chmod 777 /data_generator
37 | - sudo rabbitmqctl stop
38 | - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1397BC53640DB551
39 | - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv EA312927
40 | - echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
41 | - sudo apt-get update
42 | - sudo apt-get install -y libhdf5-dev
43 | - sudo apt-get install -y mongodb-org
44 | - python --version
45 | - uname -a
46 | - lsb_release -a
47 | - if [[ ("$TOXENV" == "py27,coveralls,codecov") ]]; then docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -p 27018:27017 -d --restart=always mongo; docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo; docker run -d -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq --name=rabbitmq_sched -p 8080:15672 -p 5672:5672 --restart=always rabbitmq:3-management; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=keras_worker --restart=always tboquet/travis_worker_${SUFFIX}k:latest; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=sklearn_worker --restart=always tboquet/travis_worker_${SUFFIX}sk:latest;fi
48 | - if [[ ("$TOXENV" == "py34,coveralls,codecov" || "$TOXENV" == "py35,coveralls,codecov") ]]; then docker run --name mongo_results -v /opt/data/mongo_data/results:/data/db -p 27018:27017 -d --restart=always mongo; docker run --name mongo_models -v /opt/data/mongo_data/models:/data/db -d --restart=always mongo; docker run -d -v /opt/data/rabbitmq/dev/log:/dev/log -v /opt/data/rabbitmq:/var/lib/rabbitmq --name=rabbitmq_sched -p 8080:15672 -p 5672:5672 --restart=always rabbitmq:3-management; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=keras_worker --restart=always tboquet/travis_worker_${SUFFIX}k:py3; docker run -d -v /parameters_h5:/parameters_h5 -v /opt/data/r2dbh5:/r2dbh5 -v /data_generator:/data_generator -e WORKER=TRUE --link=mongo_results:mongo_r --link=mongo_models:mongo_m --link=rabbitmq_sched:rabbitmq --name=sklearn_worker --restart=always tboquet/travis_worker_${SUFFIX}sk:py3;fi
49 | - docker ps
50 | - sleep 3
51 | - if [[ ("$TOXENV" == "py27,coveralls,codecov" || "$TOXENV" == "py34,coveralls,codecov" || "$TOXENV" == "py35,coveralls,codecov") ]]; then docker logs sklearn_worker;docker logs keras_worker;fi
52 | install:
53 | - pip install tox
54 | - pip install --upgrade pip
55 | - virtualenv --version
56 | - easy_install --version
57 | - pip --version
58 | - tox --version
59 | script:
60 | - tox -v
61 | after_failure:
62 | - more .tox/log/* | cat
63 | - more .tox/*/log/* | cat
64 | - docker logs sklearn_worker
65 | - docker logs keras_worker
66 | before_cache:
67 | - rm -rf $HOME/.cache/pip/log
68 | cache:
69 | directories:
70 | - $HOME/.cache/pip
71 | notifications:
72 | email:
73 | on_success: never
74 | on_failure: always
75 |
--------------------------------------------------------------------------------
/ci/appveyor-bootstrap.py:
--------------------------------------------------------------------------------
1 | """
2 | AppVeyor will at least have a few Pythons around, so there's no point in implementing a bootstrapper in PowerShell.
3 |
4 | This is a port of https://github.com/pypa/python-packaging-user-guide/blob/master/source/code/install.ps1
5 | with various fixes and improvements that just weren't feasible to implement in PowerShell.
6 | """
7 | from __future__ import print_function
8 | from os import environ
9 | from os.path import exists
10 | from subprocess import check_call
11 |
12 | try:
13 | from urllib.request import urlretrieve
14 | except ImportError:
15 | from urllib import urlretrieve
16 |
17 | BASE_URL = "https://www.python.org/ftp/python/"
18 | GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py"
19 | GET_PIP_PATH = r"C:\get-pip.py"
20 | URLS = {
21 | ("2.7", "64"): BASE_URL + "2.7.10/python-2.7.10.amd64.msi",
22 | ("2.7", "32"): BASE_URL + "2.7.10/python-2.7.10.msi",
23 | # NOTE: no .msi installer for 3.3.6
24 | ("3.3", "64"): BASE_URL + "3.3.3/python-3.3.3.amd64.msi",
25 | ("3.3", "32"): BASE_URL + "3.3.3/python-3.3.3.msi",
26 | ("3.4", "64"): BASE_URL + "3.4.3/python-3.4.3.amd64.msi",
27 | ("3.4", "32"): BASE_URL + "3.4.3/python-3.4.3.msi",
28 | ("3.5", "64"): BASE_URL + "3.5.0/python-3.5.0-amd64.exe",
29 | ("3.5", "32"): BASE_URL + "3.5.0/python-3.5.0.exe",
30 | }
31 | INSTALL_CMD = {
32 | # Commands are allowed to fail only if they are not the last command. Eg: uninstall (/x) allowed to fail.
33 | "2.7": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"],
34 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]],
35 | "3.3": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"],
36 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]],
37 | "3.4": [["msiexec.exe", "/L*+!", "install.log", "/qn", "/x", "{path}"],
38 | ["msiexec.exe", "/L*+!", "install.log", "/qn", "/i", "{path}", "TARGETDIR={home}"]],
39 | "3.5": [["{path}", "/quiet", "TargetDir={home}"]],
40 | }
41 |
42 |
43 | def download_file(url, path):
44 | print("Downloading: {} (into {})".format(url, path))
45 | progress = [0, 0]
46 |
47 | def report(count, size, total):
48 | progress[0] = count * size
49 | if progress[0] - progress[1] > 1000000:
50 | progress[1] = progress[0]
51 | print("Downloaded {:,}/{:,} ...".format(progress[1], total))
52 |
53 | dest, _ = urlretrieve(url, path, reporthook=report)
54 | return dest
55 |
56 |
57 | def install_python(version, arch, home):
58 | print("Installing Python", version, "for", arch, "bit architecture to", home)
59 | if exists(home):
60 | return
61 |
62 | path = download_python(version, arch)
63 | print("Installing", path, "to", home)
64 | success = False
65 | for cmd in INSTALL_CMD[version]:
66 | cmd = [part.format(home=home, path=path) for part in cmd]
67 | print("Running:", " ".join(cmd))
68 | try:
69 | check_call(cmd)
70 | except Exception as exc:
71 | print("Failed command", cmd, "with:", exc)
72 | if exists("install.log"):
73 | with open("install.log") as fh:
74 | print(fh.read())
75 | else:
76 | success = True
77 | if success:
78 | print("Installation complete!")
79 | else:
80 | print("Installation failed")
81 |
82 |
83 | def download_python(version, arch):
84 | for _ in range(3):
85 | try:
86 | return download_file(URLS[version, arch], "installer.exe")
87 | except Exception as exc:
88 | print("Failed to download:", exc)
89 | print("Retrying ...")
90 |
91 |
92 | def install_pip(home):
93 | pip_path = home + "/Scripts/pip.exe"
94 | python_path = home + "/python.exe"
95 | if exists(pip_path):
96 | print("pip already installed.")
97 | else:
98 | print("Installing pip...")
99 | download_file(GET_PIP_URL, GET_PIP_PATH)
100 | print("Executing:", python_path, GET_PIP_PATH)
101 | check_call([python_path, GET_PIP_PATH])
102 |
103 |
104 | def install_packages(home, *packages):
105 | cmd = [home + "/Scripts/pip.exe", "install"]
106 | cmd.extend(packages)
107 | check_call(cmd)
108 |
109 |
110 | if __name__ == "__main__":
111 | install_python(environ['PYTHON_VERSION'], environ['PYTHON_ARCH'], environ['PYTHON_HOME'])
112 | install_pip(environ['PYTHON_HOME'])
113 | install_packages(environ['PYTHON_HOME'], "setuptools>=18.0.1", "wheel", "tox", "virtualenv>=13.1.0")
114 |
--------------------------------------------------------------------------------
/src/alp/utils/utils_tests.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from fuel.datasets.hdf5 import H5PYDataset
3 | from fuel.schemes import SequentialScheme
4 | from fuel.streams import DataStream
5 | from fuel.transformers import ScaleAndShift
6 | from keras.layers import Dense
7 | from keras.layers import Dropout
8 | from keras.layers import Input
9 | from keras.models import Model
10 | from keras.models import Sequential
11 | from keras.utils import np_utils
12 | from keras.utils.test_utils import get_test_data
13 |
14 | from alp.appcom.utils import to_fuel_h5
15 |
16 |
17 | input_dim = 2
18 | nb_hidden = 4
19 | nb_class = 2
20 | batch_size = 4
21 | train_samples = 256
22 | test_samples = 128
23 |
24 |
25 | def close_gens(gen, data, data_stream):
26 | gen.close()
27 | data.close(None)
28 | data_stream.close()
29 |
30 |
31 | def make_data(train_samples, test_samples):
32 | (X_tr, y_tr), (X_te, y_te) = get_test_data(nb_train=train_samples,
33 | nb_test=test_samples,
34 | input_shape=(input_dim,),
35 | classification=True,
36 | nb_class=nb_class)
37 |
38 | y_tr = np_utils.to_categorical(y_tr)
39 | y_te = np_utils.to_categorical(y_te)
40 |
41 | data, data_val = dict(), dict()
42 |
43 | data["X"] = X_tr
44 | data["y"] = y_tr
45 |
46 | data_val["X"] = X_te
47 | data_val["y"] = y_te
48 | return data, data_val
49 |
50 |
51 | def dump_data(train_samples, test_samples):
52 | data, data_val = make_data(train_samples, test_samples)
53 | inputs = [np.concatenate([data['X'], data_val['X']])]
54 | outputs = [np.concatenate([data['y'], data_val['y']])]
55 |
56 | file_name = 'test_data'
57 | scale = 1.0 / inputs[0].std(axis=0)
58 | shift = - scale * inputs[0].mean(axis=0)
59 |
60 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 256],
61 | ['train', 'test'],
62 | file_name,
63 | '/data_generator')
64 | return file_path, scale, shift, i_names, o_names
65 |
66 |
67 | file_path, scale, shift, i_names, o_names = dump_data(train_samples, test_samples)
68 |
69 |
70 | def make_gen(batch_size, examples=4):
71 | file_path_f = file_path
72 | names_select = i_names
73 | train_set = H5PYDataset(file_path_f,
74 | which_sets=('train', 'test'))
75 |
76 | scheme = SequentialScheme(examples=examples, batch_size=batch_size)
77 |
78 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme)
79 |
80 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train,
81 | scale=scale, shift=shift,
82 | which_sources=(names_select[-1],))
83 | return stand_stream_train, train_set, data_stream_train
84 |
85 |
86 | def sequential(custom=False, nb_hidden=4):
87 | model = Sequential()
88 | model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
89 | model.add(Dense(nb_class, activation='softmax'))
90 | model.add(Dropout(0.5))
91 | if custom:
92 | model.add(return_custom()(0.5))
93 | return model
94 |
95 |
96 | def model(custom=False):
97 | inputs = Input(shape=(input_dim,), name='X')
98 |
99 | x = Dense(nb_hidden, activation='relu')(inputs)
100 | x = Dense(nb_hidden, activation='relu')(x)
101 | predictions = Dense(nb_class,
102 | activation='softmax',
103 | name='main_loss')(x)
104 |
105 | model = Model(input=inputs, output=predictions)
106 | return model
107 |
108 |
109 | def return_custom():
110 | import keras.backend as K
111 | from keras.engine import Layer
112 |
113 | class Dropout_cust(Layer): # pragma: no cover
114 | '''Applies Dropout to the input.
115 | '''
116 |
117 | def __init__(self, p, **kwargs):
118 | self.p = p
119 | if 0. < self.p < 1.:
120 | self.uses_learning_phase = True
121 | self.supports_masking = True
122 | super(Dropout_cust, self).__init__(**kwargs)
123 |
124 | def call(self, x, mask=None):
125 | if 0. < self.p < 1.:
126 | x = K.in_train_phase(K.dropout(x, level=self.p), x)
127 | return x
128 |
129 | def get_config(self):
130 | config = {'p': self.p}
131 | base_config = super(Dropout_cust, self).get_config()
132 | return dict(list(base_config.items()) + list(config.items()))
133 |
134 | return Dropout_cust
135 |
--------------------------------------------------------------------------------
/docs/Tutorials/tuto3.rst:
--------------------------------------------------------------------------------
1 | ====================================================
2 | Tutorial 3 : Feed more data with Fuel or generators
3 | ====================================================
4 |
5 | Because we aim at supporting online learning on streamed data, we think that generator support is a good starting point.
6 | We support Fuel_, a library that helps you pre-process and yield chunks of data while remaining serializable.
7 |
8 | 1 - Create some data
9 | ~~~~~~~~~~~~~~~~~~~~
10 |
11 | You can easily use Fuel_ iterators in an Experiment.
12 | We will first create some fake data.
13 |
14 | .. code-block:: python
15 |
16 | import fuel
17 | import numpy as np
18 | input_dim = 2
19 | nb_hidden = 4
20 | nb_class = 2
21 | batch_size = 5
22 | train_samples = 512
23 | test_samples = 128
24 | (X_tr, y_tr), (X_te, y_te) = get_test_data(nb_train=train_samples,
25 | nb_test=test_samples,
26 | input_shape=(input_dim,),
27 | classification=True,
28 | nb_class=nb_class)
29 |
30 | y_tr = np_utils.to_categorical(y_tr)
31 | y_te = np_utils.to_categorical(y_te)
32 |
33 | data, data_val = dict(), dict()
34 |
35 | X = np.concatenate([X_tr, X_te])
36 | y = np.concatenate([y_tr, y_te])
37 |
38 | inputs = [X, X]
39 | outputs = [y]
40 |
41 |
42 | 2 - Transform the data
43 | ~~~~~~~~~~~~~~~~~~~~~~
44 |
45 | We then use the :code:`to_fuel_h5` helper function (from :code:`alp.appcom.utils`) to convert our lists of inputs and outputs to an HDF5 dataset.
46 | This dataset has a simple structure and can be divided into multiple sets.
47 |
48 |
49 | .. code-block:: python
50 |
51 | # we save the mean and the scale (inverse of the standard deviation)
52 | # for each channel
53 | scale = 1.0 / inputs[0].std(axis=0)
54 | shift = - scale * inputs[0].mean(axis=0)
55 |
56 | # for 3 sets, we need 3 slices
57 | slices = [0, 256, 512]
58 |
59 | # and 3 names
60 | names = ['train', 'test', 'valid']
61 |
62 | file_name = 'test_data_'
63 | file_path_f, i_names, o_names = to_fuel_h5(inputs, outputs, slices, names, file_name, '/data_generator')
64 |
65 |
66 | 3 - Build your generator
67 | ~~~~~~~~~~~~~~~~~~~~~~~~
68 |
69 | The next step is to construct our Fuel generator from the dataset and a scheme, and to transform the data so that it is ready for our model.
70 |
71 |
72 | .. code-block:: python
73 |
74 | train_set = H5PYDataset(file_path_f,
75 | which_sets=('train','test', 'valid'))
76 |
77 | scheme = SequentialScheme(examples=128, batch_size=32)
78 |
79 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme)
80 |
81 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train,
82 | scale=scale, shift=shift,
83 | which_sources=('input_X',))
84 |
85 |
86 | 4 - Build and wrap your model
87 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
88 |
89 | We finally build our model and wrap it in an experiment.
90 |
91 |
92 | .. code-block:: python
93 |
94 | inputs = Input(shape=(input_dim,), name='X')
95 |
96 | x = Dense(nb_hidden, activation='relu')(inputs)
97 | x = Dense(nb_hidden, activation='relu')(x)
98 | predictions = Dense(nb_class, activation='softmax')(x)
99 |
100 | model = Model(input=inputs, output=predictions)
101 |
102 | model.compile(loss='categorical_crossentropy',
103 | optimizer='rmsprop',
104 | metrics=['accuracy'])
105 |
106 | expe = Experiment(model)
107 |
108 |
109 | 5 - Train your model
110 | ~~~~~~~~~~~~~~~~~~~~
111 |
112 | We can finally use the :meth:`alp.appcom.core.Experiment.fit_gen` method with our model and dataset.
113 |
114 |
115 | .. code-block:: python
116 |
117 | expe.fit_gen([gen], [val], nb_epoch=2,
118 | model=model,
119 | metrics=metrics,
120 | custom_objects=cust_objects,
121 | samples_per_epoch=128,
122 | nb_val_samples=128)
123 |
124 | You can also use :meth:`alp.appcom.core.Experiment.fit_gen_async` with the same function parameters if you have a worker running.
125 |
126 | .. code-block:: python
127 |
128 | expe.fit_gen_async([gen], [val], nb_epoch=2,
129 | model=model,
130 | metrics=metrics,
131 | custom_objects=cust_objects,
132 | samples_per_epoch=128,
133 | nb_val_samples=128)
134 |
135 |
136 | .. _Fuel: https://github.com/mila-udem/fuel
137 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | ; a generative tox configuration, see: https://testrun.org/tox/latest/config.html#generative-envlist
2 |
3 | [tox]
4 | envlist =
5 | clean,
6 | check,
7 | {py27,py34,py35},
8 | report,
9 | docs,
10 | docsbuild
11 | indexserver =
12 | g1 = https://pypi.python.org/simple
13 | g2 = https://pypi.python.org/simple
14 | g3 = https://pypi.python.org/simple
15 | g4 = https://pypi.python.org/simple
16 |
17 | [testenv]
18 | basepython =
19 | {py27,docs,docsbuild,spell}: {env:TOXPYTHON:python2.7}
20 | py34: {env:TOXPYTHON:python3.4}
21 | py35: {env:TOXPYTHON:python3.5}
22 | {clean,check,report,coveralls,codecov}: python3.5
23 | bootstrap: python
24 | setenv =
25 | PYTHONPATH={toxinidir}/tests
26 | PYTHONUNBUFFERED=yes
27 | passenv =
28 | *
29 | usedevelop = false
30 | sitepackages = true
31 | deps =
32 | :g1: -r{toxinidir}/req/requirements_first.txt
33 | :g2: git+git://github.com/mila-udem/fuel.git
34 | :g3: -r{toxinidir}/req/requirements.txt
35 | :g3: pymongo
36 | :g3: pytest
37 | :g3: pytest-travis-fold
38 | :g3: pytest-cov
39 | :g3: coveralls
40 | :g4: scikit-learn
41 |
42 | [testenv:py27]
43 | deps =
44 | :g1: -r{toxinidir}/req/requirements_first.txt
45 | :g2: git+git://github.com/mila-udem/fuel.git
46 | :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
47 | :g3: -r{toxinidir}/req/requirements.txt
48 | :g3: pymongo
49 | :g3: pytest
50 | :g3: pytest-travis-fold
51 | :g3: pytest-cov
52 | :g3: coveralls
53 | :g3: scipy==0.16.1
54 | :g4: scikit-learn
55 | commands =
56 | {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
57 |
58 | [testenv:py34]
59 | deps =
60 | :g1: -r{toxinidir}/req/requirements_first.txt
61 | :g2: git+git://github.com/mila-udem/fuel.git
62 | :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
63 | :g3: -r{toxinidir}/req/requirements.txt
64 | :g3: pymongo
65 | :g3: pytest
66 | :g3: pytest-travis-fold
67 | :g3: pytest-cov
68 | :g3: coveralls
69 | :g3: scipy==0.16.1
70 | :g4: scikit-learn
71 | commands =
72 | {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
73 |
74 | [testenv:py35]
75 | deps =
76 | :g1: -r{toxinidir}/req/requirements_first.txt
77 | :g2: git+git://github.com/mila-udem/fuel.git
78 | :g2: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
79 | :g3: -r{toxinidir}/req/requirements.txt
80 | :g3: pymongo
81 | :g3: pytest
82 | :g3: pytest-travis-fold
83 | :g3: pytest-cov
84 | :g3: coveralls
85 | :g3: scipy==0.16.1
86 | :g4: scikit-learn
87 | commands =
88 | {posargs:py.test --cov=alp --cov-report=term-missing -vv tests}
89 |
90 | [testenv:bootstrap]
91 | deps =
92 | jinja2
93 | matrix
94 | skip_install = true
95 | commands =
96 | python ci/bootstrap.py
97 | passenv =
98 | *
99 |
100 | [testenv:spell]
101 | setenv =
102 | SPELLCHECK=1
103 | commands =
104 | sphinx-build -b spelling docs dist/docs
105 | skip_install = true
106 | deps =
107 | :g1: setuptools
108 | mock
109 | -r{toxinidir}/docs/requirements.txt
110 | sphinxcontrib-spelling
111 | pyenchant
112 |
113 | [testenv:docs]
114 | deps =
115 | :g1: scipy
116 | :g1: setuptools>=28.8
117 | :g1: six>=1.6
118 | :g2: -r{toxinidir}/docs/requirements.txt
119 | :g2: celery
120 | :g2: mock
121 | :g2: git+git://github.com/fchollet/keras.git
122 |
123 |
124 | commands =
125 | sphinx-build {posargs:-E} -b doctest docs dist/docs
126 | sphinx-build {posargs:-E} -b html docs dist/docs
127 | sphinx-build -b linkcheck docs dist/docs
128 |
129 | [testenv:docsbuild]
130 | deps =
131 | :g1: setuptools>=28.8
132 | :g1: six>=1.6
133 | :g2: -r{toxinidir}/docs/requirements.txt
134 | :g3: mock
135 | :g3: celery
136 | :g3: git+git://github.com/fchollet/keras.git
137 | commands =
138 | sphinx-build {posargs:-E} -b doctest docs dist/docs
139 | sphinx-build {posargs:-E} -b html docs dist/docs
140 |
141 |
142 | [testenv:check]
143 | deps =
144 | :g1: -r{toxinidir}/req/requirements_first.txt
145 | docutils
146 | check-manifest
147 | flake8
148 | readme-renderer
149 | pygments
150 | isort
151 | skip_install = true
152 | commands =
153 | python setup.py check --strict --metadata --restructuredtext
154 | check-manifest {toxinidir}
155 | flake8 src tests setup.py --ignore=F403
156 | isort --verbose --check-only --diff --recursive src tests setup.py
157 |
158 | [testenv:coveralls]
159 | deps =
160 | coverage==4.1
161 | coveralls
162 | skip_install = true
163 | commands =
164 | coverage combine
165 | coverage report
166 | coveralls []
167 |
168 | [testenv:codecov]
169 | deps =
170 | coverage==4.1
171 | codecov
172 | skip_install = true
173 | commands =
174 | coverage combine
175 | coverage report
176 | coverage xml --ignore-errors
177 | codecov []
178 |
179 | [testenv:report]
180 | deps = coverage==4.1
181 | skip_install = true
182 | commands =
183 | coverage combine
184 | coverage report
185 | coverage html
186 |
187 | [testenv:clean]
188 | commands = coverage erase
189 | skip_install = true
190 | deps = coverage
--------------------------------------------------------------------------------
/tests/core/test_hpoptim.py:
--------------------------------------------------------------------------------
1 | """Tests Hyper parameter search"""
2 |
3 | import keras
4 | import numpy as np
5 | import pytest
6 |
7 | from fuel.datasets.hdf5 import H5PYDataset
8 | from fuel.schemes import SequentialScheme
9 | from fuel.streams import DataStream
10 | from fuel.transformers import ScaleAndShift
11 | from keras.layers import Dense
12 | from keras.layers import Dropout
13 | from keras.models import Sequential
14 | from keras.utils import np_utils
15 | from keras.utils.test_utils import get_test_data
16 | from sklearn.linear_model import LogisticRegression
17 |
18 | from alp.appcom.core import Experiment
19 | from alp.appcom.ensembles import HParamsSearch
20 | from alp.appcom.utils import to_fuel_h5
21 | from alp.utils.utils_tests import batch_size
22 | from alp.utils.utils_tests import close_gens
23 | from alp.utils.utils_tests import make_data
24 | from alp.utils.utils_tests import make_gen
25 | from alp.utils.utils_tests import sequential
26 | from alp.utils.utils_tests import test_samples
27 | from alp.utils.utils_tests import train_samples
28 |
29 |
30 | def make_experiments(dict_exp=False):
31 | experiments = []
32 | if dict_exp:
33 | experiments = dict()
34 | nb_hiddens = [8, 16, 32]
35 | for i in range(3):
36 | nb_hidden = nb_hiddens[i]
37 |
38 | # model
39 | model = sequential(False, nb_hidden)
40 |
41 | model.compile(loss='categorical_crossentropy',
42 | optimizer='adam',
43 | metrics=['accuracy'])
44 |
45 | expe = Experiment(model, metrics=['accuracy'])
46 | if dict_exp:
47 | experiments[str(nb_hidden)] = expe
48 | else:
49 | experiments.append(expe)
50 | return experiments
51 |
52 |
53 | def make_sklearn_experiments():
54 | experiments = []
55 | C_list = [1.0, 0.8, 0.5]
56 | for C in C_list:
57 | model = LogisticRegression(C=C)
58 | expe = Experiment(model)
59 | experiments.append(expe)
60 | return experiments
61 |
62 |
63 | class TestHParamsSearch:
64 | def test_fit(self):
65 | data, data_val = make_data(train_samples, test_samples)
66 | experiments = make_experiments()
67 |
68 | param_search = HParamsSearch(experiments, metric='loss', op=np.min)
69 | param_search.fit([data], [data_val], nb_epoch=2,
70 | batch_size=batch_size, verbose=2,
71 | overwrite=True)
72 | print(self)
73 |
74 | def test_fit_async(self):
75 | data, data_val = make_data(train_samples, test_samples)
76 | experiments = make_experiments()
77 |
78 | param_search = HParamsSearch(experiments, metric='loss', op=np.min)
79 | param_search.fit_async([data], [data_val], nb_epoch=2,
80 | batch_size=batch_size, verbose=2,
81 | overwrite=True)
82 | param_search.summary(metrics={'val_loss': np.min})
83 | print(self)
84 |
85 | def test_fit_gen(self):
86 | gen, data, data_stream = make_gen(batch_size)
87 | val, data_2, data_stream_2 = make_gen(batch_size)
88 | experiments = make_experiments()
89 |
90 | param_search = HParamsSearch(experiments, metric='loss', op=np.min)
91 | param_search.fit_gen([gen], [val], nb_epoch=2,
92 | verbose=2,
93 | nb_val_samples=128,
94 | samples_per_epoch=64,
95 | overwrite=True)
96 | param_search.summary(verbose=True, metrics={'val_loss': np.min})
97 | close_gens(gen, data, data_stream)
98 | close_gens(val, data_2, data_stream_2)
99 | print(self)
100 |
101 | def test_fit_gen_async(self):
102 | gen, data, data_stream = make_gen(batch_size)
103 | val, data_2, data_stream_2 = make_gen(batch_size)
104 | experiments = make_experiments()
105 | param_search = HParamsSearch(experiments, metric='loss', op=np.min)
106 | param_search.fit_gen_async([gen], [val], nb_epoch=2,
107 | verbose=2,
108 | nb_val_samples=128,
109 | samples_per_epoch=64,
110 | overwrite=True)
111 | param_search.summary(verbose=True, metrics={'val_loss': np.min})
112 | close_gens(gen, data, data_stream)
113 | close_gens(val, data_2, data_stream_2)
114 | print(self)
115 |
116 | def test_predict(self):
117 | data, data_val = make_data(train_samples, test_samples)
118 | experiments = make_experiments()
119 |
120 | param_search = HParamsSearch(experiments, metric='acc', op=np.min)
121 | min_x = np.min(data['X'])
122 | data['X'] = (data['X'] - min_x) / (np.max(data['X']) - min_x)
123 | param_search.fit([data], [data_val], nb_epoch=2,
124 | batch_size=batch_size, verbose=2,
125 | overwrite=True)
126 |
127 | param_search.predict(data['X'], metric='val_acc', op=np.max)
128 |
129 | experiments = make_experiments(dict_exp=True)
130 | param_search = HParamsSearch(experiments)
131 | param_search.fit([data], [data_val], nb_epoch=2,
132 | batch_size=batch_size, verbose=2,
133 | overwrite=True)
134 |
135 | param_search.predict(data['X'], metric='acc', op=np.min, partial=True)
136 | print(self)
137 |
138 | def test_predict_sklearn(self):
139 | data, data_val = make_data(train_samples, test_samples)
140 | experiments = make_sklearn_experiments()
141 |
142 | param_search = HParamsSearch(experiments, metric='score', op=np.max)
143 | data['y'] = np.argmax(data['y'], axis=1).ravel()
144 | data_val['y'] = np.argmax(data_val['y'], axis=1).ravel()
145 | param_search.fit([data], [data_val], overwrite=True)
146 |
147 | param_search.predict(data['X'])
148 | print(self)
149 |
150 |
151 | if __name__ == "__main__":
152 | pytest.main([__file__])
153 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Overview
3 | ========
4 |
5 | .. start-badges
6 |
7 | |travis| |requires| |coveralls| |codecov| |codacy| |docs|
8 |
9 | .. |travis| image:: https://travis-ci.org/tboquet/python-alp.svg?branch=master
10 | :alt: Travis-CI Build Status
11 | :target: https://travis-ci.org/tboquet/python-alp
12 |
13 | .. |requires| image:: https://requires.io/github/tboquet/python-alp/requirements.svg?branch=master
14 | :alt: Requirements Status
15 | :target: https://requires.io/github/tboquet/python-alp/requirements/?branch=master
16 |
17 | .. |coveralls| image:: https://coveralls.io/repos/tboquet/python-alp/badge.svg?branch=master&service=github
18 | :alt: Coverage Status
19 | :target: https://coveralls.io/r/tboquet/python-alp
20 |
21 | .. |codecov| image:: https://codecov.io/github/tboquet/python-alp/coverage.svg?branch=master
22 | :alt: Coverage Status
23 | :target: https://codecov.io/github/tboquet/python-alp
24 |
25 | .. |codacy| image:: https://img.shields.io/codacy/b7f6d79244d8480099a3593db2de9560.svg?style=flat
26 | :target: https://www.codacy.com/app/tboquet/python-alp
27 | :alt: Codacy Code Quality Status
28 |
29 | .. |docs| image:: https://readthedocs.org/projects/python-alp/badge/?style=flat
30 | :target: https://readthedocs.org/projects/python-alp
31 | :alt: Documentation Status
32 |
33 | .. end-badges
34 |
35 |
36 | ALP helps you experiment with a lot of machine learning models quickly. It provides you with a simple way of scheduling and recording experiments.
37 |
38 | This library has been developed to work well with Keras and Scikit-learn but can suit a lot of other frameworks.
39 |
40 | Documentation
41 | =============
42 |
43 | http://python-alp.readthedocs.io/
44 |
45 | Quickstart
46 | ==========
47 |
48 | Clone the repo and install the library:
49 |
50 | .. code-block:: bash
51 |
52 | git clone https://github.com/tboquet/python-alp.git
53 | cd python-alp
54 | python setup.py install
55 |
56 | Install the Command Line Interface dependencies:
57 |
58 | .. code-block:: bash
59 |
60 | cd req
61 | pip install -r requirements_cli.txt
62 |
63 | Generate a base configuration using an absolute path:
64 |
65 | .. code-block:: bash
66 |
67 | alp --verbose genconfig --outdir=/path/to/a/directory --cpu
68 |
69 | Launch the services:
70 |
71 | .. code-block:: bash
72 |
73 | alp --verbose service start /path/to/a/directory/.alp/containers.json
74 |
75 | Check the status of your containers:
76 |
77 | .. code-block:: bash
78 |
79 | alp --verbose status /path/to/a/directory/.alp/containers.json
80 |
81 |
82 | Log in to the Jupyter notebook you just launched in your browser at :code:`localhost:440`, using the password :code:`default`.
83 |
84 | Launch some experiments!
85 |
86 | .. code-block:: python
87 |
88 | # we import numpy and fix the seed
89 | import numpy as np
90 | np.random.seed(1337) # for reproducibility
91 |
92 | # we import alp and Keras tools that we will use
93 | import alp
94 | from keras.datasets import mnist
95 | from keras.models import Sequential
96 | from keras.layers import Dense, Dropout, Activation, Flatten
97 | from keras.utils import np_utils
98 | import keras.backend as K
99 | from keras.optimizers import Adam
100 | from alp.appcom.ensembles import HParamsSearch
101 |
102 | # if you use tensorflow you must use this configuration
103 | # so that it doesn't use all of the GPU's memory (default config)
104 | import tensorflow as tf
105 |
106 | config = tf.ConfigProto(allow_soft_placement=True)
107 | config.gpu_options.allow_growth = True
108 | session = tf.Session(config=config)
109 | K.set_session(session)
110 |
111 | batch_size = 128
112 | nb_classes = 10
113 | nb_epoch = 12
114 |
115 | # input image dimensions
116 | img_rows, img_cols = 28, 28
117 | # number of features to use
118 | nb_features = 32
119 |
120 | # the data, shuffled and split between train and test sets
121 | (X_train, y_train), (X_test, y_test) = mnist.load_data()
122 |
123 | X_train = X_train.astype('float32')
124 | X_test = X_test.astype('float32')
125 | X_train /= 255
126 | X_test /= 255
127 | print('X_train shape:', X_train.shape)
128 | print(X_train.shape[0], 'train samples')
129 | print(X_test.shape[0], 'test samples')
130 |
131 | if K.image_dim_ordering() == 'th':
132 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
133 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
134 | input_shape = (1, img_rows, img_cols)
135 | else:
136 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
137 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
138 | input_shape = (img_rows, img_cols, 1)
139 |
140 | # convert class vectors to binary class matrices
141 | Y_train = np_utils.to_categorical(y_train, nb_classes)
142 | Y_test = np_utils.to_categorical(y_test, nb_classes)
143 |
144 | # put the data in the form ALP expects
145 | data, data_val = dict(), dict()
146 | data["X"] = X_train[:500]
147 | data["y"] = Y_train[:500]
148 | data_val["X"] = X_test[:500]
149 | data_val["y"] = Y_test[:500]
150 |
151 | # Define and compile the model
152 |
153 | model = Sequential()
154 |
155 | model.add(Flatten(input_shape=input_shape))
156 | model.add(Dense(nb_features))
157 | model.add(Activation('relu'))
158 | model.add(Dropout(0.25))
159 |
160 | model.add(Dense(128))
161 | model.add(Activation('relu'))
162 | model.add(Dropout(0.5))
163 | model.add(Dense(nb_classes))
164 | model.add(Activation('softmax'))
165 |
166 | model.compile(loss='categorical_crossentropy',
167 | optimizer='adadelta',
168 | metrics=['accuracy'])
169 |
170 | # Define your experiment
171 |
172 | from alp.appcom.core import Experiment
173 |
174 | expe = Experiment(model)
175 |
176 | # Fit the model linked to your experiment
177 | results = expe.fit([data], [data_val], nb_epoch=2, batch_size=batch_size)
178 |
179 | # Predict using your model
180 | expe.predict(data['X'])
181 |
182 |
183 | `Get started with the tutorial series!`_
184 |
185 | * Free software: Apache license
186 |
187 | .. _`docker setup`: http://python-alp.readthedocs.io/en/latest/dockersetup.html
188 | .. _`Get started with the tutorial series!`: http://python-alp.readthedocs.io/en/latest/Tutorials/index_tuto.html
189 |
--------------------------------------------------------------------------------
/src/alp/backend/common.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions used in every backend
3 | ===============================
4 | """
5 |
6 | import copy
7 | import hashlib
8 | import json
9 | import os
10 | import pickle
11 | from datetime import datetime
12 |
13 | import numpy as np
14 |
15 |
16 | def clean_model(model):
17 | """Clean a model dict of unnecessary elements
18 | 
19 | Args:
20 | model(dict): a dictionary of the model
21 |
22 | Returns:
23 | a new cleaned dict"""
24 | model_c = copy.deepcopy(model)
25 | if 'ser_metrics' in model_c['model_arch']:
26 | model_c['model_arch'].pop('ser_metrics')
27 | if 'metrics' in model_c['model_arch']:
28 | model_c['model_arch'].pop('metrics')
29 | return model_c
30 |
31 |
32 | def create_model_hash(model, batch_size):
33 | """Creates a hash based on the dict of a model and the batch size
34 |
35 | Args:
36 | model(dict): a dictionary of the model
37 | batch_size(int): the batch size
38 | 
39 | Returns:
40 | an md5 hash of the model"""
41 | # convert dict to json string
42 | model_str = json.dumps(model)
43 |
44 | # create the model hash from the stringified json
45 | mh = hashlib.md5()
46 | str_concat_m = str(model_str) + str(batch_size)
47 | mh.update(str_concat_m.encode('utf-8'))
48 | return mh.hexdigest()
49 |
50 |
51 | def create_data_hash(data):
52 | """Creates a hash based on the data passed
53 |
54 | The unique descriptors are based on the mean of each array passed and the
55 | sum of the elements of the first line along the first axis.
56 | 
57 | Args:
58 | data(list): a list of dictionaries mapping names to arrays
59 | 
60 | Returns:
61 | an md5 hash of the data"""
62 | un_data_m = 0
63 | un_data_f = 0
64 | for i, _ in enumerate(data):
65 | for key in data[i]:
66 | un_data_m += data[i][key].mean()
67 | un_data_f += data[i][key][0].sum()
68 |
69 | dh = hashlib.md5()
70 | str_concat_d = str(un_data_m) + str(un_data_f)
71 | dh.update(str_concat_d.encode('utf-8'))
72 | return dh.hexdigest()
73 |
74 |
75 | def create_gen_hash(gen):
76 | """Creates a hash based on the generator passed
77 | 
78 | The hash is computed from the pickled representation of the
79 | generator.
80 | 
81 | Args:
82 | gen: a data generator (e.g. a Fuel data stream)
83 | 
84 | Returns:
85 | an md5 hash of the generator"""
86 | pickle_gen = pickle.dumps(gen)
87 | dh = hashlib.md5()
88 | str_concat_g = str(pickle_gen)
89 | dh.update(str_concat_g.encode('utf-8'))
90 | return dh.hexdigest()
91 |
92 |
93 | def create_param_dump(_path_h5, hexdi_m, hexdi_d):
94 | """Create the path where to dump the params
95 |
96 | Args:
97 | _path_h5(str): the base path
98 | hexdi_m(str): the model hash
99 | hexdi_d(str): the data hash
100 |
101 | Returns:
102 | the full path where to dump the params"""
103 | return os.path.join(os.path.sep, _path_h5, hexdi_m + hexdi_d + '.h5')
104 |
105 |
106 | def make_all_hash(model_c, batch_size, data_hash, _path_h5):
107 | """Generate a hash for the model and the name of the file where
108 | the parameters are dumped"""
109 | hexdi_m = create_model_hash(model_c, batch_size)
110 | params_dump = create_param_dump(_path_h5, hexdi_m, data_hash)
111 | return hexdi_m, params_dump
112 |
113 |
114 | def open_dataset_gen(generator):
115 | """Open a fuel dataset given a fuel pipeline
116 |
117 | This function is recursive and searches for the dataset attribute."""
118 | if hasattr(generator, 'data_stream'):
119 | data_stream = generator.data_stream
120 | if hasattr(data_stream, 'dataset'):
121 | data_stream.dataset.open()
122 | else: # pragma: no cover
123 | open_dataset_gen(data_stream)
124 | elif hasattr(generator, 'dataset'):
125 | generator.dataset.open()
126 | else:
127 | raise NotImplementedError('not able to open the dataset')
128 |
129 |
130 | def transform_gen(gen_train, mod_name):
131 | """Transform a generator of tuples into a generator of (inputs, outputs)
132 | 
133 | Args:
134 | gen_train(Fuel data stream): a fuel training data generator
135 | mod_name(str): the name of the model
136 | 
137 | Yield:
138 | a tuple (inputs_list, outputs_list) where each element is a list of
139 | numpy arrays taken from the generator sources."""
140 | names_dict = gen_train.sources
141 |
142 | inp = 'input_'
143 | out = 'output_'
144 |
145 | li = 'list'
146 |
147 | open_dataset_gen(gen_train)
148 |
149 | while 1:
150 | for d in gen_train.get_epoch_iterator():
151 | data = zip(d, names_dict)
152 | inputs_list = []
153 | outputs_list = []
154 | for arr, name in data:
155 | if inp in name:
156 | if li in name:
157 | inputs_list.append(arr)
158 | elif out in name:
159 | if li in name:
160 | outputs_list.append(arr)
161 | elif 'index' in name: # pragma: no cover
162 | pass
163 | else: # pragma: no cover
164 | raise ValueError("Not an input nor an output, please check your generator")
165 | data_out = (inputs_list, outputs_list)
166 | yield data_out
167 |
168 |
169 | def train_pipe(train_f, save_f, model, data, data_val, generator, size_gen,
170 | params_dump, data_hash, hexdi_m,
171 | *args, **kwargs):
172 | """Common function to train models for all backends
173 |
174 | Args:
175 | train_f(function): the train function to use
176 | save_f(function): the function used to save parameters"""
177 | results, model = train_f(model['model_arch'], data,
178 | data_val, size_gen,
179 | generator=generator,
180 | *args, **kwargs)
181 | res_dict = {
182 | 'iter_stopped': results['metrics']['iter'],
183 | 'trained': 1,
184 | 'date_finished_training': datetime.now()}
185 | for metric in results['metrics']:
186 | res_dict[metric] = results['metrics'][metric]
187 | if metric in ['loss', 'val_loss']:
188 | res_dict[metric] = np.min(results['metrics'][metric])
189 |
190 | save_f(model, params_dump)
191 | results['model_id'] = hexdi_m
192 | results['data_id'] = data_hash
193 | results['params_dump'] = params_dump
194 | return results, res_dict
195 |
196 |
197 | def on_worker():
198 | return os.getenv("ON_WORKER") == "TRUE"
199 |
--------------------------------------------------------------------------------
/docs/Tutorials/tuto1.rst:
--------------------------------------------------------------------------------
1 | ===================================================================
2 | Tutorial 1 : Simple Hyperparameter Tuning with ALP - sklearn models
3 | ===================================================================
4 |
5 | In this tutorial, we will get some data, build an Experiment with a
6 | simple model and tune the parameters of the model to get the best
7 | performance on validation data (by launching several experiments). We
8 | will then reuse this best model on unseen test data and check that it's
9 | better than the untuned model. The whole thing will use the
10 | asynchronous fit to highlight the capabilities of ALP.
11 |
12 | 1 - Get some data
13 | ~~~~~~~~~~~~~~~~~
14 |
15 | Let us start with the usual Iris dataset. Note that we will split the
16 | test set in 2 samples of size 25: the "validation" set to select the
17 | best model, and the "new" set to assess that the selected model was the
18 | best.
19 |
20 | .. code:: python
21 |
22 | from sklearn import datasets
23 | from sklearn.model_selection import train_test_split
24 |
25 | # get some data
26 | iris = datasets.load_iris()
27 | X_train, X_test, y_train, y_test = train_test_split(
28 | iris.data, iris.target, test_size=50, random_state=0)
29 | X_test_val, X_test_new, y_test_val, y_test_new = train_test_split(
30 | X_test, y_test, test_size=25, random_state=1)
31 |
32 | # put it in ALP expected format
33 | data, data_val, data_new = dict(), dict(), dict()
34 | data["X"], data["y"] = X_train, y_train
35 | data_val["X"], data_val["y"] = X_test_val, y_test_val
36 | data_new["X"], data_new["y"] = X_test_new, y_test_new
37 |
38 |
39 | 2 - Define an easy model and an ALP Experiment in a loop
40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
41 |
42 | We will define a simple `LogisticRegression`_ to demonstrate how to use ensembles of experiments in ALP.
43 | 
44 | Let us first define a helper function.
45 |
46 | .. code:: python
47 |
48 | import random
49 | import sklearn.linear_model
50 | from alp.appcom.core import Experiment
51 | from functools import reduce
52 | from operator import mul
52 |
53 | def grid_search(grid_dict, tries, model_type='LogisticRegression'):
54 | ''' This function randomly builds Experiments with different hyperparameters and returns a dict of experiments.
55 | 
56 | Args:
57 | grid_dict(dict) : hyperparameter grid from which to draw samples
58 | tries(int) : number of models to be generated and tested
59 | model_type(string) : type of model to be tested
60 | (must be in sklearn.linear_model)
61 | 
62 | Returns:
63 | expes(dict): a dict mapping experiment names to Experiments.
64 | 
65 | '''
66 |
67 | expes = dict()
68 |
69 | # 1 - infos
70 | size_grid = reduce(mul, [len(v) for v in grid_dict.values()])
71 | print("grid size: {}".format(size_grid))
72 | print("tries: {}".format(tries))
73 |
74 |
75 | # 2 - models loop
76 | for i in range(tries):
77 | select_params = {}
78 | key = [str(i)]
79 | for k, v in grid_dict.items():
80 | value = random.choice(v)
81 | select_params[k] = value
82 | key += ['{}:{}'.format(k, value)]
83 | model = getattr(sklearn.linear_model, model_type)(**select_params)
84 | expe = Experiment(model)
85 | expes['_'.join(key)] = expe
86 | return expes
87 |
88 |
89 | In detail, this function:
90 | 1. displays some information about the size of the grid;
91 | 2. loops over the models: :code:`tries` times, it randomly selects a point in the hyperparameter grid and creates an Experiment object with the model parametrized by this point.
92 |
93 |
94 |
95 | 3 - Run the grid search
96 | ~~~~~~~~~~~~~~~~~~~~~~~~~
97 |
98 | We use the :class:`~alp.appcom.ensembles.HParamsSearch` class to wrap several :class:`~alp.appcom.core.Experiment` objects.
99 | For now, because the grid is defined outside of the class, you have to pass a dictionary mapping experiment names to :class:`~alp.appcom.core.Experiment` objects.
100 |
101 | .. code:: python
102 |
103 | import numpy as np
104 | from alp.appcom.ensembles import HParamsSearch
104 |
105 | # setting the seed for reproducibility: feel free to change it
106 | random.seed(12345)
107 |
108 | # defining the grid that will be explored
109 | grid_tol = [i*10**-j for i in (1,2,5) for j in (1, 2, 3, 4, 5, 6)]
110 | grid_C = [i*10**-j for i in (1,2,5) for j in (-2, -1, 1, 2, 3, 4, 5, 6)]
111 | grid = {'tol':grid_tol, 'C':grid_C}
112 |
113 | tries = 100
114 |
115 | expes = grid_search(grid, tries)
116 |
117 | # we define the ensemble with our experiments and a metric
118 | ensemble = HParamsSearch(experiments=expes, metric='score', op=np.max)
119 |
120 | results = ensemble.fit([data], [data_val])
121 |
122 | label, predictions = ensemble.predict(data['X'])
123 | print('Best model: {}'.format(label))
124 |
125 | .. note::
126 |
127 | You can also use the :meth:`~alp.appcom.ensembles.HParamsSearch.fit_async` method.
128 |
129 |
130 | .. parsed-literal::
131 |
132 | grid size : 432
133 | tries : 100
134 |
135 | Best model: 52_C:100_tol:1e-06
136 |
137 |
138 | A word on the interpretation of the params:
139 | * the parameter C is the inverse of the regularisation strength of the Logistic Regression. A small value of C means a stronger L2 constraint on the weights (the constraint is not applied to the intercept). A larger C can lead to overfitting, while a smaller value can lead to too much regularization. As such, it is an ideal candidate for automatic tuning.
140 | * the tol parameter is the tolerance for the stopping criterion. Our experiments did not show a strong impact of this parameter unless it was set to high values.
141 |
142 | 4 - Validation that the best model is better than the untuned one
143 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
144 |
145 | ALP makes predicting with the loaded best model on unseen data easy.
146 | The accuracy of the best model is decent (one mistake out of 25 points).
147 |
148 | .. code:: python
149 |
150 | label, predictions = ensemble.predict(data_new['X'])
151 | print(sklearn.metrics.accuracy_score(predictions, data_new["y"]))
152 |
153 |
154 | .. parsed-literal::
155 |
156 | 0.96
157 |
158 | We can now create an untuned model (C=1 by default) and check that its accuracy on unseen data is lower than that of the tuned one.
159 |
160 | .. code:: python
161 |
162 | model = sklearn.linear_model.LogisticRegression()
163 | expe = Experiment(model)
164 | expe.fit([data], [data_val])
165 | pred_worst_new = expe.predict(X_test_new)
166 | print(sklearn.metrics.accuracy_score(pred_worst_new, data_new["y"]))
167 |
168 |
169 | .. parsed-literal::
170 |
171 | 0.88
172 |
173 |
174 | .. _LogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
175 |
--------------------------------------------------------------------------------
/docs/Tutorials/tuto4.rst:
--------------------------------------------------------------------------------
1 | ========================================================
2 | Tutorial 4 : how to use custom layers for Keras with ALP
3 | ========================================================
4 |
5 | Because serialization of complex Python objects is still a challenge, we will present a way of using a custom layer in a Keras model with ALP.
6 |
7 |
8 | 1 - Get a dataset
9 | ~~~~~~~~~~~~~~~~~
10 |
11 | We will work with the CIFAR10 dataset available via Keras.
12 |
13 | .. code-block:: python
14 |
15 | from keras.datasets import cifar10
16 | from keras.preprocessing.image import ImageDataGenerator
17 | from keras.models import Sequential
18 | from keras.layers import Dense, Dropout, Activation, Flatten
19 | from keras.layers import Convolution2D, MaxPooling2D
20 | from keras.optimizers import SGD
21 | from keras.utils import np_utils
22 |
23 | from fuel.datasets.hdf5 import H5PYDataset
24 | from fuel.schemes import SequentialScheme
25 | from fuel.streams import DataStream
26 | from fuel.transformers import ScaleAndShift
27 |
28 | from alp.appcom.core import Experiment
29 |
30 | from alp.appcom.utils import to_fuel_h5
31 |
32 | import numpy as np
33 |
34 | nb_classes = 10
35 | nb_epoch = 25
36 |
37 | # input image dimensions
38 | img_rows, img_cols = 32, 32
39 | # the CIFAR10 images are RGB
40 | img_channels = 3
41 |
42 | # the data, shuffled and split between train and test sets
43 | (X_train, y_train), (X_test, y_test) = cifar10.load_data()
44 |
45 | X_train = X_train.astype('float32')
46 | X_test = X_test.astype('float32')
47 | X_train = X_train/255
48 | X_test = X_test/255
49 |
50 | batch_size = 128
51 | print('X_train shape:', X_train.shape)
52 | print(X_train.shape[0], 'train samples')
53 | print(X_test.shape[0], 'test samples')
54 |
55 | # convert class vectors to binary class matrices
56 | Y_train = np_utils.to_categorical(y_train, nb_classes)
57 | Y_test = np_utils.to_categorical(y_test, nb_classes)
58 |
59 |
60 | 2 - Build the generators
61 | ~~~~~~~~~~~~~~~~~~~~~~~~
62 |
63 | We build two generators, one for training and one for validation.
64 |
65 |
66 | .. code-block:: python
67 |
68 | def dump_data():
69 | inputs = [np.concatenate([X_train, X_test])]
70 | outputs = [np.concatenate([Y_train, Y_test])]
71 |
72 | file_name = 'test_data_dropout'
73 | scale = 1.0 / inputs[0].std(axis=0)
74 | shift = - scale * inputs[0].mean(axis=0)
75 |
76 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 50000],
77 | ['train', 'test'],
78 | file_name,
79 | '/data_generator')
80 | return file_path, scale, shift, i_names, o_names
81 |
82 | file_path, scale, shift, i_names, o_names = dump_data()
83 |
84 |
85 | def make_gen(set_to_gen, nb_examples):
86 | file_path_f = file_path
87 | names_select = i_names
88 | train_set = H5PYDataset(file_path_f,
89 | which_sets=set_to_gen)
90 |
91 | scheme = SequentialScheme(examples=nb_examples, batch_size=64)
92 |
93 | data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme)
94 |
95 | stand_stream_train = ScaleAndShift(data_stream=data_stream_train,
96 | scale=scale, shift=shift,
97 | which_sources=(names_select[-1],))
98 | return stand_stream_train, train_set, data_stream_train
99 |
100 | train, data_tr, data_stream_tr = make_gen(('train',), 50000)
101 | test, data_te, data_stream_te = make_gen(('test',), 10000)
102 |
103 |
104 | 3 - Build your custom layer
105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
106 |
107 | Imagine you want to reimplement a dropout layer. We could wrap it in a function that returns the object:
108 |
109 |
110 | .. code-block:: python
111 |
112 | def return_custom():
113 | import keras.backend as K
114 | import numpy as np
115 | from keras.engine import Layer
116 | class Dropout_cust(Layer):
117 | '''Applies Dropout to the input.
118 | '''
119 | def __init__(self, p, **kwargs):
120 | self.p = p
121 | if 0. < self.p < 1.:
122 | self.uses_learning_phase = True
123 | self.supports_masking = True
124 | super(Dropout_cust, self).__init__(**kwargs)
125 |
126 | def call(self, x, mask=None):
127 | if 0. < self.p < 1.:
128 | x = K.in_train_phase(K.dropout(x, level=self.p), x)
129 | return x
130 |
131 | def get_config(self):
132 | config = {'p': self.p}
133 | base_config = super(Dropout_cust, self).get_config()
134 | return dict(list(base_config.items()) + list(config.items()))
135 | return Dropout_cust
136 |
137 |
138 | 4 - Build your model
139 | ~~~~~~~~~~~~~~~~~~~~
140 |
141 | We then define our model and call our function to instantiate this custom layer.
142 |
143 | .. code-block:: python
144 |
145 | model = Sequential()
146 |
147 | model.add(Convolution2D(64, 3, 3, border_mode='same',
148 | input_shape=(img_channels, img_rows, img_cols)))
149 | model.add(Activation('relu'))
150 | model.add(Convolution2D(64, 3, 3))
151 | model.add(Activation('relu'))
152 | model.add(MaxPooling2D(pool_size=(2, 2)))
153 | model.add(Dropout(0.25))
154 |
155 | model.add(Convolution2D(128, 3, 3, border_mode='same'))
156 | model.add(Activation('relu'))
157 | model.add(Convolution2D(128, 3, 3))
158 | model.add(Activation('relu'))
159 | model.add(MaxPooling2D(pool_size=(2, 2)))
160 | model.add(Dropout(0.25))
161 |
162 | model.add(Flatten())
163 | model.add(Dense(1024))
164 | model.add(Activation('relu'))
165 | model.add(return_custom()(0.5))
166 | model.add(Dense(nb_classes))
167 | model.add(Activation('softmax'))
168 |
169 | sgd = SGD(lr=0.02, decay=1e-7, momentum=0.9, nesterov=True)
170 | model.compile(loss='categorical_crossentropy',
171 | optimizer=sgd,
172 | metrics=['accuracy'])
173 |
174 |
175 | 5 - Fit your model
176 | ~~~~~~~~~~~~~~~~~~
177 |
178 | We then build a dictionary mapping the name of the custom object to the function that returns it.
179 | After wrapping the model in an :class:`alp.appcom.core.Experiment`, we call the :meth:`alp.appcom.core.Experiment.fit_gen_async` method and pass the custom objects.
180 |
181 | .. code-block:: python
182 |
183 |     custom_objects = {'Dropout_cust': return_custom}
184 |
185 | expe = Experiment(model)
186 |
187 |     results = expe.fit_gen_async([train], [test], nb_epoch=nb_epoch,
188 |                                  model=model,
189 |                                  metrics=['accuracy'],
190 |                                  samples_per_epoch=50000,
191 |                                  nb_val_samples=10000,
192 |                                  verbose=2,
193 |                                  custom_objects=custom_objects)
194 |
195 | .. note::
196 |
197 | Why do we wrap this class and all the dependencies?
198 |
199 |    We use dill to serialize objects but, unfortunately, handling classes with inheritance is not doable. It is also easier to pass the information about all the dependencies of the object this way. The dependencies and your custom objects are instantiated when the function is evaluated, so that they are available in the :code:`__main__` module. This way the information can be sent to workers without problems.
200 |
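For instance, here is a minimal sketch (hypothetical, outside of ALP) of the idea behind this wrapping: we serialize the factory function rather than the class itself, and the receiving side rebuilds the class by evaluating the function.

.. code-block:: python

    import dill

    # ship the factory function, not the class itself
    payload = dill.dumps(return_custom)

    # on the receiving side: rebuild the class, then instantiate the layer;
    # the imports inside return_custom only run at this point
    rebuilt_factory = dill.loads(payload)
    Dropout_cust = rebuilt_factory()
    layer = Dropout_cust(0.5)
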
--------------------------------------------------------------------------------
/docs/Tutorials/tuto2.rst:
--------------------------------------------------------------------------------
1 | =====================================================
2 | Tutorial 2 : Feed simple data to your ALP Experiment
3 | =====================================================
4 |
5 | In this tutorial, we will build an Experiment with a simple model and
6 | fit it on a varying number of datasets. The aim of this tutorial is
7 | to explain the expected behaviour of ALP.
8 |
9 | 1 - Get some data
10 | ~~~~~~~~~~~~~~~~~
11 |
12 | Let us start with the usual Iris dataset.
13 |
14 | .. code:: python
15 |
16 | from sklearn import datasets
17 | from sklearn.model_selection import train_test_split
18 | from sklearn.metrics import accuracy_score
19 |
20 | # get some data
21 | iris = datasets.load_iris()
22 | X_train, X_val, y_train, y_val = train_test_split(
23 | iris.data, iris.target, test_size=100, random_state=0)
24 |
25 | The data is then put in the form ALP expects: a dictionary with a field
26 | 'X' for the input and a field 'y' for the output. Note that the same is
27 | done for the validation data.
28 |
29 | .. code:: python
30 |
31 | data, data_val = dict(), dict()
32 | data["X"], data["y"] = X_train, y_train
33 | data_val["X"], data_val["y"] = X_val, y_val
34 |
35 | Let us also split the data differently: the following lines create 2 more
36 | datasets.
37 |
38 | .. code:: python
39 |
40 | more_data, some_more_data = dict(), dict()
41 | more_data["X"], some_more_data["X"], more_data["y"], some_more_data["y"] = train_test_split(
42 | iris.data, iris.target, test_size=75, random_state=1)
43 |
44 | 2 - Expected behaviour with sklearn
45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
46 |
47 | 2.1 - Defining the experiment and model
48 | +++++++++++++++++++++++++++++++++++++++
49 |
50 | We then define a first simple sklearn logistic regression.
51 |
52 | .. code:: python
53 |
54 | from alp.appcom.core import Experiment
55 | from sklearn.linear_model import LogisticRegression
56 |
57 | lr = LogisticRegression()
58 | Expe = Experiment(lr)
59 |
60 | 2.2 - Fitting with one data set and one validation
61 | ++++++++++++++++++++++++++++++++++++++++++++++++++
62 |
63 | Fitting one data set with one validation set is done this way:
64 |
65 | .. code:: python
66 |
67 | Expe.fit([data],[data_val])
68 |
69 |
70 | .. parsed-literal::
71 |
72 | ({'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf',
73 | 'metrics': {'iter': nan,
74 | 'score': [0.97999999999999998],
75 | 'val_score': [0.93999999999999995]},
76 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e',
77 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'},
78 | None)
79 |
80 |
81 |
82 | Now let's take a look at the results (a short sketch of how to read them follows the list):
83 | 
84 | * there is a data\_id field: the id under which the data is stored in the appropriate collection.
85 | 
86 | * there is a model\_id field: the id under which the model architecture is stored.
87 | 
88 | * the params\_dump field is the path of the file where the *attributes* of the fitted model are stored.
89 | 
90 | * the metrics field is itself a dictionary with several attributes:
91 |     * the iter field is here for compatibility with the keras backend.
92 | 
93 |     * the score field is model specific: you will have to look into sklearn's documentation to see what kind of metric is used. For the logistic regression, it is the accuracy. This field is therefore the accuracy of the fitted model on the training data.
94 | 
95 |     * the val\_score field is the score on the validation data (it is still the accuracy in this case).
96 |
97 |
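Putting this together, here is a short sketch of how these fields can be read from the value returned by :code:`fit` (the second element of the returned tuple is :code:`None` here):

.. code:: python

    res, _ = Expe.fit([data], [data_val])

    print(res['model_id'])                  # id of the model architecture
    print(res['params_dump'])               # path of the parameters file
    print(res['metrics']['score'][-1])      # training accuracy of the last fit
    print(res['metrics']['val_score'][-1])  # validation accuracy
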
98 | You can access the full result of the experiment in the full\_res
99 | attribute of the object.
100 |
101 | .. code:: python
102 |
103 | Expe.full_res
104 |
105 |
106 | .. parsed-literal::
107 |
108 | {'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf',
109 | 'metrics': {'iter': nan,
110 | 'score': [0.97999999999999998],
111 | 'val_score': [0.93999999999999995]},
112 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e',
113 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'}
114 |
115 |
116 |
117 | Predicting on the "more\_data" set with the model fitted on "data" is done this
118 | way:
119 |
120 | .. code:: python
121 |
122 | pred_on_more_data = Expe.predict(more_data["X"])
123 |
124 | At this point, pred\_on\_more\_data is a vector of predictions. Its
125 | accuracy is obtained as follows:
126 |
127 | .. code:: python
128 |
129 | accuracy_score(pred_on_more_data,more_data["y"])
130 |
131 |
132 |
133 |
134 | .. parsed-literal::
135 |
136 | 0.95999999999999996
137 |
138 |
139 |
140 | Now you can check that the full\_res field of the Expe object was not
141 | modified during the predict call.
142 |
143 | .. code:: python
144 |
145 | Expe.full_res
146 |
147 |
148 |
149 |
150 | .. parsed-literal::
151 |
152 | {'data_id': '1c59c0c562a5abdb84ad4f4a2c1868bf',
153 | 'metrics': {'iter': nan,
154 | 'score': [0.97999999999999998],
155 | 'val_score': [0.93999999999999995]},
156 | 'model_id': '5cabd17bbac6934fb487fa7f69bbda6e',
157 | 'params_dump': u'/parameters_h5/5cabd17bbac6934fb487fa7f69bbda6e1c59c0c562a5abdb84ad4f4a2c1868bf.h5'}
158 |
159 |
160 |
161 | 2.3 - Fitting with one data set and no validation:
162 | ++++++++++++++++++++++++++++++++++++++++++++++++++
163 |
164 | If you want to fit an experiment and don't have a validation set, you
165 | need to pass a None in the validation field. Note that all the fields
166 | have changed. Since the data has changed, the data\_id is different. The
167 | model created is a new one, and so are the parameters. Finally, the metrics
168 | are different.
169 |
170 | .. code:: python
171 |
172 | Expe.fit([some_more_data],[None])
173 |
174 |
175 |
176 |
177 | .. parsed-literal::
178 |
179 | ({'data_id': '3554c1421fd9056e69c3cdf1b0ec8c3f',
180 | 'metrics': {'iter': nan, 'score': [0.95999999999999996], 'val_score': [nan]},
181 | 'model_id': 'ceb5d5632334515c4ebbd72a256bd421',
182 | 'params_dump': u'/parameters_h5/ceb5d5632334515c4ebbd72a256bd4213554c1421fd9056e69c3cdf1b0ec8c3f.h5'},
183 | None)
184 |
185 |
186 |
187 | As a result, the model actually stored in the Experiment at this point of
188 | the code execution is not the same as in 2.2. You can check that by
189 | predicting on the more\_data set and checking that the score is not the
190 | same.
191 |
192 | .. code:: python
193 |
194 | pred_on_more_data = Expe.predict(more_data["X"])
195 | accuracy_score(pred_on_more_data,more_data["y"])
196 |
197 |
198 |
199 |
200 | .. parsed-literal::
201 |
202 | 0.94666666666666666
203 |
204 |
205 |
206 | 2.4 - Fitting several datasets
207 | ++++++++++++++++++++++++++++++
208 |
209 | This is an important point, since the behaviour of sklearn differs from
210 | the keras one: if you feed several datasets to an Experiment with an
211 | sklearn model, ALP proceeds as follows:
212 |
213 | * the first model is fitted, then the score and validation score are computed (on the first validation data, if provided).
214 |
215 | * the second model is fitted, then the score and validation score are computed (on the second validation data, if provided).
216 |
217 | * and so on
218 |
219 | As a result, the data\_id, model\_id and params\_dump fields in the
220 | full\_res attribute of the Experiment after the following line are the ones of
221 | the second model. The metrics fields (score and val\_score) have a
222 | length of 2, one entry for each model.
223 |
224 | Note that you can specify a None as validation set if you don't want to
225 | validate a certain model.
226 |
227 | .. code:: python
228 |
229 | Expe.fit([data,more_data],[None,some_more_data])
230 |
231 |
232 | .. parsed-literal::
233 |
234 | ({'data_id': '2767007837282c3da5a86cfe41b57cce',
235 | 'metrics': {'iter': nan,
236 | 'score': [0.97999999999999998, 0.94666666666666666],
237 | 'val_score': [nan, 0.92000000000000004]},
238 | 'model_id': 'c6f885968087dc779ce47f3f1af86a9b',
239 | 'params_dump': u'/parameters_h5/c6f885968087dc779ce47f3f1af86a9b2767007837282c3da5a86cfe41b57cce.h5'},
240 | None)
241 |
--------------------------------------------------------------------------------
/docs/Tutorials/tuto0.rst:
--------------------------------------------------------------------------------
1 | ===================================================================
2 | Tutorial 0 : how to launch a basic experiment with keras or sklearn
3 | ===================================================================
4 |
5 | Step 1 : launching alp
6 | ~~~~~~~~~~~~~~~~~~~~~~
7 |
8 |
9 | Follow the instructions in the setup section.
10 | We assume at this point that you have a Jupyter notebook running on the controller.
11 |
12 |
13 | Step 2 : defining your model
14 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15 |
16 | You can follow the steps in `Step 2.1 : Keras`_ or in `Step 2.2 : Scikit learn`_ depending on whether you want to use Keras_ or `scikit-learn`_. In both cases we do the right imports, get some classification data, put it in the ALP format and instantiate a model. The important thing at the end of step 2 is to have the :code:`data`, :code:`data_val` and :code:`model` objects ready.
17 |
18 | Step 2.1 : Keras
19 | ++++++++++++++++
20 |
21 | The following code gets some data and declares a simple artificial neural network with Keras:
22 |
23 | .. code-block:: python
24 |
25 | # we import numpy and fix the seed
26 | import numpy as np
27 | np.random.seed(1337) # for reproducibility
28 |
29 | # we import alp and Keras tools that we will use
30 | import alp
31 | from keras.datasets import mnist
32 | from keras.models import Sequential
33 | from keras.layers import Dense, Dropout, Activation, Flatten
34 | from keras.utils import np_utils
35 | import keras.backend as K
36 | from keras.optimizers import Adam
37 | from alp.appcom.ensembles import HParamsSearch
38 |
39 | # if you use tensorflow you must use this configuration
40 | # so that it doesn't use all of the GPU's memory (default config)
41 | import tensorflow as tf
42 |
43 | config = tf.ConfigProto(allow_soft_placement=True)
44 | config.gpu_options.allow_growth = True
45 | session = tf.Session(config=config)
46 | K.set_session(session)
47 |
48 | batch_size = 128
49 | nb_classes = 10
50 | nb_epoch = 12
51 |
52 | # input image dimensions
53 | img_rows, img_cols = 28, 28
54 | # number of features to use
55 | nb_filters = 32
56 |
57 | # the data, shuffled and split between train and test sets
58 | (X_train, y_train), (X_test, y_test) = mnist.load_data()
59 |
60 | X_train = X_train.astype('float32')
61 | X_test = X_test.astype('float32')
62 | X_train /= 255
63 | X_test /= 255
64 | print('X_train shape:', X_train.shape)
65 | print(X_train.shape[0], 'train samples')
66 | print(X_test.shape[0], 'test samples')
67 |
68 | if K.image_dim_ordering() == 'th':
69 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
70 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
71 | input_shape = (1, img_rows, img_cols)
72 | else:
73 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
74 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
75 | input_shape = (img_rows, img_cols, 1)
76 |
77 | # convert class vectors to binary class matrices
78 | Y_train = np_utils.to_categorical(y_train, nb_classes)
79 | Y_test = np_utils.to_categorical(y_test, nb_classes)
80 |
81 | # put the data in the form ALP expects
82 | data, data_val = dict(), dict()
83 | data["X"] = X_train
84 | data["y"] = Y_train
85 | data_val["X"] = X_test
86 | data_val["y"] = Y_test
87 |
88 | # finally define and compile the model
89 |
90 | model = Sequential()
91 |
92 | model.add(Flatten(input_shape=input_shape))
93 | model.add(Dense(nb_filters))
94 | model.add(Activation('relu'))
95 | model.add(Dropout(0.25))
96 |
97 | model.add(Dense(128))
98 | model.add(Activation('relu'))
99 | model.add(Dropout(0.5))
100 | model.add(Dense(nb_classes))
101 | model.add(Activation('softmax'))
102 |
103 | model.compile(loss='categorical_crossentropy',
104 | optimizer='adadelta',
105 | metrics=['accuracy'])
106 |
107 | Note that we compile the model so that we also have information about the optimizer.
108 |
109 |
110 | Step 2.2 : Scikit learn
111 | +++++++++++++++++++++++
112 |
113 | The following code gets some data and declares a simple logistic regression with :code:`scikit-learn`:
114 |
115 | .. code-block:: python
116 |
117 | # some imports
118 | from sklearn import cross_validation
119 | from sklearn import datasets
120 | from sklearn.linear_model import LogisticRegression
121 |
122 | # get some data
123 | iris = datasets.load_iris()
124 | X_train, X_test, y_train, y_test = cross_validation.train_test_split(
125 | iris.data, iris.target, test_size=0.2, random_state=0)
126 |
127 | # put the data in the form ALP expects
128 | data, data_val = dict(), dict()
129 | data["X"] = X_train
130 | data["y"] = y_train
131 | data_val["X"] = X_test
132 | data_val["y"] = y_test
133 |
134 | # define the model
135 | model = LogisticRegression()
136 |
137 | Please note that by default for the :code:`LogisticRegression`, the :code:`multi_class` parameter is set to OvR, that is to say one classifier per class. On the iris dataset, it means 3 classifiers. Unlike in Keras, the model is not compiled. So far, the measure of performance (validation metric) can only be the mean absolute error, but we will soon have several metrics working.
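
A quick way to see the OvR behaviour (a sketch, using plain scikit-learn before handing the model to ALP):

.. code-block:: python

    # fitting directly with sklearn: one row of coefficients per class
    model.fit(data["X"], data["y"])
    print(model.coef_.shape)  # (3, 4): 3 OvR classifiers, 4 iris features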
138 |
139 |
140 | Step 3 : fitting the model with ALP
141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142 |
143 | Step 3.1 : defining the Experiment
144 | ++++++++++++++++++++++++++++++++++
145 |
146 | In ALP, the base object is the Experiment.
147 | An Experiment trains, predicts, saves and logs a model.
148 | So the first step is to import and define the Experiment object.
149 |
150 | .. code-block:: python
151 |
152 | from alp.appcom.core import Experiment
153 |
154 | expe = Experiment(model)
155 |
156 |
157 | Step 3.2 : fit the model
158 | ++++++++++++++++++++++++
159 |
160 | You have access to two types of methods to fit the model.
161 |
162 | * The :code:`fit` and :code:`fit_gen` methods allow you to fit the model in the same process.
163 |
164 | For the :code:`scikit-learn` backend, you can launch the computation with the following command without extra arguments:
165 |
166 | .. code-block:: python
167 |
168 | expe.fit([data], [data_val])
169 |
170 | Note that the :code:`data` and the :code:`data_val` are put in lists.
171 |
172 |
173 | With Keras you might want to specify the number of epochs and the batch_size, as you would do when fitting a Keras :code:`model` object directly. These arguments flow through to the final call. Note that they are not necessary for the fit; see the default arguments in the `Keras model doc `_.
174 |
175 | .. code-block:: python
176 |
177 | expe.fit([data], [data_val], nb_epoch=2, batch_size=batch_size)
178 |
179 | In both cases, the model is trained and automatically saved in the databases.
180 |
181 | * The :code:`fit_async` method sends the model to the broker container that will manage the training using the workers you defined in the setup phase. The commands are then straightforward.
182 |   For the :code:`scikit-learn` backend:
183 |
184 | .. code-block:: python
185 |
186 | expe.fit_async([data], [data_val])
187 |
188 |
189 | For the Keras backend you can again provide extra arguments to override the defaults.
190 |
191 | .. code-block:: python
192 |
193 | expe.fit_async([data], [data_val], nb_epoch=2, batch_size=batch_size)
194 |
195 | In both cases, the model is also trained and automatically saved in the databases.
196 |
197 |
198 |
199 | Step 4 : Identifying and reusing the fitted model
200 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
201 |
202 | Once the experiment has been fitted, you can access the id of the model in the db and load it to make predictions or access the parameters in the current process.
203 |
204 | .. code-block:: python
205 |
206 | print(expe.mod_id)
207 | print(expe.data_id)
208 |
209 | expe.load_model(expe.mod_id, expe.data_id)
210 |
211 |
212 | It's then possible to make predictions using the loaded model.
213 |
214 | .. code-block:: python
215 |
216 | expe.predict(data['X'])
217 |
218 | You could of course provide new data to the model. You can also load the model in another experiment.
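
For instance, a sketch of reusing the fitted model in a fresh Experiment (relying on the ids printed above):

.. code-block:: python

    # a sketch: load the same fitted model into another Experiment
    other_expe = Experiment(model)
    other_expe.load_model(expe.mod_id, expe.data_id)
    other_expe.predict(data['X'])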
219 |
220 | .. _Keras: http://keras.io/
221 | .. _`scikit-learn`: http://scikit-learn.org/stable/
222 |
--------------------------------------------------------------------------------
/src/alp/appcom/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for the appcom module
3 | =======================================
4 | """
5 |
6 | import functools
7 | import pickle
8 | import threading
9 | from itertools import islice
10 |
11 | from six.moves import zip as szip
12 |
13 |
14 | def _get_backend_attributes(ABE):
15 | """Gets the backend attributes.
16 |
17 | Args:
18 | ABE(module): the module to get attributes from.
19 |
20 | Returns:
21 | the backend, the backend name and the backend version
22 |
23 | """
24 | backend_m = ABE.get_backend()
25 | backend_name = backend_m.__name__
26 | if hasattr(backend_m, '__version__'):
27 | backend_version = backend_m.__version__
28 | else: # pragma: no cover
29 | backend_version = None
30 |
31 | return ABE, backend_name, backend_version
32 |
33 |
34 | def init_backend(model):
35 | """Initialization of the backend
36 |
37 | Args:
38 |         model: a model instance, used to infer the backend (only keras and sklearn models are supported at the moment)
39 |
40 | Returns:
41 | the backend, the backend name and the backend version
42 | """
43 | if 'keras' in repr(model):
44 | from ..backend import keras_backend as ABE
45 | elif 'sklearn' in repr(type(model)):
46 | from ..backend import sklearn_backend as ABE
47 | else:
48 | raise NotImplementedError(
49 | "this backend is not supported: {}".format(
50 | model)) # pragma: no cover
51 |
52 | return _get_backend_attributes(ABE)
53 |
54 |
55 | def switch_backend(backend_name):
56 |     """Switch the backend based on its name
57 |
58 | Args:
59 | backend_name(str): the name of the backend to import
60 |
61 |     Returns:
62 |         the requested backend"""
63 | if backend_name == 'keras':
64 | from ..backend.keras_backend import get_backend
65 | elif backend_name == 'sklearn':
66 | from ..backend.sklearn_backend import get_backend
67 | else:
68 | raise NotImplementedError
69 | return get_backend()
70 |
71 |
72 | def list_to_dict(list_to_transform):
73 |     """Transform a list of objects to a dict
74 |
75 | Args:
76 | list_to_transform(list): the list to transform
77 |
78 | Returns:
79 |         a dictionary mapping the names of the objects to the objects"""
80 | return {el.__name__: el for el in list_to_transform}
81 |
82 |
83 | def background(f):
84 | '''
85 | a threading decorator
86 | use @background above the function you want to run in the background
87 | '''
88 | @functools.wraps(f)
89 | def bg_f(*a, **kw):
90 | t = threading.Thread(target=f, args=a, kwargs=kw)
91 | t.start()
92 | return t
93 | return bg_f
94 |
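# Example usage of `background` (a sketch):
#
#     @background
#     def long_fit(model, data):
#         model.fit(data['X'], data['y'])
#
#     thread = long_fit(model, data)  # returns immediately with the Thread
#     thread.join()                   # block only when the result is needed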
95 |
96 | def imports(packages=None):
97 | """A decorator to import packages only once when a function is serialized
98 |
99 | Args:
100 | packages(list or dict): a list or dict of packages to import. If the
101 | object is a dict, the name of the import is the key and the value
102 | is the module. If the object is a list, it's transformed to a dict
103 | mapping the name of the module to the imported module.
104 | """
105 | if packages is None:
106 | packages = dict()
107 |
108 | def dec(wrapped):
109 | @functools.wraps(wrapped)
110 | def inner(*args, **kwargs):
111 | packs = packages
112 | if isinstance(packages, list):
113 | packs = list_to_dict(packages)
114 | for name, pack in packs.items():
115 | if name not in wrapped.__globals__:
116 | wrapped.__globals__[name] = pack
117 | return wrapped(*args, **kwargs)
118 | return inner
119 | return dec
120 |
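# Example usage of `imports` (a sketch): make numpy available by name inside a
# function that will be serialized and evaluated in another process.
#
#     import numpy
#
#     @imports({'np': numpy})
#     def normalize(x):
#         return np.asarray(x) / 255.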
121 |
122 | def norm_iterator(iterable):
123 | """returns a normalized iterable of tuples"""
124 | if isinstance(iterable, list):
125 | names = ['list_' + str(i) for i, j in enumerate(iterable)]
126 | return szip(names, iterable)
127 | else:
128 | raise NotImplementedError('Iterables other than lists '
129 | 'cannot be passed to this function')
130 |
131 |
132 | def window(seq, n=2):
133 | """Returns a sliding window (of width n) over data from the iterable"""
134 | it = iter(seq)
135 | result = tuple(islice(it, n))
136 | if len(result) == n: # pragma: no cover
137 | yield result
138 | for elem in it:
139 | result = result[1:] + (elem,)
140 | yield result
141 |
142 |
143 | def to_fuel_h5(inputs, outputs, slices, names,
144 | file_name, file_path=''):
145 | """Transforms list of numpy arrays to a structured hdf5 file
146 |
147 |
148 | Args:
149 | inputs(list): a list of inputs(numpy.arrays)
150 | outputs(list): a list of outputs(numpy.arrays)
151 | slices(list): a list of int representing the end of a slice and the
152 |             beginning of another slice. The last slice is automatically added
153 | if missing (maximum length of the inputs).
154 | names(list): a list of names for the datasets
155 | file_name(str): the name of the file to save.
156 | file_path(str): the path where the file is located
157 |
158 |     Returns:
159 |         The file full path, the list of input dataset names and the list of output dataset names
160 | """
161 | import h5py
162 | import os
163 | from fuel.datasets.hdf5 import H5PYDataset
164 |
165 | suffix = 'hdf5'
166 |
167 | inp = 'input_'
168 | out = 'output_'
169 |
170 | full_path = os.path.join(file_path, file_name.lower() + '.' + suffix)
171 | f = h5py.File(full_path, mode='w')
172 |
173 | dict_data_set = dict()
174 | split_dict = dict()
175 | for name in names:
176 | split_dict[name] = dict()
177 |
178 | slices.append(max_v_len(inputs))
179 |
180 | def insert_info_h5(iterable, suf):
181 | names_out = []
182 | for k, v in norm_iterator(iterable):
183 | dict_data_set[suf + k] = f.create_dataset(suf + k, v.shape,
184 | v.dtype)
185 | dict_data_set[suf + k][...] = v
186 | for sl, name in zip(window(slices, 2), names):
187 | split_dict[name][suf + k] = sl
188 | names_out.append(suf + str(k))
189 | return names_out
190 |
191 | inputs_names = insert_info_h5(inputs, inp)
192 | outputs_names = insert_info_h5(outputs, out)
193 |
194 | f.attrs['split'] = H5PYDataset.create_split_array(split_dict)
195 | f.flush()
196 | f.close()
197 | return full_path, inputs_names, outputs_names
198 |
199 |
200 | def max_v_len(iterable_to_check):
201 |     """Returns the max length of a list of iterables"""
202 | max_v = 0
203 | for _, v in norm_iterator(iterable_to_check):
204 | if len(v) > max_v: # pragma: no cover
205 | max_v = len(v)
206 | return max_v
207 |
208 |
209 | def pickle_gen(gen_train, data_val):
210 | """Check and serialize the validation data object and serialize the
211 | training data generator.
212 |
213 | Args:
214 | gen_train(generator): the training data generator
215 |         data_val(dict or generator): the validation data object
216 |
217 | Returns:
218 | normalized datasets"""
219 | gen_train = [pickle.dumps(g).decode('raw_unicode_escape')
220 | for g in gen_train]
221 |
222 | val_gen = check_gen(data_val)
223 |
224 | if val_gen:
225 | data_val = [pickle.dumps(g).decode('raw_unicode_escape')
226 | for g in data_val]
227 | return gen_train, data_val
228 |
229 |
230 | def check_gen(iterable):
231 | """Check if the last object of the iterable is an iterator
232 |
233 | Args:
234 | iterable(list): a list containing data.
235 |
236 | Returns:
237 | True if the last object is a generator, False otherwise.
238 | """
239 | is_gen = (hasattr(iterable[-1], 'next') or
240 | hasattr(iterable[-1], '__next__'))
241 | is_gen += 'fuel' in repr(iterable[-1])
242 |
243 | return is_gen
244 |
245 |
246 | def get_nb_chunks(generator):
247 |     """Get the number of chunks that a generator yields
248 |
249 | Args:
250 | generator: a Fuel generator
251 |
252 | Returns:
253 | number of chunks (int)"""
254 | if hasattr(generator, 'iteration_scheme'):
255 | if generator.iteration_scheme is not None:
256 | batch_size = generator.iteration_scheme.batch_size
257 | nb_examples = len(generator.iteration_scheme.indices)
258 | return nb_examples // batch_size
259 | else:
260 | if hasattr(generator, 'data_stream'):
261 | return get_nb_chunks(generator.data_stream)
262 | else:
263 | raise Exception('No data stream in the generator')
264 |
--------------------------------------------------------------------------------
/src/alp/cli.py:
--------------------------------------------------------------------------------
1 | """
2 | CLI to launch ALP services
3 | ==========================
4 | """
5 |
6 | import os
7 | import click
8 | import pandas as pd
9 | from docker import Client
10 | from . import __version__
11 | from .cli_utils import a_text
12 | from .cli_utils import action_config
13 | from .cli_utils import banner
14 | from .cli_utils import col_info
15 | from .cli_utils import col_warn
16 | from .cli_utils import gen_all_configs
17 | from .cli_utils import get_config_names
18 | from .cli_utils import open_config
19 | from .cli_utils import pass_config
20 | from .cli_utils import pull_config
21 |
22 |
23 | @click.group()
24 | @click.option('--verbose', is_flag=True)
25 | @pass_config
26 | def main(conf, verbose):
27 | """
28 |     The alp command provides you with a number of options to manage alp services
29 | """
30 | docker_client = Client('unix://var/run/docker.sock')
31 | kernel_version = docker_client.info()['ServerVersion']
32 | click.echo(click.style(banner, fg=col_info, bold=True))
33 | click.echo(click.style('Version: {}'.format(__version__),
34 | fg=col_info, bold=True))
35 | click.echo(click.style('Running with Docker version: {}'.format(
36 | kernel_version), fg=col_info, bold=True))
37 | click.echo(click.style('\n'))
38 | conf.verbose = verbose
39 | return 0
40 |
41 |
42 | @main.command()
43 | @click.option('--force', is_flag=True)
44 | @click.option('--dry_run', is_flag=True)
45 | @click.argument('action', type=click.STRING, required=True)
46 | @click.argument('config', type=click.Path(exists=True), required=True)
47 | @pass_config
48 | def service(conf, force, dry_run, action, config):
49 | """Subcommand to take action on services"""
50 | config = open_config(config, conf.verbose)
51 | if action == 'start':
52 | results = action_config(config, 'run', conf.verbose, force=force,
53 | dry_run=dry_run)
54 | elif action == 'stop':
55 | results = action_config(config, 'stop', conf.verbose, force=force,
56 | dry_run=dry_run)
57 | elif action == 'restart':
58 | results = action_config(config, 'restart', conf.verbose, force=force,
59 | dry_run=dry_run)
60 | elif action == 'rm':
61 | results = action_config(config, 'rm', conf.verbose, force=force,
62 | dry_run=dry_run)
63 | else:
64 | raise Exception('Action must be in start, stop, restart, rm')
65 | return results
66 |
67 |
68 | @main.command()
69 | @click.argument('config', type=click.Path(exists=True), required=True)
70 | @pass_config
71 | def status(conf, config):
72 | """Get the status of the running containers"""
73 | config = open_config(config)
74 | docker_client = Client('unix://var/run/docker.sock')
75 | all_containers = docker_client.containers(all=True)
76 | running_containers = []
77 | running_ids = dict()
78 |
79 | names = get_config_names(config)
80 | for container in all_containers:
81 | name = container['Names'][0].replace('/', '')
82 | if name in names:
83 | print_cont = dict()
84 | print_cont['name'] = name
85 | print_cont['status'] = container['Status']
86 | print_cont['image'] = container['Image']
87 | print_cont['image_id'] = container['ImageID']
88 | running_ids[container['ImageID']] = print_cont['image']
89 | print_cont['ports'] = []
90 | if 'Ports' in container: # pragma: no cover
91 | for port in container['Ports']:
92 | pub_port = None
93 | priv_port = None
94 | if 'PublicPort' in port:
95 | pub_port = port['PublicPort']
96 | if 'PrivatePort' in port:
97 | priv_port = port['PrivatePort']
98 | if pub_port:
99 | print_cont['ports'] += ['{}:{}'.format(pub_port,
100 | priv_port)]
101 | running_containers.append(print_cont)
102 | else: # pragma: no cover
103 | click.echo(click.style(
104 | a_text('{}'.format(name), 'not in the config'),
105 | fg=col_warn))
106 |
107 | click.echo(click.style('Running containers'.center(80, '='),
108 | fg=col_info, bold=True))
109 | click.echo()
110 | for container in running_containers:
111 | click.echo(click.style('{}'.format(container['name']).center(80, '-'),
112 | fg=col_info, bold=True))
113 | for k in container:
114 | if isinstance(container[k], list):
115 | container[k] = ' '.join(container[k])
116 | if len(container[k]) > 40:
117 | cut = len(container[k]) - 40
118 | container[k] = container[k][:cut - 3] + '...'
119 | click.echo(click.style(a_text(k, container[k]),
120 | fg=col_info))
121 | click.echo('\n')
122 | images = docker_client.images()
123 |
124 | click.echo(click.style('Images from the config'.center(80, '='),
125 | fg=col_info, bold=True))
126 | click.echo()
127 | for image in images:
128 | if image['Id'] in running_ids:
129 | print_im = dict()
130 | print_im['name'] = '{}'.format(running_ids[image['Id']])
131 | print_im['created'] = pd.to_datetime(image['Created'] * 1e9)
132 | print_im['created'] = print_im['created'].strftime(
133 | '%Y-%m-%d %H:%M')
134 | print_im['size'] = '{:.2f}'.format(image['Size'] / 1000000000.)
135 |
136 | click.echo(click.style(
137 | '{}'.format(print_im['name']).center(80, '-'),
138 | fg=col_info, bold=True))
139 | for k in print_im:
140 | if isinstance(print_im[k], list): # pragma: no cover
141 | container[k] = ' '.join(print_im[k])
142 | if len(print_im[k]) > 40: # pragma: no cover
143 | cut = len(print_im[k]) - 40
144 | container[k] = print_im[k][:cut - 3] + '...'
145 | click.echo(click.style(a_text(k, print_im[k]),
146 | fg=col_info))
147 | click.echo('\n')
148 | return 0
149 |
150 |
151 | @main.command()
152 | @click.option('--force', is_flag=True)
153 | @click.argument('config', type=click.Path(exists=True), required=True)
154 | @pass_config
155 | def update(conf, config, force):
156 | """Pull, stop, remove and rerun all containers"""
157 | config = open_config(config)
158 | pull_config(config, conf.verbose)
159 | res_stop = action_config(config, 'stop', conf.verbose, force=force)
160 | res_rm = action_config(config, 'rm', conf.verbose, force=force)
161 | res_run = action_config(config, 'run', conf.verbose, force=force)
162 | succeeded = all([res_stop, res_rm, res_run])
163 | return succeeded
164 |
165 |
166 | @main.command()
167 | @click.argument('config', type=click.Path(exists=True), required=True)
168 | @pass_config
169 | def pull(conf, config):
170 | """Pull containers"""
171 | config = open_config(config)
172 | res = pull_config(config, conf.verbose)
173 | return res
174 |
175 |
176 | @main.command()
177 | @click.option('--outdir', type=click.Path(exists=True))
178 | @click.option('--namesuf', type=click.STRING, default='')
179 | @click.option('--portshift', type=click.INT, default=0)
180 | @click.option('--rootfolder', type=click.Path(exists=True))
181 | @click.option('--controlers', type=click.INT, default=1)
182 | @click.option('--skworkers', type=click.INT, default=1)
183 | @click.option('--kworkers', type=click.INT, default=1)
184 | @click.option('--cpu', is_flag=True)
185 | @pass_config
186 | def genconfig(conf, outdir, namesuf, portshift, rootfolder, controlers,
187 | skworkers, kworkers, cpu):
188 |     """Generates and writes configuration files in .alp"""
189 |
190 | if outdir is None:
191 | outdir = os.path.expanduser('~')
192 | if not os.access(outdir, os.W_OK): # pragma: no cover
193 | outdir = '/tmp'
194 | outdir = os.path.join(outdir, '.alp')
195 | else:
196 | if not os.access(outdir, os.W_OK): # pragma: no cover
197 | raise IOError('Cannot access directory')
198 | outdir = os.path.join(outdir, '.alp')
199 | if not os.path.exists(outdir): # pragma: no cover
200 | os.makedirs(outdir)
201 |
202 | alpapp, alpdb, containers = gen_all_configs(outdir, namesuf, portshift,
203 | rootfolder, controlers,
204 | skworkers, kworkers, cpu)
205 |
206 | if conf.verbose:
207 | click.echo(click.style('Auto generated configuration:', fg=col_info))
208 | click.echo(click.style(a_text(' Controlers', str(controlers)),
209 | fg=col_info))
210 | click.echo(click.style(a_text(' Sklearn workers', str(skworkers)),
211 | fg=col_info))
212 | click.echo(click.style(a_text(' Keras workers', str(kworkers)),
213 | fg=col_info))
214 | click.echo()
215 |
216 | # dump configs in .alp
217 |
218 | alpapp_json = os.path.join(outdir, 'alpapp.json')
219 | alpdb_json = os.path.join(outdir, 'alpdb.json')
220 | containers_json = os.path.join(outdir, 'containers.json')
221 |
222 | with open(alpapp_json, 'w') as f:
223 | f.write(alpapp)
224 |
225 | with open(alpdb_json, 'w') as f:
226 | f.write(alpdb)
227 |
228 | with open(containers_json, 'w') as f:
229 | f.write(containers)
230 |
231 | return 0
232 |
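# Example usage of the CLI (a sketch; assumes the ``alp`` entry point is
# installed and that `genconfig` wrote its files in ~/.alp):
#
#     alp --verbose genconfig --cpu
#     alp service start ~/.alp/containers.json
#     alp status ~/.alp/containers.json
#     alp service stop ~/.alp/containers.json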
--------------------------------------------------------------------------------
/src/alp/appcom/ensembles.py:
--------------------------------------------------------------------------------
1 | """
2 | Ensembles module
3 | ================
4 | """
5 |
6 | import warnings
7 | from time import time
8 |
9 | import numpy as np
10 | import pandas as pd
11 | from progressbar import ETA
12 | from progressbar import Bar
13 | from progressbar import DynamicMessage
14 | from progressbar import FormatLabel
15 | from progressbar import Percentage
16 | from progressbar import ProgressBar
17 | from progressbar import SimpleProgress
18 |
19 |
20 | def get_best(experiments, metric, op, partial=False):
21 |     """Helper function to select the best experiment from a collection of experiments
22 |
23 |     In case of equality in the metric, the behaviour of op determines the
24 |     result.
25 |
26 | Args:
27 | experiments(list): a list of experiments
28 | metric(str): the name of a metric used in the experiments
29 | op (function): operation to perform with the metric (optional)
30 |         partial(bool): if True, experiments without results are skipped.
31 |             Raises an error otherwise.
32 | """
33 | best_perf_expes = []
34 | list_experiments = []
35 | list_keys = []
36 | not_ready = False
37 | for k, expe in experiments.items():
38 | if not hasattr(expe, 'full_res'): # pragma: no cover
39 | if not partial:
40 | raise Exception('Results are not ready')
41 | else:
42 | not_ready = True
43 | else:
44 | best_perf_expes.append(op(expe.full_res['metrics'][metric]))
45 | list_experiments.append(expe)
46 | list_keys.append(k)
47 |
48 | if not_ready is True: # pragma: no cover
49 | warnings.warn('Some results are not ready: Using the best available'
50 | ' model.')
51 |
52 | if len(list_experiments) == 0:
53 | raise Exception('No result is ready yet')
54 |
55 | ar_expes = np.array(list_experiments)
56 | ar_keys = np.array(list_keys)
57 | perf_array = np.array(best_perf_expes)
58 | perf_nans = np.isnan(perf_array)
59 | if (1 - perf_nans).sum() == 0:
60 | raise Exception('The selected metric evaluations are all nans')
61 |
62 | best_perf_expes = perf_array[perf_nans == False] # NOQA
63 | bool_choice = op(best_perf_expes) == np.array(best_perf_expes)
64 | best = ar_expes[bool_choice] # NOQA
65 | best_key = ar_keys[bool_choice]
66 | return best[0], best_key[0]
67 |
68 |
69 | widgets = [Percentage(), ' ',
70 | SimpleProgress(), ' ',
71 | Bar(marker='=', left='[', right=']'),
72 | ' ', FormatLabel('in: %(elapsed)s'), ' ',
73 | ETA(), ' | ', 'job/', DynamicMessage('s')]
74 |
75 |
76 | class Ensemble(object):
77 |
78 |     """Base class to build experiment containers able to execute batch
79 |     sequences of actions. Subclasses must implement the `fit`, `fit_gen`,
80 |     `fit_async` and `fit_gen_async` methods.
81 |
82 | Args:
83 |         experiments(dict or list): experiments to be wrapped. If a dictionary
84 | is passed, it should map experiment names to experiments.
85 | """
86 | def __init__(self, experiments):
87 | if isinstance(experiments, list):
88 | experiments = {i: v for i, v in enumerate(experiments)}
89 | if not isinstance(experiments, dict): # pragma: no cover
90 | raise TypeError('You must pass either an experiments dict or list')
91 | self.experiments = experiments
92 |
93 | def fit(self, data, data_val, *args, **kwargs):
94 | raise NotImplementedError
95 |
96 | def fit_gen(self, data, data_val, *args, **kwargs):
97 | raise NotImplementedError
98 |
99 | def fit_async(self, data, data_val, *args, **kwargs):
100 | raise NotImplementedError
101 |
102 | def fit_gen_async(self, data, data_val, *args, **kwargs):
103 | raise NotImplementedError
104 |
105 | def predict(self, data, data_val, *args, **kwargs):
106 | raise NotImplementedError
107 |
108 | def summary(self, metrics, verbose=False):
109 | raise NotImplementedError
110 |
111 | def plt_summary(self):
112 | raise NotImplementedError
113 |
114 |
115 | class HParamsSearch(Ensemble):
116 | """Hyper parameters search class
117 |
118 | Train several experiments with different hyperparameters and save results.
119 | Wraps the training process so that it's possible to access results easily.
120 |
121 | Args:
122 |         experiments(dict or list): experiments to be wrapped. If a dictionary
123 | is passed, it should map experiment names to experiments
124 | hyperparams(dict): a dict of hyperparameters
125 | metric(str): the name of a metric used in the experiments
126 |         op(function): an operator used to select a model
127 |
128 | """
129 | def __init__(self, experiments, hyperparams=None, metric=None, op=None):
130 | super(HParamsSearch, self).__init__(experiments=experiments)
131 | self.hyperparams = hyperparams
132 | self.metric = metric
133 | self.op = op
134 | self.results = dict()
135 |
136 | def fit(self, data, data_val, *args, **kwargs):
137 | """Apply the fit method to all the experiments
138 |
139 | Args:
140 |             see :meth:`alp.appcom.core.Experiment.fit`
141 |
142 | Returns:
143 | a list of results"""
144 | self._fit_cm(data, data_val, gen=False, async=False, *args, **kwargs)
145 | return self.results
146 |
147 | def fit_gen(self, data, data_val, *args, **kwargs):
148 | """Apply the fit_gen method to all the experiments
149 |
150 | Args:
151 | see :meth:`alp.appcom.core.Experiment.fit_gen`
152 |
153 | Returns:
154 | a list of results"""
155 | self._fit_cm(data, data_val, gen=True, async=False, *args, **kwargs)
156 | return self.results
157 |
158 | def fit_gen_async(self, data, data_val, *args, **kwargs):
159 | """Apply the fit_gen_async method to all the experiments
160 |
161 | Args:
162 | see :meth:`alp.appcom.core.Experiment.fit_gen_async`
163 |
164 | Returns:
165 | a list of results"""
166 | self._fit_cm(data, data_val, gen=True, async=True, *args, **kwargs)
167 | return self.results
168 |
169 | def fit_async(self, data, data_val, *args, **kwargs):
170 | """Apply the fit_async method to all the experiments
171 |
172 | Args:
173 | see :meth:`alp.appcom.core.Experiment.fit_async`
174 |
175 | Returns:
176 | a list of results"""
177 | self._fit_cm(data, data_val, gen=False, async=True, *args, **kwargs)
178 | return self.results
179 |
180 | def _fit_cm(self, data, data_val, gen, async, *args, **kwargs):
181 | with ProgressBar(max_value=len(self.experiments),
182 | redirect_stdout=True,
183 | widgets=widgets, term_width=80) as progress:
184 | for i, kv in enumerate(self.experiments.items()):
185 | k, expe = kv
186 | b = time()
187 | if gen and async:
188 | res = expe.fit_gen_async(data, data_val, *args, **kwargs)
189 | elif gen and not async:
190 | res = expe.fit_gen(data, data_val, *args, **kwargs)
191 | elif not gen and async:
192 | res = expe.fit_async(data, data_val, *args, **kwargs)
193 | else:
194 | res = expe.fit(data, data_val, *args, **kwargs)
195 |
196 | self.results[k] = res
197 | if i == 0:
198 | spent = time() - b
199 | to_print = spent
200 | else:
201 | spent += time() - b
202 | to_print = spent / (i + 1)
203 | progress.update(i, s=float(1 / to_print))
204 | if expe.backend_name == 'keras' and async: # pragma: no cover
205 | import keras.backend as K
206 | if K.backend() == 'tensorflow':
207 | K.clear_session()
208 | return self.results
209 |
210 | def predict(self, data, metric=None, op=None, partial=False,
211 | *args, **kwargs):
212 | """Apply the predict method to all the experiments
213 |
214 | Args:
215 | see :meth:`alp.appcom.core.Experiment.predict`
216 | metric(str): the name of the metric to use
217 | op(function): an operator returning the value to select an
218 | experiment
219 |
220 | Returns:
221 | an array of results"""
222 | if not metric:
223 | metric = self.metric
224 | if not op:
225 | op = self.op
226 |
227 | if metric is None or op is None:
228 | raise Exception('You should provide a metric along with an op')
229 | best_exp, best_key = get_best(self.experiments, metric, op, partial)
230 | return best_key, best_exp.predict(data, *args, **kwargs)
231 |
232 | def summary(self, metrics, verbose=False):
233 | """Build a results table using individual results from models
234 |
235 | Args:
236 | verbose(bool): if True, print a description of the results
237 |             metrics(dict): a dictionary mapping metric names to ops.
238 |
239 | Returns:
240 | a pandas DataFrame of results"""
241 | # build results table
242 | res_dict = dict()
243 | expes = self.experiments
244 | for kv in self.results.items():
245 | k, res = kv
246 | res, t = res
247 | if t is not None:
248 | t.join()
249 | for kr, v in expes[k].full_res['metrics'].items():
250 | if isinstance(v, list):
251 | if kr in metrics:
252 | op = metrics[kr]
253 | if kr in res_dict:
254 | res_dict[kr] += [op(v)]
255 | else:
256 | res_dict[kr] = []
257 | res_dict[kr] += [op(v)]
258 | res_table = pd.DataFrame(res_dict)
259 | if verbose is True:
260 | print(res_table.describe())
261 | return res_table
262 |
--------------------------------------------------------------------------------
/tests/backend/test_sklearn_backend.py:
--------------------------------------------------------------------------------
1 | """Tests for the sklearn backend"""
2 |
3 | import numpy as np
4 | import pytest
5 | import sklearn
6 |
7 | from fuel.datasets.hdf5 import H5PYDataset
8 | from fuel.schemes import SequentialScheme
9 | from fuel.streams import DataStream
10 | from fuel.transformers import ScaleAndShift
11 |
12 | from six.moves import zip as szip
13 |
14 | from sklearn import cross_validation as cv
15 | from sklearn import datasets
16 |
17 | from alp.appcom.core import Experiment
18 | from alp.appcom.utils import to_fuel_h5
19 | from alp.backend import sklearn_backend as SKB
20 | from alp.backend.sklearn_backend import getname
21 | from alp.utils.utils_tests import close_gens
22 |
23 |
24 | np.random.seed(1336)
25 | NAME = sklearn.__name__
26 | VERSION = sklearn.__version__
27 | CLASSIF = ['sklearn.linear_model.logistic.LogisticRegression',
28 | 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis',
29 | 'sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis']
30 |
31 |
32 | def generate_data(classif=False):
33 | data, data_val = dict(), dict()
34 | if classif:
35 | datas = datasets.load_iris()
36 | Xs = datas.data
37 | Ys = datas.target
38 | else:
39 | Xs = np.linspace(0, 12.3, num=150, endpoint=False).reshape(1, -1).T
40 | Ys = (Xs * np.sin(Xs)).ravel()
41 |
42 | data["X"], data_val["X"], data["y"], data_val["y"] = cv.train_test_split(
43 | Xs, Ys, test_size=20, random_state=0)
44 |
45 | return data, data_val
46 |
47 |
48 | def dump_data(data, data_val, classif=False):
49 | '''
50 | The sklearn version differs from the keras version
51 | in the following points:
52 | no local import of np
53 | no graph model
54 | validation cut at index 130
55 | classification or regression data will dump different files
56 | '''
57 | suffix = '_R'
58 | if classif:
59 | suffix = '_C'
60 |
61 | inputs = [np.concatenate([data['X'], data_val['X']])]
62 | outputs = [np.concatenate([data['y'], data_val['y']])]
63 |
64 | file_name = 'test_data' + suffix
65 | scale = 1.0 / inputs[0].std(axis=0)
66 | shift = scale * inputs[0].mean(axis=0)
67 |
68 | file_path, i_names, o_names = to_fuel_h5(inputs, outputs, [0, 130],
69 | ['train', 'test'],
70 | file_name,
71 | '/data_generator')
72 | return file_path, scale, shift, i_names, o_names
73 |
74 |
75 | data_R, data_val_R = generate_data(False)
76 | data_C, data_val_C = generate_data(True)
77 | file_path_R, scale_R, shift_R, i_names_R, o_names_R = dump_data(
78 | data_R, data_val_R, False)
79 | file_path_C, scale_C, shift_C, i_names_C, o_names_C = dump_data(
80 | data_C, data_val_C, True)
81 |
82 |
83 | def make_gen(Nchunks=True, classif=False, train=True):
84 | '''
85 | Nchunks==True : 10 chunks in the generator
86 | Nchunks == False : 1 chunk in the generator
87 | Makes the distinction between classification/regression
88 | Makes the distinction between test/train
89 | '''
90 |
91 | file_path_f = file_path_R
92 | shift_f = shift_R
93 | scale_f = scale_R
94 | if classif:
95 | file_path_f = file_path_C
96 | shift_f = shift_C
97 | scale_f = scale_C
98 |
99 | if Nchunks:
100 | batch_size = 13
101 | else:
102 | batch_size = 130
103 | t_scheme = SequentialScheme(examples=130, batch_size=batch_size)
104 | t_source = 'train'
105 | if not train:
106 | if Nchunks:
107 | batch_size = 2
108 | else:
109 | batch_size = 20
110 | t_source = 'test'
111 | t_scheme = SequentialScheme(examples=20, batch_size=batch_size)
112 |
113 | t_set = H5PYDataset(file_path_f, which_sets=[t_source])
114 | data_stream_t = DataStream(dataset=t_set, iteration_scheme=t_scheme)
115 |
116 | stand_stream_t = ScaleAndShift(data_stream=data_stream_t,
117 | scale=scale_f, shift=shift_f,
118 | which_sources=t_source)
119 |
120 | return stand_stream_t, t_set, data_stream_t
121 |
122 |
123 | keyval = dict()
124 | for m in SKB.SUPPORTED:
125 | keyval[getname(m)] = m()
126 |
127 |
128 | @pytest.fixture(scope='module', params=['no_metric', 'accuracy and mse'])
129 | def get_metric(request):
130 | if request.param == 'no_metric':
131 | return(None)
132 | elif request.param == 'accuracy and mse':
133 | return(['accuracy_score', 'mean_squared_error'])
134 |
135 |
136 | @pytest.fixture(scope='module', params=list(keyval.keys()))
137 | def get_model(request):
138 | model = keyval[request.param]
139 | return(model)
140 |
141 |
142 | @pytest.fixture
143 | def get_model_data_expe(get_model, get_metric):
144 | model, metric = get_model, get_metric
145 |
146 | expe = Experiment(model)
147 |
148 | data, data_val = data_R, data_val_R
149 | is_classif = False
150 | if getname(model, False) in CLASSIF:
151 | data, data_val = data_C, data_val_C
152 | is_classif = True
153 | else: # if regression model, remove accuracy
154 | if metric:
155 | if "accuracy_score" in metric:
156 | metric.remove("accuracy_score")
157 |
158 | return data, data_val, is_classif, model, metric, expe
159 |
160 |
161 | class TestExperiment:
162 |
163 | def test_experiment_instance_utils(self, get_model_data_expe):
164 | _, _, _, model, _, expe = get_model_data_expe
165 | expe.model_dict = model
166 | expe.backend_name = 'another_backend'
167 | expe.model_dict = model
168 | print(self)
169 |
170 | assert expe.backend is not None
171 |
172 | def test_experiment_fit(self, get_model_data_expe):
173 | data, data_val, _, model, metric, expe = get_model_data_expe
174 |
175 | for mod in [None, model]:
176 | for data_val_loc in [None, data_val]:
177 | expe.fit([data], [data_val_loc], model=mod,
178 | overwrite=True, metrics=metric)
179 |
180 | expe.backend_name = 'another_backend'
181 | expe.load_model()
182 | expe.load_model(expe.mod_id, expe.data_id)
183 |
184 | assert expe.data_id is not None
185 | assert expe.mod_id is not None
186 | assert expe.params_dump is not None
187 | print(self)
188 |
189 | def test_experiment_fit_gen_nogenval(self, get_model_data_expe):
190 | '''
191 | Main case: generator on train
192 |
193 | Subcases:
194 | 10 chunks on train B with and without val
195 | 1 chunk on train B with and without val
196 | '''
197 | data, data_val, is_classif, model, metric, expe = get_model_data_expe
198 |
199 | for Nchunks_gen, expected_value in szip([True, False], [10, 1]):
200 | gen_train, data_train, data_stream_train = make_gen(
201 | Nchunks_gen, is_classif, train=True)
202 |
203 | for data_val_loc in [None, data_val]:
204 | expe.fit_gen([gen_train], [data_val_loc],
205 | model=model,
206 | overwrite=True, metrics=metric)
207 |
208 | assert len(expe.full_res['metrics'][
209 | 'score']) == expected_value
210 | assert len(expe.full_res['metrics'][
211 | 'val_score']) == expected_value
212 |
213 | if data_val_loc is not None:
214 | assert None not in expe.full_res['metrics'][
215 | 'val_score']
216 | else:
217 | assert np.all([np.isnan(v) for v in expe.full_res[
218 | 'metrics']['val_score']])
219 |
220 | assert expe.data_id is not None
221 | assert expe.mod_id is not None
222 | assert expe.params_dump is not None
223 | assert expe
224 |
225 | close_gens(gen_train, data_train, data_stream_train)
226 |
227 | print(self)
228 |
229 | def test_experiment_fit_gen_withgenval(self, get_model_data_expe):
230 | '''
231 | Main case: gen on train, gen on val
232 | Subcases:
233 | 10 chunks on train / 10 chunks on val C3
234 | 10 chunks on train / 1 chunk on val C1
235 | 1 chunk on train / 10 chunks on val C2
236 | '''
237 | data, data_val, is_classif, model, metric, expe = get_model_data_expe
238 |
239 | for Nchunks_gen, Nchunks_val in szip([True, True, False],
240 | [True, False, True]):
241 | gen_train, data_train, data_stream_train = make_gen(
242 | Nchunks_gen, is_classif, train=True)
243 | gen_test, data_test, data_stream_test = make_gen(
244 | Nchunks_val, is_classif, train=False)
245 |
246 | expe.fit_gen([gen_train], [gen_test],
247 | overwrite=True, metrics=metric)
248 |
249 | expected_value_gen = 10
250 | if not Nchunks_gen:
251 | expected_value_gen = 1
252 |
253 | assert len(expe.full_res['metrics'][
254 | 'score']) == expected_value_gen
255 | assert len(expe.full_res['metrics'][
256 | 'val_score']) == 10
257 | assert expe.data_id is not None
258 | assert expe.mod_id is not None
259 | assert expe.params_dump is not None
260 | assert expe
261 |
262 | close_gens(gen_train, data_train, data_stream_train)
263 | close_gens(gen_test, data_test, data_stream_test)
264 |
265 | print(self)
266 |
267 | def test_experiment_fit_async(self, get_model_data_expe):
268 | data, data_val, _, model, metric, expe = get_model_data_expe
269 |
270 | for mod in [None, model]:
271 | for data_val_loc in [None, data_val]:
272 | _, thread = expe.fit_async([data], [data_val_loc],
273 | model=mod, overwrite=True,
274 | metrics=metric)
275 | thread.join()
276 | assert expe.data_id is not None
277 | assert expe.mod_id is not None
278 | assert expe.params_dump is not None
279 | assert expe
280 |
281 | # TODO: check consistency of results
282 | print(self)
283 |
284 | def test_experiment_fit_gen_async_nogenval(self, get_model_data_expe):
285 | '''
286 | Main case: gen on train, data on val
287 | Subcases:
288 | 10 chunks on train
289 | 1 chunk on train
290 | '''
291 | data, data_val, is_classif, model, metric, expe = get_model_data_expe
292 |
293 | for Nchunks_gen, expected_value in szip([True, False], [10, 1]):
294 | gen_train, data_train, data_stream_train = make_gen(
295 | Nchunks_gen, is_classif, train=True)
296 |
297 | for data_val_loc in [None, data_val]:
298 |
299 | _, thread = expe.fit_gen_async([gen_train], [data_val_loc],
300 | model=model,
301 | overwrite=True, metrics=metric)
302 | thread.join()
303 |
304 | assert len(expe.full_res['metrics'][
305 | 'score']) == expected_value
306 | assert len(expe.full_res['metrics'][
307 | 'val_score']) == expected_value
308 |
309 | if data_val_loc is not None:
310 | assert None not in expe.full_res['metrics'][
311 | 'val_score']
312 | else:
313 | assert np.all([np.isnan(v) for v in expe.full_res[
314 | 'metrics']['val_score']])
315 |
316 | assert expe.data_id is not None
317 | assert expe.mod_id is not None
318 | assert expe.params_dump is not None
319 | assert expe
320 |
321 | close_gens(gen_train, data_train, data_stream_train)
322 |
323 | print(self)
324 |
325 | def test_experiment_fit_gen_async_withgenval(self, get_model_data_expe):
326 | '''
327 | Main case: gen on train, gen on val
328 | Subcases:
329 | 10 chunks on train / 10 chunks on val
330 | 10 chunks on train / 1 chunk on val
331 | 1 chunk on train / 10 chunks on val
332 | '''
333 | data, data_val, is_classif, model, metric, expe = get_model_data_expe
334 |
335 | for Nchunks_gen, Nchunks_val in szip([True, True, False],
336 | [True, False, True]):
337 |
338 | gen_train, data_train, data_stream_train = make_gen(
339 | Nchunks_gen, is_classif, train=True)
340 | gen_test, data_test, data_stream_test = make_gen(
341 | Nchunks_val, is_classif, train=False)
342 |
343 | _, thread = expe.fit_gen_async(
344 | [gen_train], [gen_test], overwrite=True, metrics=metric)
345 | thread.join()
346 |
347 | expected_value_gen = 10
348 | if not Nchunks_gen:
349 | expected_value_gen = 1
350 |
351 | assert len(expe.full_res['metrics'][
352 | 'score']) == expected_value_gen
353 | assert len(expe.full_res['metrics'][
354 | 'val_score']) == 10
355 | assert expe.data_id is not None
356 | assert expe.mod_id is not None
357 | assert expe.params_dump is not None
358 | assert expe
359 |
360 | close_gens(gen_train, data_train, data_stream_train)
361 |
362 | print(self)
363 |
364 | def test_experiment_predict(self, get_model_data_expe):
365 | data, data_val, _, model, metric, expe = get_model_data_expe
366 | model._test_ = 'test'
367 |
368 | for mod in [None, model]:
369 | expe.fit([data], [data_val], model=mod, custom_objects={},
370 | overwrite=True, metrics=metric)
371 | expe.load_model()
372 | alp_pred = expe.predict(data['X'])
373 | alp_pred_async = expe.predict_async(data['X'])
374 |
375 | alp_pred_async = alp_pred_async.wait()
376 |
377 | model.fit(data['X'], data['y'])
378 | sklearn_pred = model.predict(data['X'])
379 | assert(np.allclose(alp_pred, sklearn_pred))
380 | assert(np.allclose(np.array(alp_pred_async), sklearn_pred))
381 | print(self)
382 |
383 | def test_experiment_predict_async(self, get_model_data_expe):
384 | data, data_val, _, model, metric, expe = get_model_data_expe
385 | model._test_ = 'test'
386 |
387 | for mod in [None, model]:
388 | expe.fit([data], [data_val], model=mod, custom_objects={},
389 | overwrite=True, metrics=metric)
390 | expe.load_model()
391 | alp_pred = expe.predict(data['X'])
392 |
393 | model.fit(data['X'], data['y'])
394 | sklearn_pred = model.predict(data['X'])
395 | assert(np.allclose(alp_pred, sklearn_pred))
396 | print(self)
397 |
398 |
399 |
400 | def test_utils():
401 | objects = [list(),
402 | [1, 2],
403 | [1., 2.],
404 |                list(np.array([1, 2], dtype=np.int64)),
405 |                list(np.array([1., 2.], dtype=np.float64)),
406 | list(np.array([np.ones((1))]))]
407 | for el in objects:
408 | SKB.typeconversion(el)
409 |
410 |
411 | if __name__ == "__main__":
412 | pytest.main([__file__])
413 |
--------------------------------------------------------------------------------
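The tests above unpack a `get_model_data_expe` fixture that is defined earlier in this file. For readers skimming this section in isolation, here is a minimal sketch of a fixture with the same shape; the `'X'`/`'y'` dict layout mirrors the calls in the tests, but the scikit-learn estimator and the metric choice are illustrative assumptions, not the project's actual fixture.

```python
# Illustrative sketch only: a fixture of this shape would satisfy the
# (data, data_val, is_classif, model, metric, expe) unpacking used above.
import numpy as np
import pytest
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from alp.appcom.core import Experiment


@pytest.fixture
def get_model_data_expe():
    # Small, self-contained classification problem; the 'X'/'y' dict layout
    # matches what the fit/predict calls in the tests expect.
    X = np.random.rand(50, 4)
    y = np.random.randint(0, 2, size=50)
    data = {'X': X, 'y': y}
    data_val = {'X': X.copy(), 'y': y.copy()}

    is_classif = True
    model = LogisticRegression()
    metric = [accuracy_score]  # Experiment.metrics is documented as a list of callables
    expe = Experiment(model=model, metrics=metric)
    return data, data_val, is_classif, model, metric, expe
```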
/src/alp/appcom/core.py:
--------------------------------------------------------------------------------
1 | """
2 | .. codeauthor:: Thomas Boquet thomas.boquet@r2.ca
3 |
4 | A simple module to perform training and prediction of models
5 | ============================================================
6 |
7 | Using `celery <http://www.celeryproject.org/>`_, this module helps to schedule
8 | the training of models when users submit several models in a short
9 | period of time.
10 |
11 | """
12 |
13 | import copy
14 | import sys
15 |
16 | from six.moves import zip as szip
17 | from ..appcom.utils import background
18 | from ..backend import common as cm
19 | from ..dbbackend import get_models
20 | from .utils import get_nb_chunks
21 | from .utils import init_backend
22 | from .utils import pickle_gen
23 | from .utils import switch_backend
24 |
25 |
26 | class Experiment(object):
27 | """An Experiment trains, predicts, saves and logs a model
28 |
29 | Attributes:
30 | model(model): the model used in the experiment
31 | metrics(list): a list of callables
32 | """
33 |
34 | def __init__(self, model=None, metrics=None, verbose=0):
35 | self.model = model
36 | self.trained = False
37 | self.verbose = verbose
38 | self.metrics = metrics
39 | if model is not None:
40 | backend, backend_name, backend_version = init_backend(model)
41 | self.backend = backend
42 | self.backend_name = backend_name
43 | self.backend_version = backend_version
44 | self.model_dict = self.backend.to_dict_w_opt(self.model,
45 | self.metrics)
46 | else:
47 | self.backend = None
48 | self.backend_name = None
49 | self.backend_version = None
50 | self.model_dict = None
51 |
52 | @property
53 | def model_dict(self):
54 | return self.__model_dict
55 |
56 | @model_dict.setter
57 | def model_dict(self, model_dict):
58 | if isinstance(model_dict, dict) or model_dict is None:
59 | self.__model_dict = dict()
60 | self.__model_dict['model_arch'] = model_dict
61 | self.mod_id = None
62 | self.params_dump = None
63 | self.data_id = None
64 | else:
65 | self.model = model_dict
66 | backend, backend_name, backend_version = init_backend(model_dict)
67 | self.backend = backend
68 | self.backend_name = backend_name
69 | self.backend_version = backend_version
70 | self.__model_dict['model_arch'] = self.backend.to_dict_w_opt(
71 | self.model, self.metrics)
72 | self.mod_id = None
73 | self.params_dump = None
74 | self.data_id = None
75 |
76 | @property
77 | def params_dump(self):
78 | return self.__params_dump
79 |
80 | @params_dump.setter
81 | def params_dump(self, params_dump):
82 | self.__model_dict['params_dump'] = params_dump
83 | self.__params_dump = params_dump
84 |
85 | @property
86 | def mod_id(self):
87 | return self.__mod_id
88 |
89 | @mod_id.setter
90 | def mod_id(self, mod_id):
91 | self.__model_dict['mod_id'] = mod_id
92 | self.__mod_id = mod_id
93 |
94 | @property
95 | def data_id(self):
96 | return self.__data_id
97 |
98 | @data_id.setter
99 | def data_id(self, data_id):
100 | self.__model_dict['data_id'] = data_id
101 | self.__data_id = data_id
102 |
103 | def fit(self, data, data_val, model=None, *args, **kwargs):
104 | """Build and fit a model given data and hyperparameters
105 |
106 | Args:
107 |             data(list(dict)): a list of dictionaries mapping input and
108 |                 output names to numpy arrays for training.
109 |             data_val(list(dict)): a list of dictionaries mapping input and
110 |                 output names to numpy arrays for validation.
111 |             model(model, optional): a model from a supported backend
112 | 
113 |         Returns:
114 |             the results dictionary returned by the backend and the thread
115 |             handling it (None in synchronous mode).
116 | """
117 | res = self._prepare_fit(model, data, data_val, generator=False,
118 | delay=False, *args, **kwargs)
119 | return res
120 |
121 | def fit_async(self, data, data_val, model=None,
122 | *args, **kwargs):
123 | """Build and fit asynchronously a model given data and hyperparameters
124 |
125 | Args:
126 |             data(list(dict)): a list of dictionaries mapping input and
127 |                 output names to numpy arrays for training.
128 |             data_val(list(dict)): a list of dictionaries mapping input and
129 |                 output names to numpy arrays for validation.
130 |             model(model, optional): a model from a supported backend
131 | 
132 |         Returns:
133 |             the celery async result and the thread that stores the results
134 |             in the Experiment once the worker is done.
135 | """
136 | res = self._prepare_fit(model, data, data_val, generator=False,
137 | delay=True, *args, **kwargs)
138 |
139 | return res
140 |
141 | def fit_gen(self, gen_train, data_val,
142 | model=None, *args, **kwargs):
143 |         """Build and fit a model given generator(s) and hyperparameters
144 | 
145 |         Args:
146 |             gen_train(list): a list of generators for training.
147 |             data_val(list): a list of dictionaries mapping input and output
148 |                 names to numpy arrays, or of generators, for validation.
149 |             model(model, optional): a model from a supported backend
150 | 
151 |         Returns:
152 |             the results dictionary returned by the backend and the thread
153 |             handling it (None in synchronous mode).
154 | """
155 | res = self._prepare_fit(model, gen_train, data_val, generator=True,
156 | delay=False, *args, **kwargs)
157 |
158 | return res
159 |
160 | def fit_gen_async(self, gen_train, data_val,
161 | model=None, *args, **kwargs):
162 | """Build and fit asynchronously a model given generator(s) and
163 | hyperparameters.
164 |
165 | Args:
166 |             gen_train(list): a list of generators for training.
167 |             data_val(list): a list of dictionaries mapping input and output
168 |                 names to numpy arrays, or of generators, for validation.
169 |             model(model, optional): a model from a supported backend
170 | 
171 |         Returns:
172 |             the celery async result and the thread that stores the results
173 |             in the Experiment once the worker is done.
174 | """
175 | res = self._prepare_fit(model, gen_train, data_val, generator=True,
176 | delay=True, *args, **kwargs)
177 | return res
178 |
179 | def load_model(self, mod_id=None, data_id=None):
180 |         """Load a model from the database given its mod_id and data_id
181 |
182 | Args:
183 | mod_id(str): the id of the model in the database
184 | data_id(str): the id of the data in the database"""
185 | if mod_id is None and data_id is None:
186 | mod_id = self.mod_id
187 | data_id = self.data_id
188 | assert mod_id is not None, 'You must provide a model id'
189 | assert data_id is not None, 'You must provide a data id'
190 | models = get_models()
191 | model_db = models.find_one({'mod_id': mod_id, 'data_id': data_id})
192 | self._switch_backend(model_db)
193 | self.model_dict = model_db['model_arch']
194 | self.params_dump = model_db['params_dump']
195 | self.mod_id = model_db['mod_id']
196 | self.data_id = model_db['data_id']
197 | self.full_res = None
198 | self.async_res = None
199 | self.trained = True
200 |
201 | return self
202 |
203 | def predict(self, data, *args, **kwargs):
204 | """Make predictions given data
205 |
206 | Args:
207 |             data(np.array): the input data
208 |
209 | Returns:
210 | an np.array of predictions"""
211 |         return self._predict(data, run_async=False, *args, **kwargs)
212 |
213 | def predict_async(self, data, *args, **kwargs):
214 |         """Make predictions asynchronously given data
215 | 
216 |         Args:
217 |             data(np.array): the input data
218 | 
219 |         Returns:
220 |             an async result resolving to an np.array of predictions"""
221 |         return self._predict(data, run_async=True, *args, **kwargs)
222 |
223 |     def _predict(self, data, run_async, *args, **kwargs):
224 |         """Make predictions given data
225 | 
226 |         Args:
227 |             data(np.array): the input data
228 |
229 | Returns:
230 | an np.array of predictions"""
231 | if self.trained:
232 |             if run_async:
233 |                 return self.backend.predict.delay(
234 |                     copy.deepcopy(self.model_dict), data, run_async, *args,
235 |                     **kwargs)
236 |             else:
237 |                 return self.backend.predict(
238 |                     copy.deepcopy(self.model_dict), data, run_async, *args,
239 |                     **kwargs)
240 | else:
241 |             raise Exception("You must have a trained model "
242 | "in order to make predictions")
243 |
244 | def _check_compile(self, model, kwargs_m):
245 | """Check if we have to recompile and reserialize the model
246 |
247 | Args:
248 | model(a supported model): the model sent (could be None).
249 | kwargs_m(dict): the keyword arguments passed to the wrapper
250 | """
251 | _recompile = False
252 | if model is not None:
253 | self.model = model
254 | _recompile = True
255 | if "metrics" in kwargs_m:
256 | self.metrics = kwargs_m.pop("metrics")
257 | _recompile = True
258 |
259 | if _recompile is True:
260 | self.model_dict = self.backend.to_dict_w_opt(self.model,
261 | self.metrics)
262 |
263 | if self.model is None:
264 | raise Exception('No model provided')
265 |
266 | def _switch_backend(self, model_db):
267 | """A utility function to switch backend when loading a model
268 |
269 | Args:
270 |             model_db(dict): the dictionary stored in the database
271 | """
272 | if model_db['backend_name'] != self.backend_name:
273 | backend = switch_backend(model_db['backend_name'])
274 | self.backend_name = backend.__name__
275 | self.backend_version = None
276 | if self.backend_name == 'keras':
277 | from ..backend import keras_backend
278 | self.backend = keras_backend
279 | elif self.backend_name == 'sklearn':
280 | from ..backend import sklearn_backend
281 | self.backend = sklearn_backend
282 | if hasattr(backend, '__version__'):
283 | check = self.backend_version != backend.__version__
284 | self.backend_version = backend.__version__
285 | if check and self.verbose > 0: # pragma: no cover
286 |                     sys.stderr.write('Warning: the backend versions '
287 | 'do not match.\n') # pragma: no cover
288 |
289 | def _check_serialize(self, kwargs):
290 |         """Serialize the objects mapped in the kwargs
291 |
292 | Args:
293 | kwargs(dict): keyword arguments
294 |
295 | Returns:
296 | kwargs
297 | """
298 | for k in kwargs:
299 | if k in self.backend.TO_SERIALIZE:
300 | if isinstance(kwargs[k], dict):
301 | kwargs[k] = {j: self.backend.serialize(kwargs[k][j])
302 | for j in kwargs[k]}
303 | elif isinstance(kwargs[k], list):
304 | kwargs[k] = [self.backend.serialize(j)
305 | for j in kwargs[k]]
306 | else:
307 | raise TypeError('Your iterable should be a dict or a list')
308 | return kwargs
309 |
310 | def _prepare_message(self, model, data, data_val, kwargs, generator=False):
311 | """Prepare the elements to be passed to the backend
312 |
313 | Args:
314 | model(supported model): the model to be prepared
315 | data(list): the list of dicts or generators used for training
316 | data_val(list): the list of dicts or generator used for validation
317 |
318 | Returns:
319 |             the transformed data object, the transformed validation data
320 |             object, the data hash and the generator setup list
321 | """
322 | self._check_compile(model, kwargs)
323 | kwargs = self._check_serialize(kwargs)
324 | gen_setup = []
325 |
326 | if generator:
327 | nb_data_chunks = [get_nb_chunks(d) for d in data]
328 | nb_data_val_chunks = [get_nb_chunks(dv) for dv in data_val]
329 | for d_c, dv_c in szip(nb_data_chunks, nb_data_val_chunks):
330 | is_val_one = dv_c == 1
331 | is_train_one = d_c == 1
332 |
333 | if dv_c is not None:
334 | # many to one
335 | if d_c > dv_c and is_val_one:
336 | gen_setup.append(1)
337 |
338 | # one to many
339 | elif d_c < dv_c and is_train_one:
340 | gen_setup.append(2)
341 |
342 | # equal
343 | elif d_c == dv_c:
344 | gen_setup.append(3)
345 |
346 | else: # pragma: no cover
347 |                         raise Exception('Nb batches in train generator and '
348 |                                         'validation generator not compatible')
349 |
350 | data_hash = cm.create_gen_hash(data)
351 | data, data_val = pickle_gen(data, data_val)
352 | else:
353 | data_hash = cm.create_data_hash(data)
354 |
355 | return data, data_val, data_hash, gen_setup
356 |
357 | def _prepare_fit(self, model, data, data_val,
358 | generator=False, delay=False,
359 | *args, **kwargs):
360 | """Prepare the model and the datasets and fit the model
361 |
362 | Args:
363 | model(a supported model): the model to send
364 |             data(list): the training data (dicts or generators)
365 |             data_val(list): the validation data (dicts or generators)
366 | generator(bool): if True, transforms the generators
367 | delay(bool): if True, fits the model in asynchronous mode
368 | """
369 |
370 | data, data_val, data_hash, size_gen = self._prepare_message(model,
371 | data,
372 | data_val,
373 | kwargs,
374 | generator)
375 |
376 | f = self.backend.fit
377 | if delay:
378 | f = self.backend.fit.delay
379 | res = f(self.backend_name,
380 | self.backend_version,
381 | copy.deepcopy(self.model_dict),
382 | data, data_hash, data_val,
383 | size_gen=size_gen,
384 | generator=generator,
385 | *args, **kwargs)
386 | return self._handle_results(res, delay)
387 |
388 | def _handle_results(self, res, delay):
389 | """Modify the Experiment given the results received from the worker
390 |
391 | Args:
392 | res(celery result or dict): the results returned by the model
393 | delay(bool): if True the result is an async celery result
394 |
395 | Returns:
396 | the results and the thread used to handle the results"""
397 | if delay:
398 | thread = self._get_results(res)
399 | else:
400 | self.mod_id = res['model_id']
401 | self.data_id = res['data_id']
402 | self.params_dump = res['params_dump']
403 |
404 | self.trained = True
405 | self.full_res = res
406 | thread = None
407 | return res, thread
408 |
409 | @background
410 | def _get_results(self, res):
411 | """Handle the results of an asynchronous task
412 |
413 | Args:
414 | res(async result): result of an asynchronous task"""
415 | self.async_res = res
416 | self.full_res = res.wait() # pragma: no cover
417 | self.trained = True # pragma: no cover
418 | self.mod_id = self.full_res['model_id'] # pragma: no cover
419 | self.data_id = self.full_res['data_id'] # pragma: no cover
420 | self.params_dump = self.full_res['params_dump'] # pragma: no cover
421 | if self.verbose > 0: # pragma: no cover
422 | print("Result {} | {} ready".format(
423 | self.mod_id, self.data_id)) # pragma: no cover
424 |
--------------------------------------------------------------------------------
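Taken together, core.py defines a small workflow: wrap a supported model in an `Experiment`, fit it either synchronously or through celery with the `*_async` variants, then reload the stored model and predict. Below is a minimal sketch of that flow. It assumes a running ALP stack (celery broker, workers and the MongoDB model store) and scikit-learn support; the `'X'`/`'y'` data layout, the `overwrite` keyword and the metric choice are taken from the test suite above and are illustrative, not a definitive recipe.

```python
# Sketch of the Experiment workflow defined in core.py. Assumes the ALP
# services (celery broker, workers, MongoDB) are up and reachable.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from alp.appcom.core import Experiment

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)
data = {'X': X, 'y': y}        # training inputs/outputs
data_val = {'X': X, 'y': y}    # validation inputs/outputs

expe = Experiment(model=LogisticRegression(), metrics=[accuracy_score])

# Synchronous fit: blocks until the worker returns the results dictionary.
expe.fit([data], [data_val], overwrite=True)

# Asynchronous fit: returns immediately; the background thread fills in
# mod_id, data_id and full_res once the celery task completes.
_, thread = expe.fit_async([data], [data_val], overwrite=True)
thread.join()

# Reload the stored model from the database and make predictions.
expe.load_model()
preds = expe.predict(data['X'])
```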
/docs/_static/last_bouquetin.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tboquet/python-alp/HEAD/docs/_static/last_bouquetin.svg
--------------------------------------------------------------------------------