├── lab ├── project │ ├── __init__.py │ └── cli.py ├── __init__.py ├── experiment │ ├── __init__.py │ ├── cli.py │ └── experiment.py ├── checks.py └── cli.py ├── docs_requirements.txt ├── examples ├── .DS_Store ├── README.txt ├── sklearn_randomforest.py └── keras_mnist_mlp.py ├── requirements.txt ├── docs ├── source │ ├── _static │ │ ├── lab-uml.png │ │ └── lab_screenshot.jpeg │ ├── auto_examples │ │ ├── auto_examples_jupyter.zip │ │ ├── auto_examples_python.zip │ │ ├── images │ │ │ └── thumb │ │ │ │ ├── sphx_glr_keras_mnist_mlp_thumb.png │ │ │ │ ├── sphx_glr_train_randomforest_thumb.png │ │ │ │ └── sphx_glr_sklearn_randomforest_thumb.png │ │ ├── index.rst │ │ ├── sklearn_randomforest.py │ │ ├── sklearn_randomforest.ipynb │ │ ├── keras_mnist_mlp.py │ │ ├── sklearn_randomforest.rst │ │ ├── keras_mnist_mlp.ipynb │ │ └── keras_mnist_mlp.rst │ ├── notebook.rst │ ├── faq.rst │ ├── index.rst │ ├── concepts.rst │ ├── conf.py │ ├── push.rst │ ├── quickstart.rst │ ├── dlexperiments.rst │ ├── cli.rst │ └── logging.rst ├── Makefile └── make.bat ├── setup.py ├── .gitignore ├── README.md └── LICENSE.txt /lab/project/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs_requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_materialdesign_theme 3 | sphinx_gallery 4 | -------------------------------------------------------------------------------- /examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/examples/.DS_Store -------------------------------------------------------------------------------- /lab/__init__.py: -------------------------------------------------------------------------------- 1 | from . import project 2 | from . import experiment 3 | 4 | from .checks import * 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | minio 2 | click 3 | pyyaml 4 | pandas 5 | numpy 6 | graphviz 7 | tabulate 8 | joblib 9 | -------------------------------------------------------------------------------- /lab/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | from .experiment import Experiment 2 | from .experiment import show_experiment 3 | -------------------------------------------------------------------------------- /docs/source/_static/lab-uml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/_static/lab-uml.png -------------------------------------------------------------------------------- /docs/source/_static/lab_screenshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/_static/lab_screenshot.jpeg -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples Gallery 2 | ===================== 3 | 4 | Several examples of how Lab can be used in common machine learning projects. 
5 | 
--------------------------------------------------------------------------------
/docs/source/auto_examples/auto_examples_jupyter.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/auto_examples_jupyter.zip
--------------------------------------------------------------------------------
/docs/source/auto_examples/auto_examples_python.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/auto_examples_python.zip
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_train_randomforest_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_train_randomforest_thumb.png
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png
--------------------------------------------------------------------------------
/docs/source/notebook.rst:
--------------------------------------------------------------------------------
1 | .. _notebook:
2 | 
3 | Working with Jupyter Notebooks
4 | ==============================
5 | 
6 | Lab makes it easy to work with Jupyter notebooks by creating a kernel directly from a Lab Project:
7 | 
8 | .. code-block:: bash
9 | 
10 |     lab notebook
11 | 
12 | Once the kernel is created, you can select it from any Jupyter session.
13 | 
14 | Lab also provides a `notebooks` directory to organise and maintain all notebooks associated with a Lab Project.
15 | 
--------------------------------------------------------------------------------
/docs/source/faq.rst:
--------------------------------------------------------------------------------
1 | .. _faq:
2 | 
3 | Frequently Asked Questions
4 | ==========================
5 | 
6 | How can I include a ``github`` repository in a Lab Project?
7 | ------------------------------------------------------------
8 | 
9 | Like ``pip``, lab works with a ``requirements.txt`` file. To let lab know that your virtual environment should contain a package maintained on GitHub, add the following line to your ``requirements.txt``:
10 | 
11 | .. code::
12 | 
13 |     -e git+https://github.com/beringresearch/ivis#egg=ivis
14 | 
15 | Modify the repository and package information accordingly.
16 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two. 
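# For example, to override them for a single build (standard sphinx-build
# usage; the -W flag turns warnings into errors):
#     make html SPHINXOPTS=-W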
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Machine Learning Lab 2 | ==================== 3 | 4 | .. image:: _static/lab_screenshot.jpeg 5 | :height: 400 px 6 | :width: 800 px 7 | :scale: 100 % 8 | 9 | Lab is an open source platform for managing machine learning pipelines. It addresses three core concepts: **Reproducibility**, **Logging**, and **Model Persistence**. Lab is lightweight and was designed to easily integrate with your existing training scripts. 10 | 11 | .. warning:: 12 | 13 | Lab is in active development and the current version of Lab is a beta release. This means that APIs and storage formats are subject to breaking change. 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: User Guide: 18 | 19 | Quickstart 20 | Core concepts 21 | Command Line Interface 22 | Tracking API 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Functionality: 27 | 28 | Managing Deep Learning Experiments 29 | Working with Jupyter Notebooks 30 | Model repository 31 | FAQ 32 | 33 | .. 
toctree:: 34 | :maxdepth: 1 35 | :caption: Applications: 36 | 37 | Examples 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('README.md', encoding='utf-8') as f: 4 | long_description = f.read() 5 | 6 | setup( 7 | name='lab-ml', 8 | version='0.83', 9 | long_description=long_description, 10 | long_description_content_type='text/markdown', 11 | packages=find_packages(), 12 | py_modules=['lab'], 13 | install_requires=[ 14 | 'click>=6.7', 15 | 'minio', 16 | 'numpy', 17 | 'pandas', 18 | 'pyyaml', 19 | 'tabulate', 20 | 'graphviz', 21 | 'joblib' 22 | ], 23 | entry_points=''' 24 | [console_scripts] 25 | lab=lab.cli:cli 26 | ''', 27 | author='Ignat Drozdov', 28 | author_email='idrozdov@beringresearch.com', 29 | description='Lab: a command line interface for the management of arbitrary machine learning tasks.', 30 | license='Apache License 2.0', 31 | classifiers=[ 32 | 'Intended Audience :: Developers', 33 | 'Programming Language :: Python :: 3.6', 34 | 'Operating System :: OS Independent', 35 | ], 36 | keywords='ml ai', 37 | url='https://github.com/beringresearch/lab' 38 | ) 39 | -------------------------------------------------------------------------------- /docs/source/auto_examples/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | 4 | 5 | .. _sphx_glr_auto_examples: 6 | 7 | Examples Gallery 8 | ===================== 9 | 10 | Several examples of how Lab can be used in common machine learning projects. 11 | 12 | 13 | 14 | .. raw:: html 15 | 16 |
17 | 18 | .. only:: html 19 | 20 | .. figure:: /auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png 21 | 22 | :ref:`sphx_glr_auto_examples_sklearn_randomforest.py` 23 | 24 | .. raw:: html 25 | 26 |
27 | 28 | 29 | .. toctree:: 30 | :hidden: 31 | 32 | /auto_examples/sklearn_randomforest 33 | 34 | .. raw:: html 35 | 36 |
37 | 38 | .. only:: html 39 | 40 | .. figure:: /auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png 41 | 42 | :ref:`sphx_glr_auto_examples_keras_mnist_mlp.py` 43 | 44 | .. raw:: html 45 | 46 |
47 | 48 | 49 | .. toctree:: 50 | :hidden: 51 | 52 | /auto_examples/keras_mnist_mlp 53 | .. raw:: html 54 | 55 |
56 | 
57 | 
58 | 
59 | .. only :: html
60 | 
61 |  .. container:: sphx-glr-footer
62 |     :class: sphx-glr-footer-gallery
63 | 
64 | 
65 |   .. container:: sphx-glr-download
66 | 
67 |      :download:`Download all examples in Python source code: auto_examples_python.zip </auto_examples/auto_examples_python.zip>`
68 | 
69 | 
70 | 
71 |   .. container:: sphx-glr-download
72 | 
73 |      :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip </auto_examples/auto_examples_jupyter.zip>`
74 | 
75 | 
76 | .. only:: html
77 | 
78 |  .. rst-class:: sphx-glr-signature
79 | 
80 |     `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
81 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | develop-eggs/
12 | dist/
13 | downloads/
14 | eggs/
15 | .eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | MANIFEST
26 | 
27 | _build
28 | docs/build/
29 | 
30 | __pycache__/
31 | *.egg
32 | .DS_Store
33 | *.swp
34 | .ipynb_checkpoints
35 | 
36 | # PyInstaller
37 | #  Usually these files are written by a python script from a template
38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | 
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 | 
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | 
57 | # Translations
58 | *.mo
59 | *.pot
60 | 
61 | # Django stuff:
62 | *.log
63 | .static_storage/
64 | .media/
65 | local_settings.py
66 | 
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 | 
71 | # Scrapy stuff:
72 | .scrapy
73 | 
74 | # Sphinx documentation
75 | docs/_build/
76 | 
77 | # PyBuilder
78 | target/
79 | 
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 | 
83 | # pyenv
84 | .python-version
85 | 
86 | # celery beat schedule file
87 | celerybeat-schedule
88 | 
89 | # SageMath parsed files
90 | *.sage.py
91 | 
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # mkdocs documentation
109 | /site
110 | 
111 | # mypy
112 | .mypy_cache/
113 | 
114 | .vscode/*
115 | !.vscode/settings.json
116 | !.vscode/tasks.json
117 | !.vscode/launch.json
118 | !.vscode/extensions.json
--------------------------------------------------------------------------------
/docs/source/concepts.rst:
--------------------------------------------------------------------------------
1 | .. _concepts:
2 | 
3 | Concepts
4 | ========
5 | 
6 | Lab is centred around three core concepts: *Reproducibility*, *Logging*, and *Model Persistence*. Lab is designed to integrate with your existing
7 | training scripts, while imposing as few constraints as possible.
8 | 
9 | 
10 | Reproducibility
11 | ---------------
12 | 
13 | Lab Projects are designed to be shared and re-used. This feature makes heavy use of Python's ``virtualenv`` module,
14 | enabling users to precisely define modules and environments that are required to run the associated experiments.
15 | 
16 | Every Project is initiated using a `requirements.txt <https://pip.pypa.io/en/stable/user_guide/#requirements-files>`_ file.
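A minimal example, following the quickstart workflow (the project name here is illustrative):

.. code-block:: bash

    echo "scikit-learn" > requirements.txt
    lab init --name my-project

``lab init`` builds a dedicated virtual environment in ``.venv`` from this file, so anyone who clones the Project can recreate it exactly.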
17 | 
18 | Logging
19 | -------
20 | 
21 | Lab was designed to benchmark multiple predictive models and hyperparameters. To accomplish this, it implements a simple API that stores:
22 | 
23 | - Feature names
24 | - Hyperparameters
25 | - Performance metrics
26 | - Model files
27 | 
28 | Model Persistence
29 | -----------------
30 | 
31 | Models are logged using the ``joblib`` module. This applies to both ``sklearn`` and ``keras`` experiments. This simple structure allows for a quick
32 | performance assessment and deployment of a model of choice into production.
33 | 
34 | Example Use Cases
35 | -----------------
36 | 
37 | At Bering, we use Lab for a number of use cases:
38 | 
39 | **Data Scientists** track individual experiments locally on their machines, consistently organising all files and artefacts for reproducibility.
40 | By setting up a naming schema, teams can work together on the same datasets to benchmark the performance of novel ML algorithms.
41 | 
42 | **Production Engineers** assess model performance and decide on the best possible model to be served in production environments. Lab's strict model
43 | versioning serves as a link between research and development environments and evolving production components.
44 | 
45 | **ML Researchers** can publish code to GitHub as a Lab Project, making it easy for others to reproduce findings.
46 | 
--------------------------------------------------------------------------------
/examples/sklearn_randomforest.py:
--------------------------------------------------------------------------------
1 | """
2 | Getting started with Lab and scikit-learn
3 | =========================================
4 | 
5 | This example illustrates how Lab can be used to create and run a simple
6 | classifier on the iris dataset.
7 | 
8 | Begin by creating a new Lab Project:
9 | 
10 | >>> echo "scikit-learn" > requirements.txt
11 | >>> lab init --name simple-iris
12 | 
13 | """
14 | 
15 | import argparse
16 | from sklearn import datasets
17 | from sklearn.ensemble import RandomForestClassifier
18 | from sklearn.model_selection import train_test_split
19 | from sklearn.metrics import accuracy_score, precision_score
20 | 
21 | from lab.experiment import Experiment
22 | 
23 | parser = argparse.ArgumentParser('Test arguments')
24 | 
25 | parser.add_argument('--n_estimators', type=int, dest='n_estimators')
26 | args = parser.parse_args()
27 | 
28 | n_estimators = args.n_estimators
29 | max_depth = 2  # defined unconditionally so it is always available for logging
30 | if n_estimators is None:
31 |     n_estimators = 100
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     e = Experiment(dataset='iris_75')
36 | 
37 |     @e.start_run
38 |     def train():
39 |         iris = datasets.load_iris()
40 |         X = iris.data
41 |         y = iris.target
42 | 
43 |         X_train, X_test, y_train, y_test = train_test_split(X, y,
44 |                                                             test_size=0.25,
45 |                                                             random_state=42)
46 | 
47 |         e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length',
48 |                         'Petal Width'])
49 |         clf = RandomForestClassifier(n_estimators=n_estimators,
50 |                                      max_depth=max_depth)
51 |         clf.fit(X_train, y_train)
52 | 
53 |         y_pred = clf.predict(X_test)
54 |         accuracy = accuracy_score(y_test, y_pred)
55 |         precision = precision_score(y_test, y_pred, average='macro')
56 | 
57 |         e.log_metric('accuracy_score', accuracy)
58 |         e.log_metric('precision_score', precision)
59 | 
60 |         e.log_parameter('n_estimators', n_estimators)
61 |         e.log_parameter('max_depth', max_depth)
62 | 
63 |         e.log_model('randomforest', clf)
64 | 
65 | ##############################################################
66 | # Afterwards, execute the training script through the `lab run` command.
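# Lab assigns the run an Experiment ID and stores the logged metrics,
# parameters and serialised model under the Project's ``experiments/``
# directory (see the quickstart guide for a sample ``lab ls`` listing).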
67 | # 68 | # >>> lab run train.py 69 | # >>> lab ls 70 | -------------------------------------------------------------------------------- /docs/source/auto_examples/sklearn_randomforest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting started with Lab and scikit-learn 3 | ========================================= 4 | 5 | This example illustrates how Lab can be used to create and run a simple 6 | classifier on the iris dataset. 7 | 8 | Begin by creating a new Lab Project: 9 | 10 | >>> echo "scikit-learn" > requirements.txt 11 | >>> lab init --name simple-iris 12 | 13 | """ 14 | 15 | import argparse 16 | from sklearn import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | from sklearn.model_selection import train_test_split 19 | from sklearn.metrics import accuracy_score, precision_score 20 | 21 | from lab.experiment import Experiment 22 | 23 | parser = argparse.ArgumentParser('Test arguments') 24 | 25 | parser.add_argument('--n_estimators', type=int, dest='n_estimators') 26 | args = parser.parse_args() 27 | 28 | n_estimators=args.n_estimators 29 | 30 | if n_estimators is None: 31 | n_estimators=100 32 | max_depth=2 33 | 34 | if __name__ == "__main__": 35 | e = Experiment(dataset='iris_75') 36 | 37 | @e.start_run 38 | def train(): 39 | iris = datasets.load_iris() 40 | X = iris.data 41 | y = iris.target 42 | 43 | X_train, X_test, y_train, y_test = train_test_split(X, y, 44 | test_size=0.25, 45 | random_state=42) 46 | 47 | e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 48 | 'Petal Width']) 49 | clf = RandomForestClassifier(n_estimators=n_estimators) 50 | 51 | clf.fit(X_train, y_train) 52 | 53 | y_pred = clf.predict(X_test) 54 | accuracy = accuracy_score(y_test, y_pred) 55 | precision = precision_score(y_test, y_pred, average = 'macro') 56 | 57 | e.log_metric('accuracy_score', accuracy) 58 | e.log_metric('precision_score', precision) 59 | 60 | e.log_parameter('n_estimators', n_estimators) 61 | e.log_parameter('max_depth', max_depth) 62 | 63 | e.log_model('randomforest', clf) 64 | 65 | ############################################################## 66 | # After execute training script through the `lab run` command. 67 | # 68 | # >>> lab run train.py 69 | # >>> lab ls 70 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'lab' 21 | copyright = '2020, Bering Limited' 22 | author = 'Bering Limited' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. 
They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx_gallery.gen_gallery']
31 | 
32 | master_doc = 'index'
33 | 
34 | sphinx_gallery_conf = {
35 |     'examples_dirs': '../../examples',
36 |     'gallery_dirs': 'auto_examples',
37 |     'plot_gallery': True
38 | }
39 | 
40 | html_theme_options = {
41 |     'header_links': [
42 |         ('Home', 'index', False, 'home'),
43 |         ('GitHub', 'https://github.com/beringresearch/lab', True, 'link'),
44 |         ('Bering', 'http://beringresearch.com', True, 'launch')],
45 |     'show_drawer_title': True,
46 | }
47 | 
48 | # Add any paths that contain templates here, relative to this directory.
49 | templates_path = ['_templates']
50 | 
51 | # List of patterns, relative to source directory, that match files and
52 | # directories to ignore when looking for source files.
53 | # This pattern also affects html_static_path and html_extra_path.
54 | exclude_patterns = []
55 | 
56 | 
57 | # -- Options for HTML output -------------------------------------------------
58 | 
59 | # The theme to use for HTML and HTML Help pages.  See the documentation for
60 | # a list of builtin themes.
61 | #
62 | html_theme = 'sphinx_materialdesign_theme'
63 | #html_logo = '_static/lab-logo.jpeg'
64 | 
65 | # Add any paths that contain custom static files (such as style sheets) here,
66 | # relative to this directory. They are copied after the builtin static files,
67 | # so a file named "default.css" will overwrite the builtin "default.css".
68 | html_static_path = ['_static']
69 | 
--------------------------------------------------------------------------------
/lab/checks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import click
4 | import shutil
5 | import subprocess
6 | import venv as ve
7 | 
8 | lab_project = ['experiments', 'data', 'logs', 'notebooks', 'config']
9 | 
10 | 
11 | # Project
12 | def check_minio_config(minio_tag):
13 |     """Check that minio configuration exists"""
14 |     home_dir = os.path.expanduser('~')
15 |     lab_dir = os.path.join(home_dir, '.lab')
16 | 
17 |     try:
18 |         with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file:
19 |             yaml.safe_load(file)[minio_tag]
20 |     except Exception as e:
21 |         print(str(e))
22 |         click.secho('Invalid global minio connection tag.', fg='red')
23 |         raise click.Abort()
24 | 
25 | 
26 | def is_venv(home_dir):
27 |     """Check that virtual environment exists"""
28 |     if not os.path.exists(os.path.join(home_dir, '.venv')):
29 |         click.secho('Virtual environment not found. '
30 |                     'Creating one for this project',
31 |                     fg='blue')
32 |         create_venv(home_dir)
33 | 
34 | 
35 | def is_empty_project():
36 |     """Check if there are any experiments in the project"""
37 |     experiments = next(os.walk('experiments'))[1]
38 |     if len(experiments) == 0:
39 |         click.secho("It looks like you've started a brand new project. "
40 |                     'Run your first experiment to generate a list of metrics.',
41 |                     fg='blue')
42 |         raise click.Abort()
43 | 
44 | 
45 | def is_lab_project():
46 |     """Check if the current directory is a lab project"""
47 |     _exists = [f for f in lab_project if os.path.exists(f)]
48 | 
49 |     if len(_exists) != len(lab_project):
50 |         click.secho('This directory does not appear to be a valid '
51 |                     'Lab Project.\nRun `lab init` to create one.',
52 |                     fg='red')
53 |         raise click.Abort()
54 | 
55 | 
56 | def create_venv(project_name):
57 |     """Create a lab virtual environment"""
58 |     # Create a virtual environment
59 |     venv_dir = os.path.join(project_name, '.venv')
60 | 
61 |     try:
62 |         environment = ve.EnvBuilder(system_site_packages=False,
63 |                                     symlinks=True, with_pip=True)
64 |         environment.create(venv_dir)
65 | 
66 |         subprocess.call([venv_dir + '/bin/pip', 'install',
67 |                          '--upgrade', 'pip'])
68 | 
69 |         subprocess.call([venv_dir + '/bin/pip',
70 |                          'install', '--upgrade', 'lab-ml'])
71 | 
72 |         subprocess.call([venv_dir + '/bin/pip', 'install',
73 |                          '-r', 'requirements.txt'])
74 | 
75 |     except Exception as e:
76 |         shutil.rmtree(venv_dir)
77 |         click.secho('Something went wrong during .venv creation.',
78 |                     fg='red')
79 |         print(str(e))
80 |         raise click.Abort()
81 | 
--------------------------------------------------------------------------------
/docs/source/push.rst:
--------------------------------------------------------------------------------
1 | .. _push:
2 | 
3 | Model Repository
4 | ================
5 | 
6 | Lab uses minio to store Projects. Minio is a high performance distributed object storage server, designed for large-scale private cloud infrastructure. This makes it a great fit as a storage environment for multiple Lab Projects and Experiments. Lab makes it trivial to back up completed Projects and share them across teams.
7 | 
8 | .. image:: _static/lab-uml.png
9 |     :height: 500 px
10 |     :width: 800 px
11 |     :scale: 100 %
12 | 
13 | 
14 | Configuring minio server
15 | ------------------------
16 | 
17 | There are a number of ways to install minio on a wide range of operating systems. See more detailed installation
18 | instructions in the minio documentation pages.
19 | 
20 | Setting up Lab minio interface
21 | ------------------------------
22 | 
23 | Once minio is up and running, you will need to make a note of the ``endpoint``, ``access key``, and ``secret key``. Lab supports multiple minio configurations
24 | through a convenient tagging system. Each configuration can be set up through the CLI:
25 | 
26 | .. code-block:: bash
27 | 
28 |     lab config minio --tag [MINIO_TAG] --endpoint [TEXT] --accesskey [TEXT] --secretkey [TEXT]
29 | 
30 | Note that the endpoint is simply an IP address and port of a minio host, e.g. ``192.168.1.50:9000``.
31 | 
32 | Storing Lab Projects
33 | --------------------
34 | 
35 | Lab Projects can be pushed to a specific minio host by running a simple command from the Project root folder:
36 | 
37 | .. code-block:: bash
38 | 
39 |     lab push --tag [MINIO_TAG] --bucket [TEXT] .
40 | 
41 | Here, ``--tag`` specifies a nickname of an existing minio connection and ``--bucket`` refers to a unique destination name on the minio host, analogous to an S3 bucket.
42 | 
43 | Each project contains a `.labignore` file that specifies intentionally untracked files to ignore during a push. A default `.labignore` will omit the virtual environment directory `.venv`. Further omissions can be specified on each line:
44 | 
45 | .. 
code::
46 | 
47 |     .venv
48 |     data
49 |     experiments/abcdefgh/model.joblib
50 | 
51 | 
52 | Pruning remote repository
53 | -------------------------
54 | 
55 | Sometimes it may be desirable to prune a remote repository. Pruning simply replaces the entire content of a remote repository with local files.
56 | The user is warned just before proceeding, as this operation can have undesirable consequences.
57 | 
58 | .. code-block:: bash
59 | 
60 |     lab push --tag [MINIO_TAG] --bucket [TEXT] --force .
61 | 
62 | Pulling from a remote repository
63 | --------------------------------
64 | 
65 | To retrieve a Lab Project from a minio host, run a simple command from the folder into which you'd like to pull the Project:
66 | 
67 | .. code-block:: bash
68 | 
69 |     lab pull --tag [MINIO_TAG] --bucket [TEXT] --project [TEXT]
70 | 
71 | 
72 | In cases where a connection with minio has already been established, a project can be pushed/pulled directly from the project directory via ``lab push`` or ``lab pull`` without further options.
73 | 
--------------------------------------------------------------------------------
/docs/source/auto_examples/sklearn_randomforest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "cells": [
3 |     {
4 |       "cell_type": "code",
5 |       "execution_count": null,
6 |       "metadata": {
7 |         "collapsed": false
8 |       },
9 |       "outputs": [],
10 |       "source": [
11 |         "%matplotlib inline"
12 |       ]
13 |     },
14 |     {
15 |       "cell_type": "markdown",
16 |       "metadata": {},
17 |       "source": [
18 |         "\nGetting started with Lab and scikit-learn\n=========================================\n\nThis example illustrates how Lab can be used to create and run a simple\nclassifier on the iris dataset.\n\nBegin by creating a new Lab Project:\n\n    >>> echo \"scikit-learn\" > requirements.txt\n    >>> lab init --name simple-iris\n" 
19 |       ]
20 |     },
21 |     {
22 |       "cell_type": "code",
23 |       "execution_count": null,
24 |       "metadata": {
25 |         "collapsed": false
26 |       },
27 |       "outputs": [],
28 |       "source": [
29 |         "import argparse\nfrom sklearn import datasets\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, precision_score\n\nfrom lab.experiment import Experiment\n\nparser = argparse.ArgumentParser('Test arguments')\n\nparser.add_argument('--n_estimators', type=int, dest='n_estimators')\nargs = parser.parse_args()\n\nn_estimators=args.n_estimators\n\nif n_estimators is None:\n    n_estimators=100\n    max_depth=2\n\nif __name__ == \"__main__\":\n    e = Experiment(dataset='iris_75')\n\n    @e.start_run\n    def train():\n        iris = datasets.load_iris()\n        X = iris.data\n        y = iris.target\n\n        X_train, X_test, y_train, y_test = train_test_split(X, y,\n                                                            test_size=0.25,\n                                                            random_state=42)\n\n        e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length',\n                        'Petal Width'])\n        clf = RandomForestClassifier(n_estimators=n_estimators)\n\n        clf.fit(X_train, y_train)\n\n        y_pred = clf.predict(X_test)\n        accuracy = accuracy_score(y_test, y_pred)\n        precision = precision_score(y_test, y_pred, average = 'macro')\n\n        e.log_metric('accuracy_score', accuracy)\n        e.log_metric('precision_score', precision)\n\n        e.log_parameter('n_estimators', n_estimators)\n        e.log_parameter('max_depth', max_depth)\n\n        e.log_model('randomforest', clf)"
30 |       ]
31 |     },
32 |     {
33 |       "cell_type": "markdown",
34 |       "metadata": {},
35 |       "source": [
36 |         "Afterwards, execute the training script through the `lab run` command.\n\n>>> lab run train.py\n>>> lab ls\n\n"
37 |       ]
38 |     }
39 |   ],
40 |   "metadata": {
41 |     "kernelspec": {
42 |       "display_name": "Python 3",
43 |       "language": "python",
44 |       "name": "python3"
45 |     },
46 |     "language_info": {
47 |       "codemirror_mode": {
48 |         "name": "ipython",
49 |         "version": 3
50 |       },
51 |       "file_extension": ".py",
52 |       "mimetype": "text/x-python",
53 |       "name": "python",
54 |       "nbconvert_exporter": "python",
55 |       "pygments_lexer": "ipython3",
56 |       "version": "3.7.5"
57 |     }
58 |   },
59 |   "nbformat": 4,
60 |   "nbformat_minor": 0
61 | }
--------------------------------------------------------------------------------
/examples/keras_mnist_mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | Running Keras models with Tensorboard
3 | =====================================
4 | 
5 | Lab integrates into a typical keras workflow.
6 | 
7 | WARNING: model persistence in Keras can be complicated, especially when
8 | working with complex models. It is recommended to checkpoint each training
9 | epoch independently from Lab's ``log_model`` API.
10 | 
11 | Begin by creating a new Lab Project:
12 | 
13 | >>> echo "keras" > requirements.txt
14 | >>> lab init --name simple-keras
15 | """
16 | 
17 | import keras
18 | from keras.datasets import mnist
19 | from keras.models import Sequential
20 | from keras.layers import Dense, Dropout
21 | from keras.optimizers import RMSprop
22 | from keras.callbacks import TensorBoard
23 | 
24 | import tempfile
25 | 
26 | from sklearn.metrics import accuracy_score, precision_score
27 | 
28 | from lab.experiment import Experiment
29 | 
30 | batch_size = 128
31 | num_classes = 10
32 | epochs = 20
33 | 
34 | # the data, split between train and test sets
35 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
36 | 
37 | x_train = x_train.reshape(60000, 784)
38 | x_test = x_test.reshape(10000, 784)
39 | x_train = x_train.astype('float32')
40 | x_test = x_test.astype('float32')
41 | x_train /= 255
42 | x_test /= 255
43 | print(x_train.shape[0], 'train samples')
44 | print(x_test.shape[0], 'test samples')
45 | 
46 | # convert class vectors to binary class matrices
47 | y_train = keras.utils.to_categorical(y_train, num_classes)
48 | y_test = keras.utils.to_categorical(y_test, num_classes)
49 | 
50 | model = Sequential()
51 | model.add(Dense(512, activation='relu', input_shape=(784,)))
52 | model.add(Dropout(0.2))
53 | model.add(Dense(512, activation='relu'))
54 | model.add(Dropout(0.2))
55 | model.add(Dense(num_classes, activation='softmax'))
56 | 
57 | model.compile(loss='categorical_crossentropy',
58 |               optimizer=RMSprop(),
59 |               metrics=['accuracy'])
60 | 
61 | e = Experiment()
62 | 
63 | 
64 | @e.start_run
65 | def train():
66 | 
67 |     # Create a temporary directory for tensorboard logs
68 |     output_dir = tempfile.mkdtemp()
69 |     print("Writing TensorBoard events locally to %s\n" % output_dir)
70 |     tensorboard = TensorBoard(log_dir=output_dir)
71 | 
72 |     # During Experiment execution, tensorboard can be viewed through:
73 |     # tensorboard --logdir=[output_dir]
74 | 
75 |     model.fit(x_train, y_train,
76 |               batch_size=batch_size,
77 |               epochs=epochs,
78 |               verbose=1,
79 |               validation_data=(x_test, y_test),
80 |               callbacks=[tensorboard])
81 | 
82 |     y_prob = model.predict(x_test)
83 |     y_classes = y_prob.argmax(axis=-1)
84 |     actual = y_test.argmax(axis=-1)
85 | 
86 |     accuracy = accuracy_score(y_true=actual, y_pred=y_classes)
87 |     precision = precision_score(y_true=actual, y_pred=y_classes,
88 |                                 average='macro')
89 | 
90 |     # Log tensorboard
91 |     e.log_artifacts('tensorboard', output_dir)
92 | 
93 |     # Log all metrics
94 |     
e.log_metric('accuracy_score', accuracy) 95 | e.log_metric('precision_score', precision) 96 | 97 | # Log parameters 98 | e.log_parameter('batch_size', batch_size) 99 | 100 | # Save model 101 | e.log_model('mnist-mlp', model) 102 | -------------------------------------------------------------------------------- /docs/source/auto_examples/keras_mnist_mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Running Keras models with Tensorboard 3 | ===================================== 4 | 5 | Lab integrates into a typical keras workflow. 6 | 7 | WARNING: model persistence in Keras can be complicated, especially when 8 | working with complext models. It is recommended to checkpoint each training 9 | epoch independently from Lab's ``log_model`` API. 10 | 11 | Bering by creating a new Lab Project: 12 | 13 | >>> echo "keras" > requirements.txt 14 | >>> lab init --name simple-keras 15 | """ 16 | 17 | import keras 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Dropout 21 | from keras.optimizers import RMSprop 22 | from keras.callbacks import TensorBoard 23 | 24 | import tempfile 25 | 26 | from sklearn.metrics import accuracy_score, precision_score 27 | 28 | from lab.experiment import Experiment 29 | 30 | batch_size = 128 31 | num_classes = 10 32 | epochs = 20 33 | 34 | # the data, split between train and test sets 35 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 36 | 37 | x_train = x_train.reshape(60000, 784) 38 | x_test = x_test.reshape(10000, 784) 39 | x_train = x_train.astype('float32') 40 | x_test = x_test.astype('float32') 41 | x_train /= 255 42 | x_test /= 255 43 | print(x_train.shape[0], 'train samples') 44 | print(x_test.shape[0], 'test samples') 45 | 46 | # convert class vectors to binary class matrices 47 | y_train = keras.utils.to_categorical(y_train, num_classes) 48 | y_test = keras.utils.to_categorical(y_test, num_classes) 49 | 50 | model = Sequential() 51 | model.add(Dense(512, activation='relu', input_shape=(784,))) 52 | model.add(Dropout(0.2)) 53 | model.add(Dense(512, activation='relu')) 54 | model.add(Dropout(0.2)) 55 | model.add(Dense(num_classes, activation='softmax')) 56 | 57 | model.compile(loss='categorical_crossentropy', 58 | optimizer=RMSprop(), 59 | metrics=['accuracy']) 60 | 61 | e = Experiment() 62 | 63 | 64 | @e.start_run 65 | def train(): 66 | 67 | # Create a temporary directory for tensorboard logs 68 | output_dir = tempfile.mkdtemp() 69 | print("Writing TensorBoard events locally to %s\n" % output_dir) 70 | tensorboard = TensorBoard(log_dir=output_dir) 71 | 72 | # During Experiment execution, tensorboard can be viewed through: 73 | # tensorboard --logdir=[output_dir] 74 | 75 | model.fit(x_train, y_train, 76 | batch_size=batch_size, 77 | epochs=epochs, 78 | verbose=1, 79 | validation_data=(x_test, y_test), 80 | callbacks=[tensorboard]) 81 | 82 | y_prob = model.predict(x_test) 83 | y_classes = y_prob.argmax(axis=-1) 84 | actual = y_test.argmax(axis=-1) 85 | 86 | accuracy = accuracy_score(y_true=actual, y_pred=y_classes) 87 | precision = precision_score(y_true=actual, y_pred=y_classes, 88 | average='macro') 89 | 90 | # Log tensorboard 91 | e.log_artifacts('tensorboard', output_dir) 92 | 93 | # Log all metrics 94 | e.log_metric('accuracy_score', accuracy) 95 | e.log_metric('precision_score', precision) 96 | 97 | # Log parameters 98 | e.log_parameter('batch_size', batch_size) 99 | 100 | # Save model 101 | e.log_model('mnist-mlp', model) 102 | 
-------------------------------------------------------------------------------- /docs/source/auto_examples/sklearn_randomforest.rst: -------------------------------------------------------------------------------- 1 | .. note:: 2 | :class: sphx-glr-download-link-note 3 | 4 | Click :ref:`here ` to download the full example code 5 | .. rst-class:: sphx-glr-example-title 6 | 7 | .. _sphx_glr_auto_examples_sklearn_randomforest.py: 8 | 9 | 10 | Getting started with Lab and scikit-learn 11 | ========================================= 12 | 13 | This example illustrates how Lab can be used to create and run a simple 14 | classifier on the iris dataset. 15 | 16 | Begin by creating a new Lab Project: 17 | 18 | >>> echo "scikit-learn" > requirements.txt 19 | >>> lab init --name simple-iris 20 | 21 | 22 | .. code-block:: default 23 | 24 | 25 | import argparse 26 | from sklearn import datasets 27 | from sklearn.ensemble import RandomForestClassifier 28 | from sklearn.model_selection import train_test_split 29 | from sklearn.metrics import accuracy_score, precision_score 30 | 31 | from lab.experiment import Experiment 32 | 33 | parser = argparse.ArgumentParser('Test arguments') 34 | 35 | parser.add_argument('--n_estimators', type=int, dest='n_estimators') 36 | args = parser.parse_args() 37 | 38 | n_estimators=args.n_estimators 39 | 40 | if n_estimators is None: 41 | n_estimators=100 42 | max_depth=2 43 | 44 | if __name__ == "__main__": 45 | e = Experiment(dataset='iris_75') 46 | 47 | @e.start_run 48 | def train(): 49 | iris = datasets.load_iris() 50 | X = iris.data 51 | y = iris.target 52 | 53 | X_train, X_test, y_train, y_test = train_test_split(X, y, 54 | test_size=0.25, 55 | random_state=42) 56 | 57 | e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 58 | 'Petal Width']) 59 | clf = RandomForestClassifier(n_estimators=n_estimators) 60 | 61 | clf.fit(X_train, y_train) 62 | 63 | y_pred = clf.predict(X_test) 64 | accuracy = accuracy_score(y_test, y_pred) 65 | precision = precision_score(y_test, y_pred, average = 'macro') 66 | 67 | e.log_metric('accuracy_score', accuracy) 68 | e.log_metric('precision_score', precision) 69 | 70 | e.log_parameter('n_estimators', n_estimators) 71 | e.log_parameter('max_depth', max_depth) 72 | 73 | e.log_model('randomforest', clf) 74 | 75 | 76 | After execute training script through the `lab run` command. 77 | 78 | >>> lab run train.py 79 | >>> lab ls 80 | 81 | 82 | .. rst-class:: sphx-glr-timing 83 | 84 | **Total running time of the script:** ( 0 minutes 0.000 seconds) 85 | 86 | 87 | .. _sphx_glr_download_auto_examples_sklearn_randomforest.py: 88 | 89 | 90 | .. only :: html 91 | 92 | .. container:: sphx-glr-footer 93 | :class: sphx-glr-footer-example 94 | 95 | 96 | 97 | .. container:: sphx-glr-download 98 | 99 | :download:`Download Python source code: sklearn_randomforest.py ` 100 | 101 | 102 | 103 | .. container:: sphx-glr-download 104 | 105 | :download:`Download Jupyter notebook: sklearn_randomforest.ipynb ` 106 | 107 | 108 | .. only:: html 109 | 110 | .. rst-class:: sphx-glr-signature 111 | 112 | `Gallery generated by Sphinx-Gallery `_ 113 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | Quickstart 4 | ========== 5 | 6 | Installing Lab 7 | -------------- 8 | 9 | For the time being, lab is available through our github repository: 10 | 11 | .. 
code-block:: bash
12 | 
13 |     git clone https://github.com/beringresearch/lab
14 |     cd lab
15 |     pip install --editable .
16 | 
17 | .. note::
18 | 
19 |     You cannot install Lab on the macOS system installation of Python. We recommend installing
20 |     Python 3 through the `Homebrew <https://brew.sh>`_ package manager using
21 |     ``brew install python``.
22 | 
23 | Setting up your first Project
24 | -----------------------------
25 | Lab projects are initiated using a ``requirements.txt`` file. This ensures a consistent and reproducible environment.
26 | 
27 | Let's create a simple environment that imports sklearn:
28 | 
29 | .. code-block:: bash
30 | 
31 |     echo "scikit-learn" >> requirements.txt
32 |     lab init --name test
33 | 
34 | Lab will run through project initialisation and create a new **test** project with its own virtual environment.
35 | 
36 | Creating your first Lab Experiment
37 | ----------------------------------
38 | Training scripts can be placed directly into the *test/* directory. Here's an example training script, *train.py*, set up to train a Random Forest classifier with the appropriate Lab logging API:
39 | 
40 | .. code-block:: python
41 | 
42 |     from sklearn import datasets
43 |     from sklearn.ensemble import RandomForestClassifier
44 |     from sklearn.model_selection import train_test_split
45 |     from sklearn.metrics import accuracy_score, precision_score
46 | 
47 |     from lab.experiment import Experiment  # Import Experiment
48 | 
49 |     e = Experiment()  # Initialise Lab Experiment
50 | 
51 |     @e.start_run  # Indicate the start of the Experiment
52 |     def train():
53 |         iris = datasets.load_iris()
54 |         X = iris.data
55 |         y = iris.target
56 | 
57 |         X_train, X_test, \
58 |             y_train, y_test = train_test_split(X, y,
59 |                                                test_size=0.24,
60 |                                                random_state=42)
61 | 
62 |         n_estimators = 100
63 | 
64 |         e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'])
65 |         clf = RandomForestClassifier(n_estimators=n_estimators)
66 |         clf.fit(X_train, y_train)
67 | 
68 |         y_pred = clf.predict(X_test)
69 |         accuracy = accuracy_score(y_test, y_pred)
70 |         precision = precision_score(y_test, y_pred, average='macro')
71 | 
72 |         e.log_metric('accuracy_score', accuracy)  # Log accuracy
73 |         e.log_metric('precision_score', precision)  # Log precision
74 | 
75 |         e.log_parameter('n_estimators', n_estimators)  # Log parameters of your choice
76 | 
77 |         e.log_model('randomforest', clf)  # Log the actual model
78 | 
79 | Running a Lab Experiment
80 | ------------------------
81 | 
82 | The Experiment can now be launched through:
83 | 
84 | .. code-block:: bash
85 | 
86 |     lab run train.py
87 | 
88 | Lab will log performance metrics and model files into appropriate Experiment folders.
89 | 
90 | Compare Lab Experiments
91 | ------------------------
92 | 
93 | Multiple Experiments can be compared from the root of the Project folder:
94 | 
95 | .. 
code-block:: bash
96 | 
97 |     lab ls
98 | 
99 |     Experiment    Source              Date        accuracy_score    precision_score
100 |     ------------  ------------------  ----------  ----------------  -----------------
101 |     49ffb76e      train_mnist_mlp.py  2019-01-15  0.97: ██████████  0.97: ██████████
102 |     261a34e4      train_mnist_cnn.py  2019-01-15  0.98: ██████████  0.98: ██████████
--------------------------------------------------------------------------------
/lab/experiment/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 | import os
3 | import subprocess
4 | import yaml
5 | import shutil
6 | import sys
7 | import graphviz
8 | 
9 | from lab.experiment import show_experiment
10 | from lab import is_lab_project, is_empty_project, is_venv
11 | 
12 | 
13 | @click.command('rm')
14 | @click.argument('experiment_id', required=True)
15 | def lab_rm(experiment_id):
16 |     """ Remove a Lab Experiment """
17 | 
18 |     is_lab_project()
19 | 
20 |     experiment_dir = os.path.join('experiments', experiment_id)
21 |     logs_dir = os.path.join('logs', experiment_id)
22 | 
23 |     if not os.path.exists(experiment_dir):
24 |         click.secho("Can't find experiment ["+experiment_id+'] in the current '
25 |                     'directory.\nEnsure that you are in Lab Project root',
26 |                     fg='red')
27 |     else:
28 |         shutil.rmtree(experiment_dir)
29 |         shutil.rmtree(logs_dir)
30 |         click.secho('['+experiment_id+'] removed', fg='blue')
31 | 
32 | 
33 | @click.command('show')
34 | @click.argument('experiment_id', required=False)
35 | def lab_show(experiment_id=None):
36 |     """ Show a Lab Experiment """
37 | 
38 |     is_lab_project()
39 |     is_empty_project()
40 | 
41 |     models_directory = 'experiments'
42 | 
43 |     experiments = next(os.walk(models_directory))[1]
44 | 
45 |     if experiment_id is None:
46 |         experiments = next(os.walk('experiments'))[1]
47 |         p = graphviz.Digraph(name='lab_project', format='png')
48 |         p.graph_attr['rankdir'] = 'LR'
49 | 
50 |         for e in experiments:
51 |             p.subgraph(show_experiment(e))
52 |     else:
53 |         experiment_dir = os.path.join('experiments', experiment_id)
54 |         if not os.path.exists(experiment_dir):
55 |             click.secho(
56 |                 "Can't find experiment ["+experiment_id+'] in the current '
57 |                 'directory.\nEnsure that you are in Lab Project root',
58 |                 fg='red')
59 |             raise click.Abort()
60 |         else:
61 |             p = show_experiment(experiment_id)
62 | 
63 |     p.render()
64 | 
65 | 
66 | @click.command('run', context_settings=dict(
67 |     ignore_unknown_options=True,
68 | ))
69 | @click.argument('script', required=False,
70 |                 nargs=-1, type=click.UNPROCESSED)
71 | def lab_run(script):
72 |     """ Run a training script """
73 | 
74 |     home_dir = os.getcwd()
75 | 
76 |     is_lab_project()
77 |     is_venv(home_dir)
78 | 
79 |     try:
80 |         with open(os.path.join(os.getcwd(),
81 |                                'config', 'runtime.yaml'), 'r') as file:
82 |             config = yaml.safe_load(file)
83 |             home_dir = config['path']
84 | 
85 |             # Update project directory if it hasn't been updated
86 |             if home_dir != os.getcwd():
87 |                 config['path'] = os.getcwd()
88 |                 home_dir = config['path']
89 | 
90 |                 with open(os.path.join(os.getcwd(),
91 |                           'config', 'runtime.yaml'), 'w') as file:
92 |                     yaml.dump(config, file, default_flow_style=False)
93 | 
94 |     except KeyError:
95 |         click.secho('Looks like this Project was configured with an earlier '
96 |                     'version of Lab. 
Check that config/runtime.yaml file '
97 |                     'has a valid path key and value.', fg='red')
98 |         raise click.Abort()
99 | 
100 |     # Resolve the Python interpreter from the Project's virtual environment
101 |     click.secho('Initializing', fg='cyan')
102 | 
103 |     python_bin = os.path.join(home_dir, '.venv', 'bin/python')
104 | 
105 |     click.secho('Running '+str(script), fg='green')
106 |     subprocess.call([python_bin] + list(script))
107 |     click.secho('Finished!', fg='green')
108 | 
--------------------------------------------------------------------------------
/docs/source/dlexperiments.rst:
--------------------------------------------------------------------------------
1 | .. _dlexperiments:
2 | 
3 | Managing Deep Learning Experiments
4 | ==================================
5 | 
6 | The Deep Learning experiment lifecycle generates a rich set of data artifacts, e.g., expansive datasets, complex model architectures, varied hyperparameters, learned weights, and training logs. To produce an effective model, a researcher often has to iterate over multiple scripts, making it challenging to reproduce complex experiments.
7 | 
8 | Lab offers a clean and standardised interface for managing the many moving parts of a Deep Learning experiment.
9 | 
10 | MNIST Example
11 | ~~~~~~~~~~~~~~~~
12 | 
13 | Consider the following lab training script. Let's set up our hyperparameters and training and test sets:
14 | 
15 | .. code-block:: python
16 | 
17 |     import keras
18 |     from keras.datasets import mnist
19 |     from keras.models import Sequential
20 |     from keras.layers import Dense, Dropout
21 |     from keras.optimizers import RMSprop
22 |     from keras.callbacks import TensorBoard
23 | 
24 |     import tempfile
25 | 
26 |     from sklearn.metrics import accuracy_score, precision_score
27 | 
28 |     from lab.experiment import Experiment
29 | 
30 |     BATCH_SIZE = 128
31 |     EPOCHS = 20
32 |     CHECKPOINT_PATH = 'tf/weights'
33 |     num_classes = 10
34 | 
35 | 
36 |     # the data, split between train and test sets
37 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
38 | 
39 |     x_train = x_train.reshape(60000, 784)
40 |     x_test = x_test.reshape(10000, 784)
41 |     x_train = x_train.astype('float32')
42 |     x_test = x_test.astype('float32')
43 |     x_train /= 255
44 |     x_test /= 255
45 |     print(x_train.shape[0], 'train samples')
46 |     print(x_test.shape[0], 'test samples')
47 | 
48 |     # convert class vectors to binary class matrices
49 |     y_train = keras.utils.to_categorical(y_train, num_classes)
50 |     y_test = keras.utils.to_categorical(y_test, num_classes)
51 | 
52 | 
53 | Next, define a model (for instance, the MLP from the Keras example in the gallery) and train:
54 | 
55 | .. 
code-block:: python
56 | 
57 |     e = Experiment()
58 | 
59 | 
60 |     @e.start_run
61 |     def train():
62 | 
63 |         # Create a temporary directory for tensorboard logs
64 |         output_dir = tempfile.mkdtemp()
65 |         print("Writing TensorBoard events locally to %s\n" % output_dir)
66 |         tensorboard = TensorBoard(log_dir=output_dir)
67 | 
68 |         # During Experiment execution, tensorboard can be viewed through:
69 |         # tensorboard --logdir=[output_dir]
70 | 
71 |         model.fit(x_train, y_train,
72 |                   batch_size=BATCH_SIZE,
73 |                   epochs=EPOCHS,
74 |                   verbose=1,
75 |                   validation_data=(x_test, y_test),
76 |                   callbacks=[tensorboard])
77 | 
78 |         model.save_weights(CHECKPOINT_PATH)
79 | 
80 |         y_prob = model.predict(x_test)
81 |         y_classes = y_prob.argmax(axis=-1)
82 |         actual = y_test.argmax(axis=-1)
83 | 
84 |         accuracy = accuracy_score(y_true=actual, y_pred=y_classes)
85 |         precision = precision_score(y_true=actual, y_pred=y_classes,
86 |                                     average='macro')
87 | 
88 |         # Log tensorboard
89 |         e.log_artifacts('tensorboard', output_dir)
90 |         e.log_artifacts('weights', CHECKPOINT_PATH)
91 | 
92 |         # Log all metrics
93 |         e.log_metric('accuracy_score', accuracy)
94 |         e.log_metric('precision_score', precision)
95 | 
96 |         # Log parameters
97 |         e.log_parameter('batch_size', BATCH_SIZE)
98 |         e.log_parameter('epochs', EPOCHS)
99 | 
100 | When training on distributed systems with Horovod, the `model.fit` step can be abstracted into a separate file, say `horovod-train.py`, and called directly from the `train()` method, for example via Horovod's `horovodrun` launcher:
101 | 
102 | .. code-block:: python
103 | 
104 |     import subprocess
105 | 
106 |     args = ['horovodrun', '-np', '8',  # launch 8 GPU processes
107 |             '-H', 'localhost:8', 'python',
108 |             'horovod-train.py',
109 |             '--checkpoint', CHECKPOINT_PATH,
110 |             '--batch-size', str(BATCH_SIZE),
111 |             '--epochs', str(EPOCHS)]
112 |     subprocess.call(args)
113 | 
114 | Note that you need to enable your Horovod script to accept the basic model hyperparameters that you wish to log downstream.
--------------------------------------------------------------------------------
/docs/source/auto_examples/keras_mnist_mlp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "cells": [
3 |     {
4 |       "cell_type": "code",
5 |       "execution_count": null,
6 |       "metadata": {
7 |         "collapsed": false
8 |       },
9 |       "outputs": [],
10 |       "source": [
11 |         "%matplotlib inline"
12 |       ]
13 |     },
14 |     {
15 |       "cell_type": "markdown",
16 |       "metadata": {},
17 |       "source": [
18 |         "\nRunning Keras models with Tensorboard\n=====================================\n\nLab integrates into a typical keras workflow.\n\nWARNING: model persistence in Keras can be complicated, especially when\nworking with complex models. 
It is recommended to checkpoint each training\nepoch independently from Lab's ``log_model`` API.\n\nBering by creating a new Lab Project:\n\n >>> echo \"keras\" > requirements.txt\n >>> lab init --name simple-keras\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import keras\nfrom keras.datasets import mnist\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Dropout\nfrom keras.optimizers import RMSprop\nfrom keras.callbacks import TensorBoard\n\nimport tempfile\n\nfrom sklearn.metrics import accuracy_score, precision_score\n\nfrom lab.experiment import Experiment\n\nbatch_size = 128\nnum_classes = 10\nepochs = 20\n\n# the data, split between train and test sets\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nx_train = x_train.reshape(60000, 784)\nx_test = x_test.reshape(10000, 784)\nx_train = x_train.astype('float32')\nx_test = x_test.astype('float32')\nx_train /= 255\nx_test /= 255\nprint(x_train.shape[0], 'train samples')\nprint(x_test.shape[0], 'test samples')\n\n# convert class vectors to binary class matrices\ny_train = keras.utils.to_categorical(y_train, num_classes)\ny_test = keras.utils.to_categorical(y_test, num_classes)\n\nmodel = Sequential()\nmodel.add(Dense(512, activation='relu', input_shape=(784,)))\nmodel.add(Dropout(0.2))\nmodel.add(Dense(512, activation='relu'))\nmodel.add(Dropout(0.2))\nmodel.add(Dense(num_classes, activation='softmax'))\n\nmodel.compile(loss='categorical_crossentropy',\n optimizer=RMSprop(),\n metrics=['accuracy'])\n\ne = Experiment()\n\n\n@e.start_run\ndef train():\n\n # Create a temporary directory for tensorboard logs\n output_dir = tempfile.mkdtemp()\n print(\"Writing TensorBoard events locally to %s\\n\" % output_dir)\n tensorboard = TensorBoard(log_dir=output_dir)\n\n # During Experiment execution, tensorboard can be viewed through:\n # tensorboard --logdir=[output_dir]\n\n model.fit(x_train, y_train,\n batch_size=batch_size,\n epochs=epochs,\n verbose=1,\n validation_data=(x_test, y_test),\n callbacks=[tensorboard])\n\n y_prob = model.predict(x_test)\n y_classes = y_prob.argmax(axis=-1)\n actual = y_test.argmax(axis=-1)\n\n accuracy = accuracy_score(y_true=actual, y_pred=y_classes)\n precision = precision_score(y_true=actual, y_pred=y_classes,\n average='macro')\n\n # Log tensorboard\n e.log_artifacts('tensorboard', output_dir)\n\n # Log all metrics\n e.log_metric('accuracy_score', accuracy)\n e.log_metric('precision_score', precision)\n\n # Log parameters\n e.log_parameter('batch_size', batch_size)\n\n # Save model\n e.log_model('mnist-mlp', model)" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Python 3", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.7.5" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 0 54 | } -------------------------------------------------------------------------------- /lab/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | import warnings 4 | import yaml 5 | import sys 6 | import pkg_resources 7 | 8 | from minio import Minio 9 | from urllib3.exceptions import 
MaxRetryError 10 | 11 | from lab.project import cli as lab_project 12 | from lab.experiment import cli as lab_experiment 13 | 14 | working_directory = os.getcwd() 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | @click.group() 19 | def cli(): 20 | """ 21 | Bering's Machine Learning Lab 22 | 23 | Copyright 2020 Bering Limited. https://beringresearch.com 24 | """ 25 | 26 | 27 | # Project 28 | cli.add_command(lab_project.lab_init) 29 | cli.add_command(lab_project.lab_push) 30 | cli.add_command(lab_project.lab_pull) 31 | cli.add_command(lab_project.lab_ls) 32 | cli.add_command(lab_project.lab_update) 33 | cli.add_command(lab_project.lab_notebook) 34 | 35 | # Experiment 36 | cli.add_command(lab_experiment.lab_run) 37 | cli.add_command(lab_experiment.lab_rm) 38 | cli.add_command(lab_experiment.lab_show) 39 | 40 | 41 | # Lab configuration 42 | @click.group() 43 | def config(): 44 | """ Global Lab configuration """ 45 | pass 46 | 47 | 48 | @click.command('info') 49 | def lab_info(): 50 | """ Display system-wide information """ 51 | import multiprocessing 52 | import platform 53 | 54 | lab_version = pkg_resources.require('lab-ml')[0].version 55 | 56 | system_version = str(sys.version_info[0]) + '.' + \ 57 | str(sys.version_info[1]) + \ 58 | '.' + str(sys.version_info[2]) 59 | home_dir = os.path.expanduser('~') 60 | lab_dir = os.path.join(home_dir, '.lab') 61 | 62 | # Count configured minio hosts 63 | if not os.path.exists(lab_dir): 64 | n_minio_hosts = 0 65 | else: 66 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 67 | minio_config = yaml.safe_load(file) 68 | n_minio_hosts = len(minio_config.keys()) 69 | 70 | click.echo('😎 Lab version: '+str(lab_version)) 71 | click.echo('Minio hosts: '+str(n_minio_hosts)+'\n') 72 | click.echo('Operating System: '+platform.system()) 73 | click.echo('Python version: '+system_version) 74 | click.echo('CPUs: '+str(multiprocessing.cpu_count())) 75 | 76 | 77 | @click.command('minio') 78 | @click.option('--tag', type=str, help='helpful minio host tag', required=True) 79 | @click.option('--endpoint', type=str, help='minio endpoint address', 80 | required=True) 81 | @click.option('--accesskey', type=str, help='minio access key', required=True) 82 | @click.option('--secretkey', type=str, help='minio secret key', required=True) 83 | def minio_config(tag, endpoint, accesskey, secretkey): 84 | """ Setup remote minio host """ 85 | home_dir = os.path.expanduser('~') 86 | lab_dir = os.path.join(home_dir, '.lab') 87 | 88 | # Ensure the configuration directory exists, then test the connection 89 | if not os.path.exists(lab_dir): 90 | os.makedirs(lab_dir) 91 | 92 | try: 93 | minioClient = Minio(endpoint, 94 | access_key=accesskey, 95 | secret_key=secretkey, 96 | secure=False) 97 | minioClient.list_buckets() 98 | except MaxRetryError: 99 | click.secho('Cannot connect to minio instance. Check your credentials ' 100 | 'and hostname. Ensure that endpoint is not prefixed with ' 101 | 'http or https.', fg='red') 102 | raise click.Abort() 103 | 104 | # Create configuration 105 | config = {'minio_endpoint': endpoint, 106 | 'minio_accesskey': accesskey, 107 | 'minio_secretkey': secretkey} 108 | 109 | if os.path.exists(os.path.join(lab_dir, 'config.yaml')): 110 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 111 | minio_config = yaml.safe_load(file) 112 | if tag in minio_config.keys(): 113 | click.secho('Host tag '+tag+' already exists in your ' 114 | 'configuration. Try a different name.', fg='red') 115 | raise click.Abort() 116 | 117 | minio_config[tag] = config 118 | else: 119 | minio_config = {} 120 | minio_config[tag] = config 121 | 122 | with open(os.path.join(lab_dir, 'config.yaml'), 'w') as file: 123 | yaml.safe_dump(minio_config, file, default_flow_style=False) 124 | 125 | 
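# Example invocation (the endpoint and keys below are placeholders; note
# that the endpoint is given without an http:// or https:// prefix):
#
#   lab config minio --tag my-minio --endpoint minio.example.com:9000 \
#       --accesskey ACCESSKEY --secretkey SECRETKEY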
126 | cli.add_command(config) 127 | cli.add_command(lab_info) 128 | config.add_command(minio_config) 129 | 130 | if __name__ == '__main__': 131 | cli() 132 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | .. _cli: 2 | 3 | ====================== 4 | Command Line Interface 5 | ====================== 6 | 7 | Lab is invoked through a simple Command Line Interface (CLI). 8 | 9 | .. code:: 10 | 11 | lab --help 12 | 13 | Usage: lab [OPTIONS] COMMAND [ARGS]... 14 | 15 | Bering's Machine Learning Lab 16 | 17 | Copyright 2020 Bering Limited. https://beringresearch.com 18 | 19 | Options: 20 | --help Show this message and exit. 21 | 22 | Commands: 23 | config Global Lab configuration 24 | info Display system-wide information 25 | init Initialise a new Lab Project 26 | ls Compare multiple Lab Experiments 27 | notebook Launch a jupyter notebook 28 | pull Pulls Lab Experiment from minio to current... 29 | push Push Lab Experiment to minio 30 | rm Remove a Lab Experiment 31 | run Run a training script 32 | show Show a Lab Experiment 33 | update Update Lab Environment from Project's... 34 | 35 | 36 | General Parameters 37 | ------------------ 38 | 39 | ``config`` ``minio`` 40 | ^^^^^^^^^^^^^^^^^^^^ 41 | 42 | Setup remote minio host 43 | 44 | .. code:: 45 | 46 | Usage: lab config minio [OPTIONS] 47 | 48 | Setup remote minio host 49 | 50 | Options: 51 | --tag TEXT helpful minio host tag [required] 52 | --endpoint TEXT minio endpoint address [required] 53 | --accesskey TEXT minio access key [required] 54 | --secretkey TEXT minio secret key [required] 55 | --help Show this message and exit. 56 | 57 | The ``tag`` option is a memorable name that identifies a minio endpoint. It can be reused to quickly access the push and pull APIs. 58 | 59 | ``info`` 60 | ^^^^^^^^ 61 | 62 | Display system-wide information, including Lab version, number of CPUs, etc. 63 | 64 | .. code:: 65 | 66 | Usage: lab info [OPTIONS] 67 | 68 | Project 69 | ------- 70 | 71 | ``init`` 72 | ^^^^^^^^ 73 | 74 | Initialise a new Lab Project. 75 | 76 | .. code:: 77 | 78 | Usage: lab init [OPTIONS] 79 | 80 | Options: 81 | --name TEXT environment name 82 | --help Show this message and exit. 83 | 84 | The command should be run in a directory containing a ``requirements.txt`` file that describes the Project environment. Lab will create a dedicated virtual environment in a ``.venv`` directory. 85 | 86 | ``ls`` 87 | ^^^^^^ 88 | 89 | List Lab Experiments and their performance metrics. 90 | 91 | .. code:: 92 | 93 | Usage: lab ls [OPTIONS] [SORT_BY] 94 | 95 | Options: 96 | --help Show this message and exit. 97 | 98 | The optional ``SORT_BY`` argument is a string column name in the results table. For example, if a Lab Experiment logged a metric AUC, calling ``lab ls AUC`` sorts all Experiments by decreasing AUC values. The default is to show the most recently completed Lab run. 99 | 
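For instance, to sort Experiments by a logged ``accuracy_score`` metric (metric names are whatever your own Experiments logged):

.. code::

    lab ls accuracy_score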
100 | ``show`` 101 | ^^^^^^^^ 102 | 103 | Create a PNG diagram linking an Experiment's data, script, hyperparameters and performance. 104 | 105 | .. code:: 106 | 107 | Usage: lab show 108 | 109 | Options: 110 | --help Show this message and exit. 111 | 112 | ``notebook`` 113 | ^^^^^^^^^^^^ 114 | Launches a jupyter notebook, pointing to the ``notebooks`` directory. If this is the first time launching the notebook, Lab will automatically create a jupyter kernel using the ``requirements.txt`` file. Kernel name is stored on your system as TIMESTAMP_PROJECT_NAME. 115 | 116 | ``update`` 117 | ^^^^^^^^^^ 118 | Updates the Lab project. Run it after the local Lab version has been updated or after ``requirements.txt`` has been modified with additional dependencies. 119 | 120 | Experiment 121 | ---------- 122 | 123 | ``run`` 124 | ^^^^^^^ 125 | Execute a Lab Experiment. 126 | 127 | .. code:: 128 | 129 | Usage: lab run [OPTIONS] [SCRIPT]... 130 | 131 | Options: 132 | --help Show this message and exit. 133 | 134 | ``rm`` 135 | ^^^^^^ 136 | 137 | Remove a Lab Experiment 138 | 139 | .. code:: 140 | 141 | Usage: lab rm [OPTIONS] EXPERIMENT_ID 142 | 143 | EXPERIMENT_ID can be obtained by running ``lab ls`` inside the Project directory. 144 | 145 | Model Management 146 | ---------------- 147 | 148 | ``push`` 149 | ^^^^^^^^ 150 | 151 | Push Lab Project to a configured minio repository. 152 | 153 | .. code:: 154 | 155 | lab push --tag [MINIO_TAG] --bucket [TEXT] --force 156 | 157 | 158 | ``pull`` 159 | ^^^^^^^^ 160 | 161 | Pull a Lab Project from a configured minio repository. 162 | 163 | .. code:: 164 | 165 | lab pull --tag [MINIO_TAG] --bucket [TEXT] --project [TEXT] --force 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/bering-ml-lab/badge/?version=latest)](https://bering-ml-lab.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # Machine Learning Lab 4 | 5 | A lightweight command line interface for the management of arbitrary machine learning tasks. 6 | 7 | Documentation is available at: 8 | 9 | NOTE: Lab is in active development - expect a bumpy ride! 10 | 11 | ![alt text](https://github.com/beringresearch/lab/blob/master/docs/source/_static/lab_screenshot.jpeg "Bering's Lab") 12 | 13 | ## Installation 14 | 15 | The latest stable version can be installed directly from PyPi: 16 | 17 | ```bash 18 | pip install lab-ml 19 | ``` 20 | 21 | The development version can be installed from github: 22 | 23 | ```bash 24 | git clone https://github.com/beringresearch/lab 25 | cd lab 26 | pip install --editable . 27 | ``` 28 | 29 | ## Concepts 30 | 31 | Lab employs three concepts: __reproducible environment__, __logging__, and __model persistence__. 32 | A typical machine learning workflow can be turned into a Lab Experiment by adding a single decorator. 33 | 34 | ## Creating a new Lab Project 35 | 36 | ```bash 37 | lab init --name [NAME] 38 | ``` 39 | 40 | Lab will look for a **requirements.txt** file in the working directory to generate a portable virtual environment for ML experiments. 
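For example, a minimal `requirements.txt` for an sklearn-based project might contain (package choice here is purely illustrative):

```
scikit-learn
pandas
```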
41 | 42 | ## Setting up a Lab Experiment 43 | 44 | Here's a simple script that trains an SVM classifier on the iris data set: 45 | 46 | ```python 47 | from sklearn import svm, datasets 48 | from sklearn.model_selection import train_test_split 49 | from sklearn.metrics import accuracy_score, precision_score 50 | 51 | C = 1.0 52 | gamma = 0.7 53 | iris = datasets.load_iris() 54 | X = iris.data 55 | y = iris.target 56 | 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.24, random_state=42) 58 | 59 | clf = svm.SVC(C=C, kernel='rbf', gamma=gamma, probability=True) 60 | clf.fit(X_train, y_train) 61 | 62 | y_pred = clf.predict(X_test) 63 | accuracy = accuracy_score(y_test, y_pred) 64 | precision = precision_score(y_test, y_pred, average='macro') 65 | ``` 66 | 67 | It's trivial to create a Lab Experiment using a simple decorator: 68 | 69 | ```python 70 | from sklearn import svm, datasets 71 | from sklearn.model_selection import train_test_split 72 | from sklearn.metrics import accuracy_score, precision_score 73 | 74 | from lab.experiment import Experiment ## New Line 75 | 76 | e = Experiment() ## New Line 77 | 78 | @e.start_run ## New Line 79 | def train(): 80 | C = 1.0 81 | gamma = 0.7 82 | iris = datasets.load_iris() 83 | X = iris.data 84 | y = iris.target 85 | 86 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.24, random_state=42) 87 | 88 | clf = svm.SVC(C=C, kernel='rbf', gamma=gamma, probability=True) 89 | clf.fit(X_train, y_train) 90 | 91 | y_pred = clf.predict(X_test) 92 | accuracy = accuracy_score(y_test, y_pred) 93 | precision = precision_score(y_test, y_pred, average='macro') 94 | 95 | e.log_metric('accuracy_score', accuracy) ## New Line 96 | e.log_metric('precision_score', precision) ## New Line 97 | 98 | e.log_parameter('C', C) ## New Line 99 | e.log_parameter('gamma', gamma) ## New Line 100 | 101 | e.log_model('svm', clf) ## New Line 102 | ``` 103 | 104 | ## Running an Experiment 105 | 106 | Lab Experiments can be run as: 107 | 108 | ```bash 109 | lab run 110 | ``` 111 | 112 | ## Comparing models 113 | 114 | Lab assumes that all Experiments associated with a Project log consistent performance metrics. We can quickly assess performance of each experiment by running: 115 | 116 | ```bash 117 | lab ls 118 | 119 | Experiment Source Date accuracy_score precision_score 120 | ------------ ------------------ ---------- ---------------- ----------------- 121 | 49ffb76e train_mnist_mlp.py 2019-01-15 0.97: ██████████ 0.97: ██████████ 122 | 261a34e4 train_mnist_cnn.py 2019-01-15 0.98: ██████████ 0.98: ██████████ 123 | ``` 124 | 125 | ## Pushing models to a centralised repository 126 | 127 | Lab experiments can be pushed to a centralised filesystem through integration with [minio](https://minio.io). Lab assumes that you have setup minio on a private cloud. 128 | 129 | Lab can be configured once to interface with a remote minio instance: 130 | 131 | ```bash 132 | lab config minio --tag my-minio --endpoint [URL:PORT] --accesskey [STRING] --secretkey [STRING] 133 | ``` 134 | 135 | To push a local lab experiment to minio: 136 | 137 | ```bash 138 | lab push --tag my-minio --bucket [BUCKETNAME] . 139 | ``` 140 | 
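To retrieve a pushed Project on another machine, the complementary `pull` command can be used; the flags below follow the CLI reference in the documentation:

```bash
lab pull --tag my-minio --bucket [BUCKETNAME] --project [NAME]
```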
141 | Copyright 2020, Bering Limited 142 | -------------------------------------------------------------------------------- /docs/source/auto_examples/keras_mnist_mlp.rst: -------------------------------------------------------------------------------- 1 | .. note:: 2 | :class: sphx-glr-download-link-note 3 | 4 | Click :ref:`here ` to download the full example code 5 | .. rst-class:: sphx-glr-example-title 6 | 7 | .. _sphx_glr_auto_examples_keras_mnist_mlp.py: 8 | 9 | 10 | Running Keras models with Tensorboard 11 | ===================================== 12 | 13 | Lab integrates into a typical keras workflow. 14 | 15 | WARNING: model persistence in Keras can be complicated, especially when 16 | working with complex models. It is recommended to checkpoint each training 17 | epoch independently of Lab's ``log_model`` API. 18 | 19 | Begin by creating a new Lab Project: 20 | 21 | >>> echo "keras" > requirements.txt 22 | >>> lab init --name simple-keras 23 | 24 | 25 | .. code-block:: default 26 | 27 | 28 | import keras 29 | from keras.datasets import mnist 30 | from keras.models import Sequential 31 | from keras.layers import Dense, Dropout 32 | from keras.optimizers import RMSprop 33 | from keras.callbacks import TensorBoard 34 | 35 | import tempfile 36 | 37 | from sklearn.metrics import accuracy_score, precision_score 38 | 39 | from lab.experiment import Experiment 40 | 41 | batch_size = 128 42 | num_classes = 10 43 | epochs = 20 44 | 45 | # the data, split between train and test sets 46 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 47 | 48 | x_train = x_train.reshape(60000, 784) 49 | x_test = x_test.reshape(10000, 784) 50 | x_train = x_train.astype('float32') 51 | x_test = x_test.astype('float32') 52 | x_train /= 255 53 | x_test /= 255 54 | print(x_train.shape[0], 'train samples') 55 | print(x_test.shape[0], 'test samples') 56 | 57 | # convert class vectors to binary class matrices 58 | y_train = keras.utils.to_categorical(y_train, num_classes) 59 | y_test = keras.utils.to_categorical(y_test, num_classes) 60 | 61 | model = Sequential() 62 | model.add(Dense(512, activation='relu', input_shape=(784,))) 63 | model.add(Dropout(0.2)) 64 | model.add(Dense(512, activation='relu')) 65 | model.add(Dropout(0.2)) 66 | model.add(Dense(num_classes, activation='softmax')) 67 | 68 | model.compile(loss='categorical_crossentropy', 69 | optimizer=RMSprop(), 70 | metrics=['accuracy']) 71 | 72 | e = Experiment() 73 | 74 | 75 | @e.start_run 76 | def train(): 77 | 78 | # Create a temporary directory for tensorboard logs 79 | output_dir = tempfile.mkdtemp() 80 | print("Writing TensorBoard events locally to %s\n" % output_dir) 81 | tensorboard = TensorBoard(log_dir=output_dir) 82 | 83 | # During Experiment execution, tensorboard can be viewed through: 84 | # tensorboard --logdir=[output_dir] 85 | 86 | model.fit(x_train, y_train, 87 | batch_size=batch_size, 88 | epochs=epochs, 89 | verbose=1, 90 | validation_data=(x_test, y_test), 91 | callbacks=[tensorboard]) 92 | 93 | y_prob = model.predict(x_test) 94 | y_classes = y_prob.argmax(axis=-1) 95 | actual = y_test.argmax(axis=-1) 96 | 97 | accuracy = accuracy_score(y_true=actual, y_pred=y_classes) 98 | precision = precision_score(y_true=actual, y_pred=y_classes, 99 | average='macro') 100 | 101 | # Log tensorboard 102 | e.log_artifact('tensorboard', output_dir) 103 | 104 | # Log all metrics 105 | e.log_metric('accuracy_score', accuracy) 106 | e.log_metric('precision_score', precision) 107 | 108 | # Log parameters 109 | e.log_parameter('batch_size', batch_size) 110 | 111 | # Save model 112 | e.log_model('mnist-mlp', model) 113 | 114 | 115 | .. rst-class:: sphx-glr-timing 116 | 117 | **Total running time of the script:** ( 0 minutes 0.000 seconds) 118 | 119 | 120 | .. 
_sphx_glr_download_auto_examples_keras_mnist_mlp.py: 121 | 122 | 123 | .. only :: html 124 | 125 | .. container:: sphx-glr-footer 126 | :class: sphx-glr-footer-example 127 | 128 | 129 | 130 | .. container:: sphx-glr-download 131 | 132 | :download:`Download Python source code: keras_mnist_mlp.py ` 133 | 134 | 135 | 136 | .. container:: sphx-glr-download 137 | 138 | :download:`Download Jupyter notebook: keras_mnist_mlp.ipynb ` 139 | 140 | 141 | .. only:: html 142 | 143 | .. rst-class:: sphx-glr-signature 144 | 145 | `Gallery generated by Sphinx-Gallery `_ 146 | -------------------------------------------------------------------------------- /docs/source/logging.rst: -------------------------------------------------------------------------------- 1 | .. _logging: 2 | 3 | Tracking Machine Learning Experiments 4 | ===================================== 5 | 6 | The Lab logging component was designed to interface directly with your training code without disrupting the machine learning workflow. 7 | Currently, users can keep track of the following experiment artifacts: 8 | 9 | - ``e.log_features``: Feature names 10 | - ``e.log_parameter``: Hyperparameters 11 | - ``e.log_metric``: Performance metrics 12 | - ``e.log_artifact``: Experimental artifacts 13 | - ``e.log_model``: Model persistence 14 | 15 | Feature names 16 | ------------- 17 | Data features are simply lists of feature names or column indices. Consider the snippet: 18 | 19 | .. code-block:: python 20 | 21 | from sklearn import datasets 22 | 23 | iris = datasets.load_iris() 24 | feature_names = iris['feature_names'] 25 | 26 | print(feature_names) 27 | 28 | ['sepal length (cm)', 29 | 'sepal width (cm)', 30 | 'petal length (cm)', 31 | 'petal width (cm)'] 32 | 33 | We can log these features by adding a few lines of code: 34 | 35 | .. code-block:: python 36 | 37 | from sklearn import datasets 38 | from lab.experiment import Experiment #import lab Experiment 39 | 40 | e = Experiment() 41 | 42 | # Initialize Lab Experiment 43 | @e.start_run 44 | def train(): 45 | iris = datasets.load_iris() 46 | feature_names = iris['feature_names'] 47 | 48 | # Log features 49 | e.log_features(feature_names) 50 | 
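Once the run completes, the logged names are persisted to ``features.yaml`` inside the experiment's directory. For the iris features above the serialised file would look roughly like this (a sketch of the plain YAML list that the serialisation produces):

.. code-block:: yaml

    - sepal length (cm)
    - sepal width (cm)
    - petal length (cm)
    - petal width (cm)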
51 | Hyperparameters: ``e.log_parameter`` 52 | ------------------------------------- 53 | 54 | Let's carry on with the Iris dataset and consider a Random Forest Classifier with an exhaustive grid search along the number of trees and maximum depth of a tree: 55 | 56 | .. code-block:: python 57 | 58 | from sklearn import datasets 59 | from sklearn.ensemble import RandomForestClassifier 60 | from sklearn.model_selection import GridSearchCV 61 | from lab.experiment import Experiment #import lab Experiment 62 | 63 | e = Experiment() 64 | 65 | # Initialize Lab Experiment 66 | @e.start_run 67 | def train(): 68 | iris = datasets.load_iris() 69 | 70 | feature_names = iris['feature_names'] 71 | 72 | # Log features 73 | e.log_features(feature_names) 74 | 75 | parameters = {'n_estimators': [10, 50, 100], 76 | 'max_depth': [2, 4]} 77 | 78 | rfc = RandomForestClassifier() 79 | 80 | # Run a grid search 81 | clf = GridSearchCV(rfc, parameters) 82 | clf.fit(iris.data, iris.target) 83 | 84 | best_parameters = clf.best_estimator_.get_params() 85 | 86 | # Log parameters 87 | e.log_parameter('n_estimators', best_parameters['n_estimators']) 88 | e.log_parameter('max_depth', best_parameters['max_depth']) 89 | 90 | Performance Metrics: ``e.log_metric`` 91 | ------------------------------------- 92 | 93 | Lab was designed to easily compare multiple machine learning experiments through consistent performance metrics. 94 | Let's expand our example and assess model accuracy and precision. 95 | 96 | .. code-block:: python 97 | 98 | from sklearn import datasets 99 | from sklearn.ensemble import RandomForestClassifier 100 | from sklearn.model_selection import GridSearchCV 101 | from sklearn.model_selection import train_test_split 102 | from sklearn.metrics import accuracy_score, precision_score 103 | from lab.experiment import Experiment 104 | 105 | e = Experiment() 106 | 107 | # Initialize Lab Experiment 108 | @e.start_run 109 | def train(): 110 | iris = datasets.load_iris() 111 | 112 | feature_names = iris['feature_names'] 113 | 114 | # Log features 115 | e.log_features(feature_names) 116 | 117 | parameters = {'n_estimators': [10, 50, 100], 118 | 'max_depth': [2, 4]} 119 | 120 | # Run a grid search 121 | rfc = RandomForestClassifier() 122 | clf = GridSearchCV(rfc, parameters) 123 | clf.fit(iris.data, iris.target) 124 | 125 | best_parameters = clf.best_estimator_.get_params() 126 | 127 | # Log parameters 128 | e.log_parameter('n_estimators', best_parameters['n_estimators']) 129 | e.log_parameter('max_depth', best_parameters['max_depth']) 130 | 131 | X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, 132 | test_size=0.25, random_state=42) 133 | 134 | rfc = RandomForestClassifier(n_estimators=best_parameters['n_estimators'], 135 | max_depth=best_parameters['max_depth']) 136 | rfc.fit(X_train, y_train) 137 | 138 | # Generate predictions 139 | y_pred = rfc.predict(X_test) 140 | accuracy = accuracy_score(y_test, y_pred) 141 | precision = precision_score(y_test, y_pred, average='macro') 142 | 143 | # Log performance metrics 144 | e.log_metric('accuracy_score', accuracy) 145 | e.log_metric('precision_score', precision) 146 | 
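Behind the scenes, these metrics are serialised to ``metrics.yaml`` inside the experiment directory once the run completes; this is the file that ``lab ls`` reads when comparing Experiments. For the example above the file would look something like this (the values shown are illustrative):

.. code-block:: yaml

    accuracy_score: 0.97
    precision_score: 0.97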
147 | Experiment Artifacts: ``e.log_artifact`` 148 | ---------------------------------------- 149 | 150 | In certain cases, it may be desirable for a Lab Experiment to write certain artifacts to a temporary folder - e.g. 151 | ROC curves or a Tensorboard log directory. Lab naturally bundles these artifacts within each respective experiment for subsequent exploration. 152 | 153 | Let's explore an example where Lab logs Tensorboard outputs: 154 | 155 | .. code-block:: python 156 | 157 | # Additional imports would go here 158 | from keras.callbacks import TensorBoard 159 | import tempfile 160 | 161 | from lab.experiment import Experiment 162 | 163 | e = Experiment() 164 | 165 | @e.start_run 166 | def train(): 167 | 168 | # ... Further training code goes here 169 | 170 | # Create a temporary directory for tensorboard logs 171 | output_dir = tempfile.mkdtemp() 172 | print("Writing TensorBoard events locally to %s\n" % output_dir) 173 | 174 | tensorboard = TensorBoard(log_dir=output_dir) 175 | 176 | model.fit(x_train, y_train, 177 | batch_size=batch_size, 178 | epochs=epochs, 179 | verbose=1, 180 | validation_data=(x_test, y_test), 181 | callbacks=[tensorboard]) 182 | 183 | # Log tensorboard artifact 184 | e.log_artifact('tensorboard', output_dir) 185 | 186 | 187 | In this example, Tensorboard logs are written to a temporary folder, which can be tracked in real-time. Once the run is complete, 188 | Lab moves all the directory content into a subdirectory of the current Lab Experiment. 189 | 190 | 191 | Model Persistence: ``e.log_model`` 192 | ---------------------------------- 193 | 194 | Finally, it's useful to store model objects themselves for future use. Consider our fitted GridSearchCV object ``clf`` from an earlier example. 195 | It can now be logged using a simple expression: 196 | 197 | .. code-block:: python 198 | 199 | e.log_model('GridSearchCV', clf) 200 | -------------------------------------------------------------------------------- /lab/experiment/experiment.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import uuid 3 | import os 4 | import sys 5 | import yaml 6 | import numpy 7 | import warnings 8 | import joblib 9 | import graphviz 10 | import json 11 | from distutils.dir_util import copy_tree 12 | 13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 14 | 15 | _DEFAULT_USER_ID = 'unknown' 16 | 17 | 18 | class Experiment(): 19 | def __init__(self, dataset=''): 20 | """ Fundamental Lab class for managing a machine learning experiment. 21 | 22 | :param str dataset: description or tag of a dataset used for training. 23 | """ 24 | 25 | self.dataset = dataset 26 | 27 | def create_run(self, run_uuid=None, user_id=None, home_dir=None, 28 | timestamp=None, metrics=None, parameters=None, 29 | source=None, feature_names=None, models=None, 30 | artifacts=None): 31 | """ Initialise a Lab experiment run 32 | """ 33 | 34 | self.uuid = str(uuid.uuid4())[:8] 35 | self.user_id = _get_user_id() 36 | self.timestamp = timestamp 37 | self.metrics = metrics 38 | self.parameters = parameters 39 | self.feature_names = feature_names 40 | self.source = ' '.join(sys.argv) 41 | self.home_dir = os.path.dirname( 42 | os.path.dirname( 43 | os.path.dirname(sys.argv[0]))) 44 | self.models = models if models is not None else {} 45 | self.artifacts = artifacts if artifacts is not None else {} 46 | 47 | def start_run(self, fun): 48 | """ Start run and log experiment data as it becomes available. 
49 | """ 50 | self.create_run(user_id=_get_user_id(), 51 | timestamp=datetime.datetime.now()) 52 | run_uuid = self.uuid 53 | 54 | models_directory = os.path.join(self.home_dir, 'experiments', run_uuid) 55 | logs_directory = os.path.join(self.home_dir, 'logs', run_uuid) 56 | 57 | fun() 58 | 59 | os.makedirs(logs_directory) 60 | os.makedirs(models_directory) 61 | 62 | # Log run metadata 63 | meta_file = os.path.join(logs_directory, 'meta.yaml') 64 | with open(meta_file, 'w') as file: 65 | meta = {'artifact_uri': os.path.dirname( 66 | os.path.abspath(models_directory)), 67 | 'source': self.source, 68 | 'start_time': self.timestamp, 69 | 'end_time': datetime.datetime.now(), 70 | 'experiment_uuid': self.uuid, 71 | 'dataset': self.dataset, 72 | 'user_id': self.user_id} 73 | yaml.dump(meta, file, default_flow_style=False) 74 | 75 | # Log metrics 76 | metrics_file = os.path.join(models_directory, 'metrics.yaml') 77 | with open(metrics_file, 'w') as file: 78 | yaml.dump(self.metrics, file, default_flow_style=False) 79 | 80 | # Log parameters 81 | parameters_file = os.path.join(models_directory, 'parameters.yaml') 82 | with open(parameters_file, 'w') as file: 83 | yaml.dump(self.parameters, file, default_flow_style=False) 84 | 85 | # Log features 86 | feature_file = os.path.join(models_directory, 'features.yaml') 87 | with open(feature_file, 'w') as file: 88 | yaml.dump(self.feature_names, file, default_flow_style=False) 89 | 90 | # Log models 91 | for filename in self.models.keys(): 92 | model_file = os.path.join(models_directory, filename+'.joblib') 93 | joblib.dump(self.models[filename], model_file) 94 | 95 | # Log artifacts 96 | for artifact in self.artifacts.keys(): 97 | destination = os.path.join(models_directory, artifact) 98 | copy_tree(self.artifacts[artifact], destination) 99 | 100 | def log_artifact(self, key, value): 101 | """ Log model artifacts 102 | """ 103 | self.artifacts[key] = value 104 | 105 | def log_features(self, feature_names): 106 | """ Log feature names 107 | """ 108 | self.feature_names = list(feature_names) 109 | 110 | def log_metric(self, key, value): 111 | """ Log performance metrics 112 | """ 113 | value = numpy.array(value) 114 | logged_metric = {} 115 | logged_metric[key] = value.tolist() 116 | 117 | if self.metrics is None: 118 | self.metrics = logged_metric 119 | else: 120 | self.metrics[key] = value.tolist() 121 | 122 | def log_parameter(self, key, value): 123 | """ Log model hyperparameters 124 | """ 125 | value = numpy.array(value) 126 | logged_parameter = {} 127 | logged_parameter[key] = value.tolist() 128 | 129 | if self.parameters is None: 130 | self.parameters = logged_parameter 131 | else: 132 | self.parameters[key] = value.tolist() 133 | 134 | def log_model(self, key, value): 135 | """Serialize the model 136 | """ 137 | self.models[key] = value 138 | 139 | def view(self): 140 | """ View lab project as a graphviz graph. 
141 | """ 142 | return show_experiment(self.uuid) 143 | 144 | 145 | def show_experiment(experiment_id): 146 | try: 147 | logs = yaml.load(open(os.path.join('logs', experiment_id, 148 | 'meta.yaml'), 'r')) 149 | if logs['dataset'] is None: 150 | logs['dataset'] = 'N/A' 151 | except FileNotFoundError: 152 | sys.exit('Not a valid lab experiment') 153 | 154 | col = _get_graphviz_colour() 155 | 156 | try: 157 | metrics = yaml.load(open(os.path.join('experiments', experiment_id, 158 | 'metrics.yaml'), 'r')) 159 | except FileNotFoundError: 160 | metrics = {'Metrics': 'None'} 161 | 162 | try: 163 | parameters = yaml.load(open(os.path.join('experiments', experiment_id, 164 | 'parameters.yaml'), 'r')) 165 | except FileNotFoundError: 166 | parameters = {'Parameter': 0.0} 167 | 168 | # Set defaults for empty values 169 | if parameters is None: 170 | parameters = {'Parameter': 0.0} 171 | 172 | # Extract only the source file name without arguments 173 | source = logs['source'].split(' ')[0] 174 | 175 | dot = graphviz.Digraph(format='png', 176 | name=logs['experiment_uuid'], 177 | node_attr={'shape': 'record'}) 178 | 179 | dot.attr('node', color=col) 180 | dot.attr('edge', color=col) 181 | 182 | dataset_id = logs['dataset'] 183 | source_id = source 184 | parameters_id = 'struct_'+experiment_id+'_parameters' 185 | metrics_id = experiment_id+'_performance' 186 | 187 | dot.node(experiment_id, logs['experiment_uuid'], shape='Mdiamond') 188 | dot.node(dataset_id, logs['dataset'], shape='Msquare') 189 | dot.node(source_id, source, shape='rectangle') 190 | 191 | dot.edge(experiment_id, dataset_id) 192 | dot.edge(dataset_id, source_id) 193 | 194 | with dot.subgraph(name='cluster_hyperparameters_'+experiment_id) as c: 195 | c.attr(label='Hyperparameters') 196 | c.attr('node', shape='Mrecord') 197 | c.attr(color='transparent') 198 | text = '{'+json.dumps(parameters).replace(',', '|')+'}' 199 | text = text.replace('"', '') 200 | c.node(parameters_id, text) 201 | 202 | with dot.subgraph(name='cluster_performance_'+experiment_id) as c: 203 | c.attr(label='Metrics') 204 | c.attr('node', shape='Mrecord') 205 | c.attr(color='transparent') 206 | text = '{'+json.dumps(metrics).replace(',', '|')+'}' 207 | text = text.replace('"', '') 208 | c.node(metrics_id, text) 209 | 210 | dot.edge(source_id, parameters_id) 211 | dot.edge(parameters_id, metrics_id) 212 | 213 | return dot 214 | 215 | 216 | def _get_user_id(): 217 | """Get the ID of the user for the current run.""" 218 | try: 219 | import pwd 220 | import os 221 | return pwd.getpwuid(os.getuid())[0] 222 | except ImportError: 223 | return _DEFAULT_USER_ID 224 | 225 | 226 | def _get_graphviz_colour(): 227 | colour_list = ['antiquewhite4', 'aquamarine4', 'azure4', 'bisque4', 228 | 'black', 'blue', 'blueviolet', 'brown', 'burlywood', 229 | 'cadetblue', 'chartreuse3', 'chartreuse4', 'chocolate4', 230 | 'coral', 'coral3', 'cornflowerblue', 'cornsilk4', 231 | 'crimson', 'cyan', 'darkgreen', 'darkorange1', 'deeppink1', 232 | 'deepskyblue1', 'dodgerblue', 'firebrick', 'forestgreen', 233 | 'goldenrod', 'goldenrod4', 'hotpink', 'indigo', 234 | 'khaki4', 'lightcoral', 'lightslateblue', 'lightsteelblue4', 235 | 'maroon', 'midnightblue', 'orangered4', 'palevioletred', 236 | 'sienna3', 'tomato', 'violetred1'] 237 | choice = numpy.random.choice(list(range(len(colour_list))), 238 | replace=False) 239 | return colour_list[choice] 240 | -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | Copyright 2020 Bering Limited. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 
63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /lab/project/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import uuid 3 | import glob 4 | import os 5 | import sys 6 | import datetime 7 | import yaml 8 | import subprocess 9 | import shutil 10 | from minio import Minio 11 | from minio.error import S3Error 12 | 13 | import tabulate 14 | import pandas as pd 15 | import numpy as np 16 | 17 | from lab import is_empty_project, is_lab_project, create_venv,\ 18 | check_minio_config 19 | 20 | 21 | @click.command('ls') 22 | @click.argument('sort_by', required=False) 23 | def lab_ls(sort_by=None): 24 | """ Compare multiple Lab Experiments """ 25 | models_directory = 'experiments' 26 | logs_directory = 'logs' 27 | TICK = '█' 28 | 29 | is_lab_project() 30 | is_empty_project() 31 | 32 | experiments = next(os.walk(models_directory))[1] 33 | comparisons = [] 34 | 35 | # Get unique metric names 36 | metrics_names = [] 37 | for e in experiments: 38 | metrics_file = os.path.join(models_directory, e, 'metrics.yaml') 39 | with open(metrics_file, 'r') as file: 40 | metrics = yaml.load(file) 41 | metrics_names.append(list(metrics.keys())) 42 | 43 | metrics_names = list(set(metrics_names[0]).intersection(*metrics_names)) 44 | 45 | 46 | # Get all experiments 47 | for e in experiments: 48 | metrics_file = os.path.join(models_directory, e, 'metrics.yaml') 49 | try: 50 | with open(metrics_file, 'r') as file: 51 | metrics = yaml.load(file) 52 | for k, v in metrics.items(): 53 | metrics[k] = round(v, 2) 54 | 55 | metrics = {k: metrics[k] for k in metrics_names} 56 | metrics_list = list(metrics.values()) 57 | 58 | meta_file = os.path.join(logs_directory, e, 'meta.yaml') 59 | with open(meta_file, 'r') as file: 60 | meta = yaml.load(file) 61 | 62 | # Truncate source name if too long 63 | source_name = meta['source'] 64 | meta['source'] = (source_name[:20] + 65 | '..') if len(source_name) > 20 else source_name 66 | 67 | record = [meta['experiment_uuid'], meta['source'], 68 | meta['start_time'].strftime("%m/%d/%Y, %H:%M:%S")] + \ 69 | metrics_list 70 | comparisons.append(record) 71 | except FileNotFoundError: 72 | pass 73 | 74 | # Create visualisation of numeric metrics 75 | A = pd.DataFrame(comparisons) 76 | meta_data = A[[0, 1, 2]] 77 | metrics_data = A.drop([0, 1, 2], 
axis=1) 78 | 79 | row_max = metrics_data.abs().max(axis=0) 80 | scaled_metrics_data = metrics_data.abs().divide(row_max, axis=1) 81 | scaled_metrics_data = scaled_metrics_data.fillna(value=0) 82 | 83 | sparklines = np.empty(shape=metrics_data.shape, dtype=object) 84 | for row in range(metrics_data.shape[0]): 85 | for column in range(metrics_data.shape[1]): 86 | value = metrics_data.iloc[row, column] 87 | scaled_value = scaled_metrics_data.iloc[row, column] 88 | scaled_value = scaled_value 89 | spark = (format(value, '.2f') + ': ' + 90 | TICK * int(round(scaled_value*10))) 91 | sparklines[row, column] = spark 92 | 93 | result = pd.concat([meta_data, pd.DataFrame(sparklines)], axis=1) 94 | result.columns = (['Experiment', 'Source', 'Date'] + 95 | list(metrics.keys())) 96 | 97 | result.sort_values(by=['Date'], axis=0, ascending=False, 98 | inplace=True) 99 | 100 | if sort_by is not None: 101 | result.sort_values(by=[sort_by], axis=0, 102 | ascending=False, inplace=True) 103 | 104 | header = ['Experiment', 'Source', 'Date'] + list(metrics.keys()) 105 | click.echo('') 106 | click.echo(tabulate.tabulate(result.values, headers=header)) 107 | 108 | # Check the last time lab project was synced with minio 109 | with open(os.path.join('config', 'runtime.yaml'), 'r') as file: 110 | minio_config = yaml.load(file) 111 | push_time = datetime.datetime.fromtimestamp(0) 112 | try: 113 | push_time = \ 114 | datetime.datetime.strptime( 115 | minio_config['last_push'], 116 | '%Y-%m-%d %H:%M:%S.%f') 117 | 118 | now_time = datetime.datetime.now() 119 | td = now_time-push_time 120 | (days, hours) = (td.days, td.seconds//3600) 121 | except Exception: 122 | (days, hours) = (0, 0) 123 | 124 | click.secho('\nLast push: '+str(days)+'d, ' + str(hours)+'h ago', 125 | fg='yellow') 126 | 127 | # Find the latest file and print its timestamp 128 | list_of_files = glob.glob(os.path.join(os.getcwd(), '*')) 129 | latest_file = max(list_of_files, key=os.path.getctime) 130 | latest_file_timestamp = \ 131 | datetime.datetime.fromtimestamp(os.path.getmtime(latest_file)) 132 | 133 | recommend = '| Project is in sync with remote' 134 | if latest_file_timestamp > push_time: 135 | recommend = ' | Recommend to run ' 136 | click.secho('Last modified: '+str(latest_file_timestamp)+recommend, 137 | fg='yellow') 138 | 139 | 140 | @click.command(name='notebook') 141 | def lab_notebook(): 142 | """ Publish Lab project as a jupyter kernel """ 143 | is_lab_project() 144 | 145 | with open(os.path.join(os.getcwd(), 146 | 'config', 'runtime.yaml'), 'r') as file: 147 | config = yaml.load(file) 148 | project_name = config['name'] + '_' +\ 149 | ''.join(e for e in config['timestamp'] if e.isalnum()) 150 | 151 | click.secho('Generating jupyter kernel for ' + config['name'] + '...', 152 | fg='cyan') 153 | 154 | try: 155 | _install_jupyter_kernel(project_name) 156 | click.secho('Kernel generated: ' + project_name) 157 | except Exception as e: 158 | print(e) 159 | click.secho('Failed to generate kernel.', fg='red') 160 | 161 | 162 | def _install_jupyter_kernel(project_name): 163 | 164 | venv_dir = os.path.join(os.getcwd(), '.venv') 165 | subprocess.call([venv_dir + '/bin/pip', 'install', 'ipykernel']) 166 | subprocess.call([venv_dir + '/bin/ipython', 'kernel', 'install', 167 | '--user', '--name='+project_name]) 168 | 169 | 170 | @click.command(name='init') 171 | @click.option('--name', type=str, default=str(uuid.uuid4()), 172 | help='environment name') 173 | def lab_init(name): 174 | """ Initialise a new Lab Project """ 175 | if not 
os.path.isfile('requirements.txt'): 176 | click.secho('requirements.txt is not found in the ' 177 | 'current working directory.', fg='red') 178 | raise click.Abort() 179 | 180 | if os.path.isdir(name): 181 | click.secho('Project '+name+' already exists.', fg='red') 182 | raise click.Abort() 183 | else: 184 | try: 185 | _project_init(name) 186 | except Exception as e: 187 | print(e) 188 | click.secho('Errors encountered during project initialisation.' 189 | 'Rolling back..', fg='red') 190 | raise click.Abort() 191 | 192 | 193 | @click.command(name='update') 194 | def lab_update(): 195 | """ Update Lab Environment from Project's requirements.txt """ 196 | if not os.path.isfile('requirements.txt'): 197 | click.secho('requirements.txt file is missing.', fg='red') 198 | raise click.Abort() 199 | 200 | # Update project directory if it hasn't been updated 201 | try: 202 | with open(os.path.join(os.getcwd(), 203 | 'config', 'runtime.yaml'), 'r') as file: 204 | config = yaml.load(file) 205 | home_dir = config['path'] 206 | 207 | if home_dir != os.getcwd(): 208 | config['path'] = os.getcwd() 209 | with open(os.path.join(os.getcwd(), 210 | 'config', 'runtime.yaml'), 'w') as file: 211 | yaml.dump(config, file, default_flow_style=False) 212 | except FileNotFoundError: 213 | click.secho('Having trouble parsing configuration file for this ' 214 | "project. It's likely that this is either not a " 215 | 'Lab Project or the Project was created with an older ' 216 | 'version of Lab.\n', 217 | fg='red') 218 | raise click.Abort() 219 | 220 | if not os.path.isdir('.venv'): 221 | click.secho("Couldn't find .venv. Creating one for you...", 222 | fg='blue') 223 | create_venv('') 224 | 225 | home_dir = os.getcwd() 226 | venv_dir = os.path.join(home_dir, '.venv') 227 | 228 | click.secho('Updating lab', fg='cyan') 229 | subprocess.call([venv_dir + '/bin/pip', 230 | 'install', '--upgrade', 'lab-ml']) 231 | 232 | click.secho('Updating environment using requirements.txt', fg='cyan') 233 | subprocess.call([venv_dir + '/bin/pip', 'install', '--upgrade', 234 | '-r', 'requirements.txt']) 235 | 236 | 237 | @click.command('pull') 238 | @click.option('--tag', type=str, 239 | help='minio host nickname', required=False, default=None) 240 | @click.option('--bucket', type=str, required=False, default=None, 241 | help='minio bucket name') 242 | @click.option('--project', type=str, required=False, default=None, 243 | help='Lab Project name') 244 | @click.option('--force', is_flag=True) 245 | def lab_pull(tag, bucket, project, force): 246 | """ Pulls Lab Experiment from minio to current directory """ 247 | home_dir = os.path.expanduser('~') 248 | 249 | lab_dir = os.path.join(home_dir, '.lab') 250 | 251 | if not os.path.exists(lab_dir): 252 | click.secho('Lab is not configured to connect to minio. 
' 253 | 'Run to set up access points.', 254 | fg='red') 255 | raise click.Abort() 256 | 257 | if project is not None: 258 | if os.path.exists(project): 259 | click.secho('Directory '+project+' already exists.', fg='red') 260 | raise click.Abort() 261 | 262 | _pull_from_minio(tag, bucket, project, force) 263 | 264 | 265 | @click.command('push') 266 | @click.option('--info', is_flag=True) 267 | @click.option('--tag', type=str, help='minio host nickname', default=None) 268 | @click.option('--bucket', type=str, default=None, 269 | help='minio bucket name') 270 | @click.option('--force', is_flag=True) 271 | @click.argument('path', type=str, default='.') 272 | def lab_push(info, tag, bucket, path, force): 273 | """ Push Lab Experiment to minio """ 274 | models_directory = 'experiments' 275 | logs_directory = 'logs' 276 | config_directory = 'config' 277 | 278 | home_dir = os.path.expanduser('~') 279 | lab_dir = os.path.join(home_dir, '.lab') 280 | if not os.path.exists(lab_dir): 281 | click.secho('Lab is not configured to connect to minio. ' 282 | 'Run to set up access points.', 283 | fg='red') 284 | raise click.Abort() 285 | 286 | if not (os.path.exists(models_directory) & 287 | os.path.exists(logs_directory) & os.path.exists(config_directory)): 288 | click.secho('This directory lacks a valid Lab Project directory ' 289 | 'structure. Run to create one.', 290 | fg='blue') 291 | raise click.Abort() 292 | 293 | if info: 294 | with open(os.path.join(config_directory, 'runtime.yaml'), 'r') as file: 295 | minio_config = yaml.load(file) 296 | click.secho('Last push: '+minio_config['last_push'], fg='blue') 297 | else: 298 | if (tag is None) & (bucket is None): 299 | try: 300 | with open(os.path.join(config_directory, 'runtime.yaml'), 301 | 'r') as file: 302 | minio_config = yaml.load(file) 303 | tag = minio_config['tag'] 304 | bucket = minio_config['bucket'] 305 | except KeyError: 306 | click.secho( 307 | 'Lab project does not have default tag and bucket configuration. 
' 308 | 'Supply --tag and --bucket options and run lab push again.', 309 | fg='red') 310 | raise click.Abort() 311 | else: 312 | with open(os.path.join(config_directory, 'runtime.yaml'), 313 | 'r') as file: 314 | minio_config = yaml.load(file) 315 | minio_config['tag'] = tag 316 | minio_config['bucket'] = bucket 317 | with open(os.path.join(config_directory, 'runtime.yaml'), 318 | 'w') as file: 319 | yaml.safe_dump(minio_config, file, default_flow_style=False) 320 | 321 | _push_to_minio(tag, bucket, path, force) 322 | 323 | 324 | def _pull_from_minio(tag, bucket, project_name, force): 325 | click.secho('Looking up remote..', fg='cyan') 326 | 327 | home_dir = os.path.expanduser('~') 328 | lab_dir = os.path.join(home_dir, '.lab') 329 | project_dir = os.getcwd() 330 | 331 | _clone = True 332 | 333 | # Extract bucket name and project name from config if they are present 334 | if (tag is None) & (bucket is None) & (project_name is None): 335 | _clone = False 336 | 337 | with open(os.path.join(project_dir, 338 | 'config', 'runtime.yaml'), 'r') as file: 339 | project_config = yaml.load(file) 340 | bucket = project_config['bucket'] 341 | project_name = project_config['name'] 342 | tag = project_config['tag'] 343 | 344 | check_minio_config(tag) 345 | 346 | # Extract minio configuration 347 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 348 | minio_config = yaml.load(file)[tag] 349 | 350 | hostname = minio_config['minio_endpoint'] 351 | accesskey = minio_config['minio_accesskey'] 352 | secretkey = minio_config['minio_secretkey'] 353 | 354 | minioClient = Minio(hostname, 355 | access_key=accesskey, 356 | secret_key=secretkey, 357 | secure=False) 358 | 359 | if not minioClient.bucket_exists(bucket): 360 | click.secho('Bucket ' + bucket + ' is not found on remote', fg='red') 361 | raise click.Abort() 362 | try: 363 | objects = minioClient.list_objects(bucket, prefix=project_name+'/', 364 | recursive=True) 365 | 366 | remote_objects = [o.object_name for o in objects] 367 | 368 | if _clone is False: 369 | if force: 370 | local_objects = [] 371 | else: 372 | local_objects = _list_dir('.') 373 | 374 | local_objects = [l.replace('./', project_name+'/') 375 | for l in local_objects] 376 | 377 | remote_objects = list(set(remote_objects) - set(local_objects)) 378 | 379 | if len(remote_objects) == 0: 380 | click.secho('Project is in sync with remote. 
' 381 | 'Use to do a hard pull.', 382 | fg='yellow') 383 | raise click.Abort() 384 | 385 | click.secho('Fetching '+str(len(remote_objects))+' remote objects.', 386 | fg='cyan') 387 | 388 | for obj in remote_objects: 389 | if _clone: 390 | object_name = obj 391 | else: 392 | object_name = ''.join(obj.split(project_name + '/')[1:]) 393 | print('Downloading ' + object_name) 394 | minioClient.fget_object(bucket, obj, 395 | os.path.join(os.getcwd(), object_name)) 396 | except S3Error as err: 397 | print(err) 398 | 399 | 400 | def _list_dir(path): 401 | files = [] 402 | # r=root, d=directories, f = files 403 | for r, d, f in os.walk(path): 404 | for file in f: 405 | files.append(os.path.join(r, file)) 406 | return(files) 407 | 408 | 409 | def _push_to_minio(tag, bucket, path, force): 410 | home_dir = os.path.expanduser('~') 411 | lab_dir = os.path.join(home_dir, '.lab') 412 | 413 | try: 414 | with open('.labignore') as f: 415 | exclude = set(f.read().splitlines()) 416 | except Exception: 417 | exclude = set(['.venv']) 418 | 419 | try: 420 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 421 | minio_config = yaml.load(file)[tag] 422 | except KeyError as e: 423 | print(str(e)) 424 | click.secho('Unable to connect to host '+tag, fg='red') 425 | raise click.Abort() 426 | 427 | with open(os.path.join(path, 'config/runtime.yaml'), 'r') as file: 428 | config = yaml.load(file) 429 | 430 | project_name = config['name'] 431 | 432 | hostname = minio_config['minio_endpoint'] 433 | accesskey = minio_config['minio_accesskey'] 434 | secretkey = minio_config['minio_secretkey'] 435 | 436 | input_objects = [] 437 | output_objects = [] 438 | 439 | for root, d_names, f_names in os.walk(path, topdown=True): 440 | d_names[:] = [d for d in d_names if d not in exclude] 441 | for f in f_names: 442 | input_objects.append(os.path.join(root, f)) 443 | output_objects.append(os.path.join(project_name, 444 | root.strip('./'), f)) 445 | 446 | minioClient = Minio(hostname, 447 | access_key=accesskey, 448 | secret_key=secretkey, 449 | secure=False) 450 | 451 | if not minioClient.bucket_exists(bucket): 452 | minioClient.make_bucket(bucket, location='eu-west-1') 453 | 454 | # Prune remote if needed 455 | if force: 456 | if click.confirm( 457 | 'WARNING: force push will remove all remote files not ' 458 | 'found in your current project. 
Do you want to continue?', 459 | abort=True): 460 | try: 461 | remote_objects = minioClient.list_objects(bucket, 462 | prefix=project_name, 463 | recursive=True) 464 | remote_objects = [obj.object_name for obj in remote_objects] 465 | for del_err in minioClient.remove_objects(bucket, 466 | remote_objects): 467 | print("Deletion Error: {}".format(del_err)) 468 | except S3Error as err: 469 | print(err) 470 | 471 | try: 472 | for i in range(len(input_objects)): 473 | minioClient.fput_object(bucket, output_objects[i], 474 | input_objects[i]) 475 | print('Successfully processed '+input_objects[i]) 476 | 477 | with open(os.path.join('config', 'runtime.yaml'), 'r') as file: 478 | minio_config = yaml.load(file) 479 | minio_config['last_push'] = str(datetime.datetime.now()) 480 | 481 | with open(os.path.join('config', 'runtime.yaml'), 'w') as file: 482 | yaml.safe_dump(minio_config, file, default_flow_style=False) 483 | 484 | except S3Error as err: 485 | print(err) 486 | 487 | 488 | def _project_init(project_name): 489 | pyversion = '%s.%s' % (sys.version_info[0], sys.version_info[1]) 490 | 491 | # Create project structure 492 | os.mkdir(project_name) 493 | os.mkdir(os.path.join(project_name, 'notebooks')) 494 | os.mkdir(os.path.join(project_name, 'data')) 495 | os.mkdir(os.path.join(project_name, 'logs')) 496 | os.mkdir(os.path.join(project_name, 'experiments')) 497 | os.mkdir(os.path.join(project_name, 'config')) 498 | 499 | open(os.path.join(project_name, 'README.md'), 'a').close() 500 | open(os.path.join(project_name, 'notebooks', 'README.md'), 'a').close() 501 | 502 | file = open(os.path.join(project_name, '.gitignore'), 'w') 503 | file.write('.venv') 504 | file.close() 505 | 506 | # ignore these files when pushing lab repo to minio 507 | file = open(os.path.join(project_name, '.labignore'), 'w') 508 | file.write('.venv\n') 509 | file.write('.ipynb_checkpoints') 510 | file.close() 511 | 512 | # Copy requirements.txt file 513 | shutil.copyfile('requirements.txt', project_name+'/requirements.txt') 514 | 515 | # Create a virtual environment 516 | create_venv(project_name) 517 | 518 | # Create runtime configuration 519 | runtime = {'name': project_name, 520 | 'path': os.path.join(os.getcwd(), project_name), 521 | 'description': None, 522 | 'python': pyversion, 523 | 'timestamp': str(datetime.datetime.now()), 524 | 'last_push': '', 525 | 'venv': '.venv'} 526 | 527 | with open(os.path.join(project_name, 528 | 'config', 'runtime.yaml'), 'w') as file: 529 | yaml.dump(runtime, file, default_flow_style=False) 530 | --------------------------------------------------------------------------------