├── lab ├── project │ ├── __init__.py │ └── cli.py ├── __init__.py ├── experiment │ ├── __init__.py │ ├── cli.py │ └── experiment.py ├── checks.py └── cli.py ├── docs_requirements.txt ├── examples ├── .DS_Store ├── README.txt ├── sklearn_randomforest.py └── keras_mnist_mlp.py ├── requirements.txt ├── docs ├── source │ ├── _static │ │ ├── lab-uml.png │ │ └── lab_screenshot.jpeg │ ├── auto_examples │ │ ├── auto_examples_jupyter.zip │ │ ├── auto_examples_python.zip │ │ ├── images │ │ │ └── thumb │ │ │ │ ├── sphx_glr_keras_mnist_mlp_thumb.png │ │ │ │ ├── sphx_glr_train_randomforest_thumb.png │ │ │ │ └── sphx_glr_sklearn_randomforest_thumb.png │ │ ├── index.rst │ │ ├── sklearn_randomforest.py │ │ ├── sklearn_randomforest.ipynb │ │ ├── keras_mnist_mlp.py │ │ ├── sklearn_randomforest.rst │ │ ├── keras_mnist_mlp.ipynb │ │ └── keras_mnist_mlp.rst │ ├── notebook.rst │ ├── faq.rst │ ├── index.rst │ ├── concepts.rst │ ├── conf.py │ ├── push.rst │ ├── quickstart.rst │ ├── dlexperiments.rst │ ├── cli.rst │ └── logging.rst ├── Makefile └── make.bat ├── setup.py ├── .gitignore ├── README.md └── LICENSE.txt /lab/project/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs_requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_materialdesign_theme 3 | sphinx_gallery 4 | -------------------------------------------------------------------------------- /examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/examples/.DS_Store -------------------------------------------------------------------------------- /lab/__init__.py: -------------------------------------------------------------------------------- 1 | from . import project 2 | from . import experiment 3 | 4 | from .checks import * 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | minio 2 | click 3 | pyyaml 4 | pandas 5 | numpy 6 | graphviz 7 | tabulate 8 | joblib 9 | -------------------------------------------------------------------------------- /lab/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | from .experiment import Experiment 2 | from .experiment import show_experiment 3 | -------------------------------------------------------------------------------- /docs/source/_static/lab-uml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/_static/lab-uml.png -------------------------------------------------------------------------------- /docs/source/_static/lab_screenshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/_static/lab_screenshot.jpeg -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples Gallery 2 | ===================== 3 | 4 | Several examples of how Lab can be used in common machine learning projects. 
5 | 
--------------------------------------------------------------------------------
/docs/source/auto_examples/auto_examples_jupyter.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/auto_examples_jupyter.zip
--------------------------------------------------------------------------------
/docs/source/auto_examples/auto_examples_python.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/auto_examples_python.zip
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_train_randomforest_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_train_randomforest_thumb.png
--------------------------------------------------------------------------------
/docs/source/auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beringresearch/lab/HEAD/docs/source/auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png
--------------------------------------------------------------------------------
/docs/source/notebook.rst:
--------------------------------------------------------------------------------
1 | .. _notebook:
2 | 
3 | Working with Jupyter Notebooks
4 | ==============================
5 | 
6 | Lab makes it easy to work with Jupyter notebooks by creating a kernel directly from a Lab Project:
7 | 
8 | .. code-block:: bash
9 | 
10 |     lab notebook
11 | 
12 | Once the kernel is created, you can select it from any Jupyter session.
13 | 
14 | Lab also provides a `notebooks` directory to organise and maintain all notebooks associated with a Lab Project.
15 | 
--------------------------------------------------------------------------------
/docs/source/faq.rst:
--------------------------------------------------------------------------------
1 | .. _faq:
2 | 
3 | Frequently Asked Questions
4 | ==========================
5 | 
6 | How can I include a ``github`` repository in a Lab Project?
7 | ------------------------------------------------------------
8 | 
9 | Like ``pip``, lab works with a ``requirements.txt`` file. To let lab know that your virtual environment should contain a package maintained on GitHub, add the following line to your ``requirements.txt``:
10 | 
11 | .. code::
12 | 
13 |     -e git+https://github.com/beringresearch/ivis#egg=ivis
14 | 
15 | Modify the repository and package information accordingly.
16 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two. 
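# For example, to override them for a single build (standard sphinx-build
# usage; the -W flag turns warnings into errors):
#     make html SPHINXOPTS=-W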
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Machine Learning Lab 2 | ==================== 3 | 4 | .. image:: _static/lab_screenshot.jpeg 5 | :height: 400 px 6 | :width: 800 px 7 | :scale: 100 % 8 | 9 | Lab is an open source platform for managing machine learning pipelines. It addresses three core concepts: **Reproducibility**, **Logging**, and **Model Persistence**. Lab is lightweight and was designed to easily integrate with your existing training scripts. 10 | 11 | .. warning:: 12 | 13 | Lab is in active development and the current version of Lab is a beta release. This means that APIs and storage formats are subject to breaking change. 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: User Guide: 18 | 19 | Quickstart 20 | Core concepts 21 | Command Line Interface 22 | Tracking API 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Functionality: 27 | 28 | Managing Deep Learning Experiments 29 | Working with Jupyter Notebooks 30 | Model repository 31 | FAQ 32 | 33 | .. 
toctree:: 34 | :maxdepth: 1 35 | :caption: Applications: 36 | 37 | Examples 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('README.md', encoding='utf-8') as f: 4 | long_description = f.read() 5 | 6 | setup( 7 | name='lab-ml', 8 | version='0.83', 9 | long_description=long_description, 10 | long_description_content_type='text/markdown', 11 | packages=find_packages(), 12 | py_modules=['lab'], 13 | install_requires=[ 14 | 'click>=6.7', 15 | 'minio', 16 | 'numpy', 17 | 'pandas', 18 | 'pyyaml', 19 | 'tabulate', 20 | 'graphviz', 21 | 'joblib' 22 | ], 23 | entry_points=''' 24 | [console_scripts] 25 | lab=lab.cli:cli 26 | ''', 27 | author='Ignat Drozdov', 28 | author_email='idrozdov@beringresearch.com', 29 | description='Lab: a command line interface for the management of arbitrary machine learning tasks.', 30 | license='Apache License 2.0', 31 | classifiers=[ 32 | 'Intended Audience :: Developers', 33 | 'Programming Language :: Python :: 3.6', 34 | 'Operating System :: OS Independent', 35 | ], 36 | keywords='ml ai', 37 | url='https://github.com/beringresearch/lab' 38 | ) 39 | -------------------------------------------------------------------------------- /docs/source/auto_examples/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | 4 | 5 | .. _sphx_glr_auto_examples: 6 | 7 | Examples Gallery 8 | ===================== 9 | 10 | Several examples of how Lab can be used in common machine learning projects. 11 | 12 | 13 | 14 | .. raw:: html 15 | 16 |
17 | 18 | .. only:: html 19 | 20 | .. figure:: /auto_examples/images/thumb/sphx_glr_sklearn_randomforest_thumb.png 21 | 22 | :ref:`sphx_glr_auto_examples_sklearn_randomforest.py` 23 | 24 | .. raw:: html 25 | 26 |
27 | 28 | 29 | .. toctree:: 30 | :hidden: 31 | 32 | /auto_examples/sklearn_randomforest 33 | 34 | .. raw:: html 35 | 36 |
37 | 38 | .. only:: html 39 | 40 | .. figure:: /auto_examples/images/thumb/sphx_glr_keras_mnist_mlp_thumb.png 41 | 42 | :ref:`sphx_glr_auto_examples_keras_mnist_mlp.py` 43 | 44 | .. raw:: html 45 | 46 |
47 | 48 | 49 | .. toctree:: 50 | :hidden: 51 | 52 | /auto_examples/keras_mnist_mlp 53 | .. raw:: html 54 | 55 |
56 | 
57 | 
58 | 
59 | .. only :: html
60 | 
61 |  .. container:: sphx-glr-footer
62 |     :class: sphx-glr-footer-gallery
63 | 
64 | 
65 |   .. container:: sphx-glr-download
66 | 
67 |      :download:`Download all examples in Python source code: auto_examples_python.zip </auto_examples/auto_examples_python.zip>`
68 | 
69 | 
70 | 
71 |   .. container:: sphx-glr-download
72 | 
73 |      :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip </auto_examples/auto_examples_jupyter.zip>`
74 | 
75 | 
76 | .. only:: html
77 | 
78 |  .. rst-class:: sphx-glr-signature
79 | 
80 |     `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
81 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | develop-eggs/
12 | dist/
13 | downloads/
14 | eggs/
15 | .eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | MANIFEST
26 | 
27 | _build
28 | docs/build/
29 | 
30 | __pycache__/
31 | *.egg
32 | .DS_Store
33 | *.swp
34 | .ipynb_checkpoints
35 | 
36 | # PyInstaller
37 | #  Usually these files are written by a python script from a template
38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | 
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 | 
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | 
57 | # Translations
58 | *.mo
59 | *.pot
60 | 
61 | # Django stuff:
62 | *.log
63 | .static_storage/
64 | .media/
65 | local_settings.py
66 | 
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 | 
71 | # Scrapy stuff:
72 | .scrapy
73 | 
74 | # Sphinx documentation
75 | docs/_build/
76 | 
77 | # PyBuilder
78 | target/
79 | 
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 | 
83 | # pyenv
84 | .python-version
85 | 
86 | # celery beat schedule file
87 | celerybeat-schedule
88 | 
89 | # SageMath parsed files
90 | *.sage.py
91 | 
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # mkdocs documentation
109 | /site
110 | 
111 | # mypy
112 | .mypy_cache/
113 | 
114 | .vscode/*
115 | !.vscode/settings.json
116 | !.vscode/tasks.json
117 | !.vscode/launch.json
118 | !.vscode/extensions.json
--------------------------------------------------------------------------------
/docs/source/concepts.rst:
--------------------------------------------------------------------------------
1 | .. _concepts:
2 | 
3 | Concepts
4 | ========
5 | 
6 | Lab is centred around three core concepts: *Reproducibility*, *Logging*, and *Model Persistence*. Lab is designed to integrate with your existing
7 | training scripts, while imposing as few constraints as possible.
8 | 
9 | 
10 | Reproducibility
11 | ---------------
12 | 
13 | Lab Projects are designed to be shared and re-used. This feature makes heavy use of Python's ``virtualenv`` module,
14 | enabling users to precisely define modules and environments that are required to run the associated experiments.
15 | 
16 | Every Project is initiated using a `requirements.txt <https://pip.pypa.io/en/stable/user_guide/#requirements-files>`_ file.
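A minimal example, following the quickstart workflow (the project name here is illustrative):

.. code-block:: bash

    echo "scikit-learn" > requirements.txt
    lab init --name my-project

``lab init`` builds a dedicated virtual environment in ``.venv`` from this file, so anyone who clones the Project can recreate it exactly.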
17 | 
18 | Logging
19 | -------
20 | 
21 | Lab was designed to benchmark multiple predictive models and hyperparameters. To accomplish this, it implements a simple API that stores:
22 | 
23 | - Feature names
24 | - Hyperparameters
25 | - Performance metrics
26 | - Model files
27 | 
28 | Model Persistence
29 | -----------------
30 | 
31 | Models are logged using the ``joblib`` module. This applies to both ``sklearn`` and ``keras`` experiments. This simple structure allows for a quick
32 | performance assessment and deployment of a model of choice into production.
33 | 
34 | Example Use Cases
35 | -----------------
36 | 
37 | At Bering, we use Lab for a number of use cases:
38 | 
39 | **Data Scientists** track individual experiments locally on their machines, consistently organising all files and artefacts for reproducibility.
40 | By setting up a naming schema, teams can work together on the same datasets to benchmark the performance of novel ML algorithms.
41 | 
42 | **Production Engineers** assess model performance and decide on the best possible model to be served in production environments. Lab's strict model
43 | versioning serves as a link between research and development environments and evolving production components.
44 | 
45 | **ML Researchers** can publish code to GitHub as a Lab Project, making it easy for others to reproduce findings.
46 | 
--------------------------------------------------------------------------------
/examples/sklearn_randomforest.py:
--------------------------------------------------------------------------------
1 | """
2 | Getting started with Lab and scikit-learn
3 | =========================================
4 | 
5 | This example illustrates how Lab can be used to create and run a simple
6 | classifier on the iris dataset.
7 | 
8 | Begin by creating a new Lab Project:
9 | 
10 | >>> echo "scikit-learn" > requirements.txt
11 | >>> lab init --name simple-iris
12 | 
13 | """
14 | 
15 | import argparse
16 | from sklearn import datasets
17 | from sklearn.ensemble import RandomForestClassifier
18 | from sklearn.model_selection import train_test_split
19 | from sklearn.metrics import accuracy_score, precision_score
20 | 
21 | from lab.experiment import Experiment
22 | 
23 | parser = argparse.ArgumentParser('Test arguments')
24 | 
25 | parser.add_argument('--n_estimators', type=int, dest='n_estimators')
26 | args = parser.parse_args()
27 | 
28 | n_estimators = args.n_estimators
29 | max_depth = 2  # defined unconditionally so it is always available for logging
30 | if n_estimators is None:
31 |     n_estimators = 100
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     e = Experiment(dataset='iris_75')
36 | 
37 |     @e.start_run
38 |     def train():
39 |         iris = datasets.load_iris()
40 |         X = iris.data
41 |         y = iris.target
42 | 
43 |         X_train, X_test, y_train, y_test = train_test_split(X, y,
44 |                                                             test_size=0.25,
45 |                                                             random_state=42)
46 | 
47 |         e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length',
48 |                         'Petal Width'])
49 |         clf = RandomForestClassifier(n_estimators=n_estimators,
50 |                                      max_depth=max_depth)
51 |         clf.fit(X_train, y_train)
52 | 
53 |         y_pred = clf.predict(X_test)
54 |         accuracy = accuracy_score(y_test, y_pred)
55 |         precision = precision_score(y_test, y_pred, average='macro')
56 | 
57 |         e.log_metric('accuracy_score', accuracy)
58 |         e.log_metric('precision_score', precision)
59 | 
60 |         e.log_parameter('n_estimators', n_estimators)
61 |         e.log_parameter('max_depth', max_depth)
62 | 
63 |         e.log_model('randomforest', clf)
64 | 
65 | ##############################################################
66 | # Afterwards, execute the training script through the `lab run` command.
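# Lab assigns the run an Experiment ID and stores the logged metrics,
# parameters and serialised model under the Project's ``experiments/``
# directory (see the quickstart guide for a sample ``lab ls`` listing).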
67 | # 68 | # >>> lab run train.py 69 | # >>> lab ls 70 | -------------------------------------------------------------------------------- /docs/source/auto_examples/sklearn_randomforest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Getting started with Lab and scikit-learn 3 | ========================================= 4 | 5 | This example illustrates how Lab can be used to create and run a simple 6 | classifier on the iris dataset. 7 | 8 | Begin by creating a new Lab Project: 9 | 10 | >>> echo "scikit-learn" > requirements.txt 11 | >>> lab init --name simple-iris 12 | 13 | """ 14 | 15 | import argparse 16 | from sklearn import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | from sklearn.model_selection import train_test_split 19 | from sklearn.metrics import accuracy_score, precision_score 20 | 21 | from lab.experiment import Experiment 22 | 23 | parser = argparse.ArgumentParser('Test arguments') 24 | 25 | parser.add_argument('--n_estimators', type=int, dest='n_estimators') 26 | args = parser.parse_args() 27 | 28 | n_estimators=args.n_estimators 29 | 30 | if n_estimators is None: 31 | n_estimators=100 32 | max_depth=2 33 | 34 | if __name__ == "__main__": 35 | e = Experiment(dataset='iris_75') 36 | 37 | @e.start_run 38 | def train(): 39 | iris = datasets.load_iris() 40 | X = iris.data 41 | y = iris.target 42 | 43 | X_train, X_test, y_train, y_test = train_test_split(X, y, 44 | test_size=0.25, 45 | random_state=42) 46 | 47 | e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 48 | 'Petal Width']) 49 | clf = RandomForestClassifier(n_estimators=n_estimators) 50 | 51 | clf.fit(X_train, y_train) 52 | 53 | y_pred = clf.predict(X_test) 54 | accuracy = accuracy_score(y_test, y_pred) 55 | precision = precision_score(y_test, y_pred, average = 'macro') 56 | 57 | e.log_metric('accuracy_score', accuracy) 58 | e.log_metric('precision_score', precision) 59 | 60 | e.log_parameter('n_estimators', n_estimators) 61 | e.log_parameter('max_depth', max_depth) 62 | 63 | e.log_model('randomforest', clf) 64 | 65 | ############################################################## 66 | # After execute training script through the `lab run` command. 67 | # 68 | # >>> lab run train.py 69 | # >>> lab ls 70 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'lab' 21 | copyright = '2020, Bering Limited' 22 | author = 'Bering Limited' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. 
They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx_gallery.gen_gallery']
31 | 
32 | master_doc = 'index'
33 | 
34 | sphinx_gallery_conf = {
35 |     'examples_dirs': '../../examples',
36 |     'gallery_dirs': 'auto_examples',
37 |     'plot_gallery': True
38 | }
39 | 
40 | html_theme_options = {
41 |     'header_links': [
42 |         ('Home', 'index', False, 'home'),
43 |         ('GitHub', 'https://github.com/beringresearch/lab', True, 'link'),
44 |         ('Bering', 'http://beringresearch.com', True, 'launch')],
45 |     'show_drawer_title': True,
46 | }
47 | 
48 | # Add any paths that contain templates here, relative to this directory.
49 | templates_path = ['_templates']
50 | 
51 | # List of patterns, relative to source directory, that match files and
52 | # directories to ignore when looking for source files.
53 | # This pattern also affects html_static_path and html_extra_path.
54 | exclude_patterns = []
55 | 
56 | 
57 | # -- Options for HTML output -------------------------------------------------
58 | 
59 | # The theme to use for HTML and HTML Help pages.  See the documentation for
60 | # a list of builtin themes.
61 | #
62 | html_theme = 'sphinx_materialdesign_theme'
63 | #html_logo = '_static/lab-logo.jpeg'
64 | 
65 | # Add any paths that contain custom static files (such as style sheets) here,
66 | # relative to this directory. They are copied after the builtin static files,
67 | # so a file named "default.css" will overwrite the builtin "default.css".
68 | html_static_path = ['_static']
69 | 
--------------------------------------------------------------------------------
/lab/checks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import click
4 | import shutil
5 | import subprocess
6 | import venv as ve
7 | 
8 | lab_project = ['experiments', 'data', 'logs', 'notebooks', 'config']
9 | 
10 | 
11 | # Project
12 | def check_minio_config(minio_tag):
13 |     """Check that minio configuration exists"""
14 |     home_dir = os.path.expanduser('~')
15 |     lab_dir = os.path.join(home_dir, '.lab')
16 | 
17 |     try:
18 |         with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file:
19 |             yaml.safe_load(file)[minio_tag]
20 |     except Exception as e:
21 |         print(str(e))
22 |         click.secho('Invalid global minio connection tag.', fg='red')
23 |         raise click.Abort()
24 | 
25 | 
26 | def is_venv(home_dir):
27 |     """Check that virtual environment exists"""
28 |     if not os.path.exists(os.path.join(home_dir, '.venv')):
29 |         click.secho('Virtual environment not found. '
30 |                     'Creating one for this project',
31 |                     fg='blue')
32 |         create_venv(home_dir)
33 | 
34 | 
35 | def is_empty_project():
36 |     """Check if there are any experiments in the project"""
37 |     experiments = next(os.walk('experiments'))[1]
38 |     if len(experiments) == 0:
39 |         click.secho("It looks like you've started a brand new project. "
40 |                     'Run your first experiment to generate a list of metrics.',
41 |                     fg='blue')
42 |         raise click.Abort()
43 | 
44 | 
45 | def is_lab_project():
46 |     """Check if the current directory is a lab project"""
47 |     _exists = [f for f in lab_project if os.path.exists(f)]
48 | 
49 |     if len(_exists) != len(lab_project):
50 |         click.secho('This directory does not appear to be a valid '
51 |                     'Lab Project.\nRun `lab init` to create one.',
52 |                     fg='red')
53 |         raise click.Abort()
54 | 
55 | 
56 | def create_venv(project_name):
57 |     """Create a lab virtual environment"""
58 |     # Create a virtual environment
59 |     venv_dir = os.path.join(project_name, '.venv')
60 | 
61 |     try:
62 |         environment = ve.EnvBuilder(system_site_packages=False,
63 |                                     symlinks=True, with_pip=True)
64 |         environment.create(venv_dir)
65 | 
66 |         subprocess.call([venv_dir + '/bin/pip', 'install',
67 |                          '--upgrade', 'pip'])
68 | 
69 |         subprocess.call([venv_dir + '/bin/pip',
70 |                          'install', '--upgrade', 'lab-ml'])
71 | 
72 |         subprocess.call([venv_dir + '/bin/pip', 'install',
73 |                          '-r', 'requirements.txt'])
74 | 
75 |     except Exception as e:
76 |         shutil.rmtree(venv_dir)
77 |         click.secho('Something went wrong during .venv creation.',
78 |                     fg='red')
79 |         print(str(e))
80 |         raise click.Abort()
81 | 
--------------------------------------------------------------------------------
/docs/source/push.rst:
--------------------------------------------------------------------------------
1 | .. _push:
2 | 
3 | Model Repository
4 | ================
5 | 
6 | Lab uses minio to store Projects. Minio is a high performance distributed object storage server, designed for large-scale private cloud infrastructure. This makes it a great fit as a storage environment for multiple Lab Projects and Experiments. Lab makes it trivial to back up completed Projects and share them across teams.
7 | 
8 | .. image:: _static/lab-uml.png
9 |     :height: 500 px
10 |     :width: 800 px
11 |     :scale: 100 %
12 | 
13 | 
14 | Configuring minio server
15 | ------------------------
16 | 
17 | There are a number of ways to install minio on a wide range of operating systems. See more detailed installation
18 | instructions in the minio documentation pages.
19 | 
20 | Setting up Lab minio interface
21 | ------------------------------
22 | 
23 | Once minio is up and running, you will need to make a note of the ``endpoint``, ``access key``, and ``secret key``. Lab supports multiple minio configurations
24 | through a convenient tagging system. Each configuration can be set up through the CLI:
25 | 
26 | .. code-block:: bash
27 | 
28 |     lab config minio --tag [MINIO_TAG] --endpoint [TEXT] --accesskey [TEXT] --secretkey [TEXT]
29 | 
30 | Note that the endpoint is simply an IP address and port of a minio host, e.g. ``192.168.1.50:9000``.
31 | 
32 | Storing Lab Projects
33 | --------------------
34 | 
35 | Lab Projects can be pushed to a specific minio host by running a simple command from the Project root folder:
36 | 
37 | .. code-block:: bash
38 | 
39 |     lab push --tag [MINIO_TAG] --bucket [TEXT] .
40 | 
41 | Here, ``--tag`` specifies a nickname of an existing minio connection and ``--bucket`` refers to a unique destination name on the minio host, analogous to an S3 bucket.
42 | 
43 | Each project contains a `.labignore` file that specifies intentionally untracked files to ignore during a push. A default `.labignore` will omit the virtual environment directory `.venv`. Further omissions can be specified on each line:
44 | 
45 | .. 
code::
46 | 
47 |     .venv
48 |     data
49 |     experiments/abcdefgh/model.joblib
50 | 
51 | 
52 | Pruning remote repository
53 | -------------------------
54 | 
55 | Sometimes it may be desirable to prune a remote repository. Pruning simply replaces the entire content of a remote repository with local files.
56 | The user is warned just before proceeding, as this operation can have undesirable consequences.
57 | 
58 | .. code-block:: bash
59 | 
60 |     lab push --tag [MINIO_TAG] --bucket [TEXT] --force .
61 | 
62 | Pulling from a remote repository
63 | --------------------------------
64 | 
65 | To retrieve a Lab Project from a minio host, run a simple command from the folder into which you'd like to pull the Project:
66 | 
67 | .. code-block:: bash
68 | 
69 |     lab pull --tag [MINIO_TAG] --bucket [TEXT] --project [TEXT]
70 | 
71 | 
72 | In cases where a connection with minio has already been established, a project can be pushed/pulled directly from the project directory via ``lab push`` or ``lab pull`` without further options.
73 | 
--------------------------------------------------------------------------------
/docs/source/auto_examples/sklearn_randomforest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "cells": [
3 |     {
4 |       "cell_type": "code",
5 |       "execution_count": null,
6 |       "metadata": {
7 |         "collapsed": false
8 |       },
9 |       "outputs": [],
10 |       "source": [
11 |         "%matplotlib inline"
12 |       ]
13 |     },
14 |     {
15 |       "cell_type": "markdown",
16 |       "metadata": {},
17 |       "source": [
18 |         "\nGetting started with Lab and scikit-learn\n=========================================\n\nThis example illustrates how Lab can be used to create and run a simple\nclassifier on the iris dataset.\n\nBegin by creating a new Lab Project:\n\n    >>> echo \"scikit-learn\" > requirements.txt\n    >>> lab init --name simple-iris\n" 
19 |       ]
20 |     },
21 |     {
22 |       "cell_type": "code",
23 |       "execution_count": null,
24 |       "metadata": {
25 |         "collapsed": false
26 |       },
27 |       "outputs": [],
28 |       "source": [
29 |         "import argparse\nfrom sklearn import datasets\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, precision_score\n\nfrom lab.experiment import Experiment\n\nparser = argparse.ArgumentParser('Test arguments')\n\nparser.add_argument('--n_estimators', type=int, dest='n_estimators')\nargs = parser.parse_args()\n\nn_estimators=args.n_estimators\n\nif n_estimators is None:\n    n_estimators=100\n    max_depth=2\n\nif __name__ == \"__main__\":\n    e = Experiment(dataset='iris_75')\n\n    @e.start_run\n    def train():\n        iris = datasets.load_iris()\n        X = iris.data\n        y = iris.target\n\n        X_train, X_test, y_train, y_test = train_test_split(X, y,\n                                                            test_size=0.25,\n                                                            random_state=42)\n\n        e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length',\n                        'Petal Width'])\n        clf = RandomForestClassifier(n_estimators=n_estimators)\n\n        clf.fit(X_train, y_train)\n\n        y_pred = clf.predict(X_test)\n        accuracy = accuracy_score(y_test, y_pred)\n        precision = precision_score(y_test, y_pred, average = 'macro')\n\n        e.log_metric('accuracy_score', accuracy)\n        e.log_metric('precision_score', precision)\n\n        e.log_parameter('n_estimators', n_estimators)\n        e.log_parameter('max_depth', max_depth)\n\n        e.log_model('randomforest', clf)"
30 |       ]
31 |     },
32 |     {
33 |       "cell_type": "markdown",
34 |       "metadata": {},
35 |       "source": [
36 |         "Afterwards, execute the training script through the `lab run` command.\n\n>>> lab run train.py\n>>> lab ls\n\n"
37 |       ]
38 |     }
39 |   ],
40 |   "metadata": {
41 |     "kernelspec": {
42 |       "display_name": "Python 3",
43 |       "language": "python",
44 |       "name": "python3"
45 |     },
46 |     "language_info": {
47 |       "codemirror_mode": {
48 |         "name": "ipython",
49 |         "version": 3
50 |       },
51 |       "file_extension": ".py",
52 |       "mimetype": "text/x-python",
53 |       "name": "python",
54 |       "nbconvert_exporter": "python",
55 |       "pygments_lexer": "ipython3",
56 |       "version": "3.7.5"
57 |     }
58 |   },
59 |   "nbformat": 4,
60 |   "nbformat_minor": 0
61 | }
--------------------------------------------------------------------------------
/examples/keras_mnist_mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | Running Keras models with Tensorboard
3 | =====================================
4 | 
5 | Lab integrates into a typical keras workflow.
6 | 
7 | WARNING: model persistence in Keras can be complicated, especially when
8 | working with complex models. It is recommended to checkpoint each training
9 | epoch independently from Lab's ``log_model`` API.
10 | 
11 | Begin by creating a new Lab Project:
12 | 
13 | >>> echo "keras" > requirements.txt
14 | >>> lab init --name simple-keras
15 | """
16 | 
17 | import keras
18 | from keras.datasets import mnist
19 | from keras.models import Sequential
20 | from keras.layers import Dense, Dropout
21 | from keras.optimizers import RMSprop
22 | from keras.callbacks import TensorBoard
23 | 
24 | import tempfile
25 | 
26 | from sklearn.metrics import accuracy_score, precision_score
27 | 
28 | from lab.experiment import Experiment
29 | 
30 | batch_size = 128
31 | num_classes = 10
32 | epochs = 20
33 | 
34 | # the data, split between train and test sets
35 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
36 | 
37 | x_train = x_train.reshape(60000, 784)
38 | x_test = x_test.reshape(10000, 784)
39 | x_train = x_train.astype('float32')
40 | x_test = x_test.astype('float32')
41 | x_train /= 255
42 | x_test /= 255
43 | print(x_train.shape[0], 'train samples')
44 | print(x_test.shape[0], 'test samples')
45 | 
46 | # convert class vectors to binary class matrices
47 | y_train = keras.utils.to_categorical(y_train, num_classes)
48 | y_test = keras.utils.to_categorical(y_test, num_classes)
49 | 
50 | model = Sequential()
51 | model.add(Dense(512, activation='relu', input_shape=(784,)))
52 | model.add(Dropout(0.2))
53 | model.add(Dense(512, activation='relu'))
54 | model.add(Dropout(0.2))
55 | model.add(Dense(num_classes, activation='softmax'))
56 | 
57 | model.compile(loss='categorical_crossentropy',
58 |               optimizer=RMSprop(),
59 |               metrics=['accuracy'])
60 | 
61 | e = Experiment()
62 | 
63 | 
64 | @e.start_run
65 | def train():
66 | 
67 |     # Create a temporary directory for tensorboard logs
68 |     output_dir = tempfile.mkdtemp()
69 |     print("Writing TensorBoard events locally to %s\n" % output_dir)
70 |     tensorboard = TensorBoard(log_dir=output_dir)
71 | 
72 |     # During Experiment execution, tensorboard can be viewed through:
73 |     # tensorboard --logdir=[output_dir]
74 | 
75 |     model.fit(x_train, y_train,
76 |               batch_size=batch_size,
77 |               epochs=epochs,
78 |               verbose=1,
79 |               validation_data=(x_test, y_test),
80 |               callbacks=[tensorboard])
81 | 
82 |     y_prob = model.predict(x_test)
83 |     y_classes = y_prob.argmax(axis=-1)
84 |     actual = y_test.argmax(axis=-1)
85 | 
86 |     accuracy = accuracy_score(y_true=actual, y_pred=y_classes)
87 |     precision = precision_score(y_true=actual, y_pred=y_classes,
88 |                                 average='macro')
89 | 
90 |     # Log tensorboard
91 |     e.log_artifacts('tensorboard', output_dir)
92 | 
93 |     # Log all metrics
94 |     
e.log_metric('accuracy_score', accuracy) 95 | e.log_metric('precision_score', precision) 96 | 97 | # Log parameters 98 | e.log_parameter('batch_size', batch_size) 99 | 100 | # Save model 101 | e.log_model('mnist-mlp', model) 102 | -------------------------------------------------------------------------------- /docs/source/auto_examples/keras_mnist_mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Running Keras models with Tensorboard 3 | ===================================== 4 | 5 | Lab integrates into a typical keras workflow. 6 | 7 | WARNING: model persistence in Keras can be complicated, especially when 8 | working with complext models. It is recommended to checkpoint each training 9 | epoch independently from Lab's ``log_model`` API. 10 | 11 | Bering by creating a new Lab Project: 12 | 13 | >>> echo "keras" > requirements.txt 14 | >>> lab init --name simple-keras 15 | """ 16 | 17 | import keras 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Dropout 21 | from keras.optimizers import RMSprop 22 | from keras.callbacks import TensorBoard 23 | 24 | import tempfile 25 | 26 | from sklearn.metrics import accuracy_score, precision_score 27 | 28 | from lab.experiment import Experiment 29 | 30 | batch_size = 128 31 | num_classes = 10 32 | epochs = 20 33 | 34 | # the data, split between train and test sets 35 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 36 | 37 | x_train = x_train.reshape(60000, 784) 38 | x_test = x_test.reshape(10000, 784) 39 | x_train = x_train.astype('float32') 40 | x_test = x_test.astype('float32') 41 | x_train /= 255 42 | x_test /= 255 43 | print(x_train.shape[0], 'train samples') 44 | print(x_test.shape[0], 'test samples') 45 | 46 | # convert class vectors to binary class matrices 47 | y_train = keras.utils.to_categorical(y_train, num_classes) 48 | y_test = keras.utils.to_categorical(y_test, num_classes) 49 | 50 | model = Sequential() 51 | model.add(Dense(512, activation='relu', input_shape=(784,))) 52 | model.add(Dropout(0.2)) 53 | model.add(Dense(512, activation='relu')) 54 | model.add(Dropout(0.2)) 55 | model.add(Dense(num_classes, activation='softmax')) 56 | 57 | model.compile(loss='categorical_crossentropy', 58 | optimizer=RMSprop(), 59 | metrics=['accuracy']) 60 | 61 | e = Experiment() 62 | 63 | 64 | @e.start_run 65 | def train(): 66 | 67 | # Create a temporary directory for tensorboard logs 68 | output_dir = tempfile.mkdtemp() 69 | print("Writing TensorBoard events locally to %s\n" % output_dir) 70 | tensorboard = TensorBoard(log_dir=output_dir) 71 | 72 | # During Experiment execution, tensorboard can be viewed through: 73 | # tensorboard --logdir=[output_dir] 74 | 75 | model.fit(x_train, y_train, 76 | batch_size=batch_size, 77 | epochs=epochs, 78 | verbose=1, 79 | validation_data=(x_test, y_test), 80 | callbacks=[tensorboard]) 81 | 82 | y_prob = model.predict(x_test) 83 | y_classes = y_prob.argmax(axis=-1) 84 | actual = y_test.argmax(axis=-1) 85 | 86 | accuracy = accuracy_score(y_true=actual, y_pred=y_classes) 87 | precision = precision_score(y_true=actual, y_pred=y_classes, 88 | average='macro') 89 | 90 | # Log tensorboard 91 | e.log_artifacts('tensorboard', output_dir) 92 | 93 | # Log all metrics 94 | e.log_metric('accuracy_score', accuracy) 95 | e.log_metric('precision_score', precision) 96 | 97 | # Log parameters 98 | e.log_parameter('batch_size', batch_size) 99 | 100 | # Save model 101 | e.log_model('mnist-mlp', model) 102 | 
-------------------------------------------------------------------------------- /docs/source/auto_examples/sklearn_randomforest.rst: -------------------------------------------------------------------------------- 1 | .. note:: 2 | :class: sphx-glr-download-link-note 3 | 4 | Click :ref:`here ` to download the full example code 5 | .. rst-class:: sphx-glr-example-title 6 | 7 | .. _sphx_glr_auto_examples_sklearn_randomforest.py: 8 | 9 | 10 | Getting started with Lab and scikit-learn 11 | ========================================= 12 | 13 | This example illustrates how Lab can be used to create and run a simple 14 | classifier on the iris dataset. 15 | 16 | Begin by creating a new Lab Project: 17 | 18 | >>> echo "scikit-learn" > requirements.txt 19 | >>> lab init --name simple-iris 20 | 21 | 22 | .. code-block:: default 23 | 24 | 25 | import argparse 26 | from sklearn import datasets 27 | from sklearn.ensemble import RandomForestClassifier 28 | from sklearn.model_selection import train_test_split 29 | from sklearn.metrics import accuracy_score, precision_score 30 | 31 | from lab.experiment import Experiment 32 | 33 | parser = argparse.ArgumentParser('Test arguments') 34 | 35 | parser.add_argument('--n_estimators', type=int, dest='n_estimators') 36 | args = parser.parse_args() 37 | 38 | n_estimators=args.n_estimators 39 | 40 | if n_estimators is None: 41 | n_estimators=100 42 | max_depth=2 43 | 44 | if __name__ == "__main__": 45 | e = Experiment(dataset='iris_75') 46 | 47 | @e.start_run 48 | def train(): 49 | iris = datasets.load_iris() 50 | X = iris.data 51 | y = iris.target 52 | 53 | X_train, X_test, y_train, y_test = train_test_split(X, y, 54 | test_size=0.25, 55 | random_state=42) 56 | 57 | e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 58 | 'Petal Width']) 59 | clf = RandomForestClassifier(n_estimators=n_estimators) 60 | 61 | clf.fit(X_train, y_train) 62 | 63 | y_pred = clf.predict(X_test) 64 | accuracy = accuracy_score(y_test, y_pred) 65 | precision = precision_score(y_test, y_pred, average = 'macro') 66 | 67 | e.log_metric('accuracy_score', accuracy) 68 | e.log_metric('precision_score', precision) 69 | 70 | e.log_parameter('n_estimators', n_estimators) 71 | e.log_parameter('max_depth', max_depth) 72 | 73 | e.log_model('randomforest', clf) 74 | 75 | 76 | After execute training script through the `lab run` command. 77 | 78 | >>> lab run train.py 79 | >>> lab ls 80 | 81 | 82 | .. rst-class:: sphx-glr-timing 83 | 84 | **Total running time of the script:** ( 0 minutes 0.000 seconds) 85 | 86 | 87 | .. _sphx_glr_download_auto_examples_sklearn_randomforest.py: 88 | 89 | 90 | .. only :: html 91 | 92 | .. container:: sphx-glr-footer 93 | :class: sphx-glr-footer-example 94 | 95 | 96 | 97 | .. container:: sphx-glr-download 98 | 99 | :download:`Download Python source code: sklearn_randomforest.py ` 100 | 101 | 102 | 103 | .. container:: sphx-glr-download 104 | 105 | :download:`Download Jupyter notebook: sklearn_randomforest.ipynb ` 106 | 107 | 108 | .. only:: html 109 | 110 | .. rst-class:: sphx-glr-signature 111 | 112 | `Gallery generated by Sphinx-Gallery `_ 113 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | Quickstart 4 | ========== 5 | 6 | Installing Lab 7 | -------------- 8 | 9 | For the time being, lab is available through our github repository: 10 | 11 | .. 
code-block:: bash
12 | 
13 |     git clone https://github.com/beringresearch/lab
14 |     cd lab
15 |     pip install --editable .
16 | 
17 | .. note::
18 | 
19 |     You cannot install Lab on the macOS system installation of Python. We recommend installing
20 |     Python 3 through the `Homebrew <https://brew.sh>`_ package manager using
21 |     ``brew install python``.
22 | 
23 | Setting up your first Project
24 | -----------------------------
25 | Lab projects are initiated using a ``requirements.txt`` file. This ensures a consistent and reproducible environment.
26 | 
27 | Let's create a simple environment that imports sklearn:
28 | 
29 | .. code-block:: bash
30 | 
31 |     echo "scikit-learn" >> requirements.txt
32 |     lab init --name test
33 | 
34 | Lab will run through project initialisation and create a new **test** project with its own virtual environment.
35 | 
36 | Creating your first Lab Experiment
37 | ----------------------------------
38 | Training scripts can be placed directly into the *test/* directory. Here's an example training script, *train.py*, set up to train a Random Forest classifier with the appropriate Lab logging API:
39 | 
40 | .. code-block:: python
41 | 
42 |     from sklearn import datasets
43 |     from sklearn.ensemble import RandomForestClassifier
44 |     from sklearn.model_selection import train_test_split
45 |     from sklearn.metrics import accuracy_score, precision_score
46 | 
47 |     from lab.experiment import Experiment  # Import Experiment
48 | 
49 |     e = Experiment()  # Initialise Lab Experiment
50 | 
51 |     @e.start_run  # Indicate the start of the Experiment
52 |     def train():
53 |         iris = datasets.load_iris()
54 |         X = iris.data
55 |         y = iris.target
56 | 
57 |         X_train, X_test, \
58 |             y_train, y_test = train_test_split(X, y,
59 |                                                test_size=0.24,
60 |                                                random_state=42)
61 | 
62 |         n_estimators = 100
63 | 
64 |         e.log_features(['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'])
65 |         clf = RandomForestClassifier(n_estimators=n_estimators)
66 |         clf.fit(X_train, y_train)
67 | 
68 |         y_pred = clf.predict(X_test)
69 |         accuracy = accuracy_score(y_test, y_pred)
70 |         precision = precision_score(y_test, y_pred, average='macro')
71 | 
72 |         e.log_metric('accuracy_score', accuracy)  # Log accuracy
73 |         e.log_metric('precision_score', precision)  # Log precision
74 | 
75 |         e.log_parameter('n_estimators', n_estimators)  # Log parameters of your choice
76 | 
77 |         e.log_model('randomforest', clf)  # Log the actual model
78 | 
79 | Running a Lab Experiment
80 | ------------------------
81 | 
82 | The Experiment can now be launched through:
83 | 
84 | .. code-block:: bash
85 | 
86 |     lab run train.py
87 | 
88 | Lab will log performance metrics and model files into appropriate Experiment folders.
89 | 
90 | Compare Lab Experiments
91 | ------------------------
92 | 
93 | Multiple Experiments can be compared from the root of the Project folder:
94 | 
95 | .. 
code-block:: bash
96 | 
97 |     lab ls
98 | 
99 |     Experiment    Source              Date        accuracy_score    precision_score
100 |     ------------  ------------------  ----------  ----------------  -----------------
101 |     49ffb76e      train_mnist_mlp.py  2019-01-15  0.97: ██████████  0.97: ██████████
102 |     261a34e4      train_mnist_cnn.py  2019-01-15  0.98: ██████████  0.98: ██████████
--------------------------------------------------------------------------------
/lab/experiment/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 | import os
3 | import subprocess
4 | import yaml
5 | import shutil
6 | import sys
7 | import graphviz
8 | 
9 | from lab.experiment import show_experiment
10 | from lab import is_lab_project, is_empty_project, is_venv
11 | 
12 | 
13 | @click.command('rm')
14 | @click.argument('experiment_id', required=True)
15 | def lab_rm(experiment_id):
16 |     """ Remove a Lab Experiment """
17 | 
18 |     is_lab_project()
19 | 
20 |     experiment_dir = os.path.join('experiments', experiment_id)
21 |     logs_dir = os.path.join('logs', experiment_id)
22 | 
23 |     if not os.path.exists(experiment_dir):
24 |         click.secho("Can't find experiment ["+experiment_id+'] in the current '
25 |                     'directory.\nEnsure that you are in Lab Project root',
26 |                     fg='red')
27 |     else:
28 |         shutil.rmtree(experiment_dir)
29 |         shutil.rmtree(logs_dir)
30 |         click.secho('['+experiment_id+'] removed', fg='blue')
31 | 
32 | 
33 | @click.command('show')
34 | @click.argument('experiment_id', required=False)
35 | def lab_show(experiment_id=None):
36 |     """ Show a Lab Experiment """
37 | 
38 |     is_lab_project()
39 |     is_empty_project()
40 | 
41 |     models_directory = 'experiments'
42 | 
43 |     experiments = next(os.walk(models_directory))[1]
44 | 
45 |     if experiment_id is None:
46 |         experiments = next(os.walk('experiments'))[1]
47 |         p = graphviz.Digraph(name='lab_project', format='png')
48 |         p.graph_attr['rankdir'] = 'LR'
49 | 
50 |         for e in experiments:
51 |             p.subgraph(show_experiment(e))
52 |     else:
53 |         experiment_dir = os.path.join('experiments', experiment_id)
54 |         if not os.path.exists(experiment_dir):
55 |             click.secho(
56 |                 "Can't find experiment ["+experiment_id+'] in the current '
57 |                 'directory.\nEnsure that you are in Lab Project root',
58 |                 fg='red')
59 |             raise click.Abort()
60 |         else:
61 |             p = show_experiment(experiment_id)
62 | 
63 |     p.render()
64 | 
65 | 
66 | @click.command('run', context_settings=dict(
67 |     ignore_unknown_options=True,
68 | ))
69 | @click.argument('script', required=False,
70 |                 nargs=-1, type=click.UNPROCESSED)
71 | def lab_run(script):
72 |     """ Run a training script """
73 | 
74 |     home_dir = os.getcwd()
75 | 
76 |     is_lab_project()
77 |     is_venv(home_dir)
78 | 
79 |     try:
80 |         with open(os.path.join(os.getcwd(),
81 |                                'config', 'runtime.yaml'), 'r') as file:
82 |             config = yaml.safe_load(file)
83 |             home_dir = config['path']
84 | 
85 |             # Update project directory if it hasn't been updated
86 |             if home_dir != os.getcwd():
87 |                 config['path'] = os.getcwd()
88 |                 home_dir = config['path']
89 | 
90 |                 with open(os.path.join(os.getcwd(),
91 |                           'config', 'runtime.yaml'), 'w') as file:
92 |                     yaml.dump(config, file, default_flow_style=False)
93 | 
94 |     except KeyError:
95 |         click.secho('Looks like this Project was configured with an earlier '
96 |                     'version of Lab. 
Check that config/runtime.yaml file '
97 |                     'has a valid path key and value.', fg='red')
98 |         raise click.Abort()
99 | 
100 |     # Resolve the Python interpreter from the Project's virtual environment
101 |     click.secho('Initializing', fg='cyan')
102 | 
103 |     python_bin = os.path.join(home_dir, '.venv', 'bin/python')
104 | 
105 |     click.secho('Running '+str(script), fg='green')
106 |     subprocess.call([python_bin] + list(script))
107 |     click.secho('Finished!', fg='green')
108 | 
--------------------------------------------------------------------------------
/docs/source/dlexperiments.rst:
--------------------------------------------------------------------------------
1 | .. _dlexperiments:
2 | 
3 | Managing Deep Learning Experiments
4 | ==================================
5 | 
6 | The Deep Learning experiment lifecycle generates a rich set of data artifacts, e.g., expansive datasets, complex model architectures, varied hyperparameters, learned weights, and training logs. To produce an effective model, a researcher often has to iterate over multiple scripts, making it challenging to reproduce complex experiments.
7 | 
8 | Lab offers a clean and standardised interface for managing the many moving parts of a Deep Learning experiment.
9 | 
10 | MNIST Example
11 | ~~~~~~~~~~~~~~~~
12 | 
13 | Consider the following lab training script. Let's set up our hyperparameters and training and test sets:
14 | 
15 | .. code-block:: python
16 | 
17 |     import keras
18 |     from keras.datasets import mnist
19 |     from keras.models import Sequential
20 |     from keras.layers import Dense, Dropout
21 |     from keras.optimizers import RMSprop
22 |     from keras.callbacks import TensorBoard
23 | 
24 |     import tempfile
25 | 
26 |     from sklearn.metrics import accuracy_score, precision_score
27 | 
28 |     from lab.experiment import Experiment
29 | 
30 |     BATCH_SIZE = 128
31 |     EPOCHS = 20
32 |     CHECKPOINT_PATH = 'tf/weights'
33 |     num_classes = 10
34 | 
35 | 
36 |     # the data, split between train and test sets
37 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
38 | 
39 |     x_train = x_train.reshape(60000, 784)
40 |     x_test = x_test.reshape(10000, 784)
41 |     x_train = x_train.astype('float32')
42 |     x_test = x_test.astype('float32')
43 |     x_train /= 255
44 |     x_test /= 255
45 |     print(x_train.shape[0], 'train samples')
46 |     print(x_test.shape[0], 'test samples')
47 | 
48 |     # convert class vectors to binary class matrices
49 |     y_train = keras.utils.to_categorical(y_train, num_classes)
50 |     y_test = keras.utils.to_categorical(y_test, num_classes)
51 | 
52 | 
53 | Next, define a model (for instance, the MLP from the Keras example in the gallery) and train:
54 | 
55 | .. 
code-block:: python
56 | 
57 |     e = Experiment()
58 | 
59 | 
60 |     @e.start_run
61 |     def train():
62 | 
63 |         # Create a temporary directory for tensorboard logs
64 |         output_dir = tempfile.mkdtemp()
65 |         print("Writing TensorBoard events locally to %s\n" % output_dir)
66 |         tensorboard = TensorBoard(log_dir=output_dir)
67 | 
68 |         # During Experiment execution, tensorboard can be viewed through:
69 |         # tensorboard --logdir=[output_dir]
70 | 
71 |         model.fit(x_train, y_train,
72 |                   batch_size=BATCH_SIZE,
73 |                   epochs=EPOCHS,
74 |                   verbose=1,
75 |                   validation_data=(x_test, y_test),
76 |                   callbacks=[tensorboard])
77 | 
78 |         model.save_weights(CHECKPOINT_PATH)
79 | 
80 |         y_prob = model.predict(x_test)
81 |         y_classes = y_prob.argmax(axis=-1)
82 |         actual = y_test.argmax(axis=-1)
83 | 
84 |         accuracy = accuracy_score(y_true=actual, y_pred=y_classes)
85 |         precision = precision_score(y_true=actual, y_pred=y_classes,
86 |                                     average='macro')
87 | 
88 |         # Log tensorboard
89 |         e.log_artifacts('tensorboard', output_dir)
90 |         e.log_artifacts('weights', CHECKPOINT_PATH)
91 | 
92 |         # Log all metrics
93 |         e.log_metric('accuracy_score', accuracy)
94 |         e.log_metric('precision_score', precision)
95 | 
96 |         # Log parameters
97 |         e.log_parameter('batch_size', BATCH_SIZE)
98 |         e.log_parameter('epochs', EPOCHS)
99 | 
100 | When training on distributed systems with Horovod, the `model.fit` step can be abstracted into a separate file, say `horovod-train.py`, and called directly from the `train()` method, for example via Horovod's `horovodrun` launcher:
101 | 
102 | .. code-block:: python
103 | 
104 |     import subprocess
105 | 
106 |     args = ['horovodrun', '-np', '8',  # launch 8 GPU processes
107 |             '-H', 'localhost:8', 'python',
108 |             'horovod-train.py',
109 |             '--checkpoint', CHECKPOINT_PATH,
110 |             '--batch-size', str(BATCH_SIZE),
111 |             '--epochs', str(EPOCHS)]
112 |     subprocess.call(args)
113 | 
114 | Note that you need to enable your Horovod script to accept the basic model hyperparameters that you wish to log downstream.
--------------------------------------------------------------------------------
/docs/source/auto_examples/keras_mnist_mlp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "cells": [
3 |     {
4 |       "cell_type": "code",
5 |       "execution_count": null,
6 |       "metadata": {
7 |         "collapsed": false
8 |       },
9 |       "outputs": [],
10 |       "source": [
11 |         "%matplotlib inline"
12 |       ]
13 |     },
14 |     {
15 |       "cell_type": "markdown",
16 |       "metadata": {},
17 |       "source": [
18 |         "\nRunning Keras models with Tensorboard\n=====================================\n\nLab integrates into a typical keras workflow.\n\nWARNING: model persistence in Keras can be complicated, especially when\nworking with complex models. 
It is recommended to checkpoint each training\nepoch independently from Lab's ``log_model`` API.\n\nBering by creating a new Lab Project:\n\n >>> echo \"keras\" > requirements.txt\n >>> lab init --name simple-keras\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import keras\nfrom keras.datasets import mnist\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Dropout\nfrom keras.optimizers import RMSprop\nfrom keras.callbacks import TensorBoard\n\nimport tempfile\n\nfrom sklearn.metrics import accuracy_score, precision_score\n\nfrom lab.experiment import Experiment\n\nbatch_size = 128\nnum_classes = 10\nepochs = 20\n\n# the data, split between train and test sets\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nx_train = x_train.reshape(60000, 784)\nx_test = x_test.reshape(10000, 784)\nx_train = x_train.astype('float32')\nx_test = x_test.astype('float32')\nx_train /= 255\nx_test /= 255\nprint(x_train.shape[0], 'train samples')\nprint(x_test.shape[0], 'test samples')\n\n# convert class vectors to binary class matrices\ny_train = keras.utils.to_categorical(y_train, num_classes)\ny_test = keras.utils.to_categorical(y_test, num_classes)\n\nmodel = Sequential()\nmodel.add(Dense(512, activation='relu', input_shape=(784,)))\nmodel.add(Dropout(0.2))\nmodel.add(Dense(512, activation='relu'))\nmodel.add(Dropout(0.2))\nmodel.add(Dense(num_classes, activation='softmax'))\n\nmodel.compile(loss='categorical_crossentropy',\n optimizer=RMSprop(),\n metrics=['accuracy'])\n\ne = Experiment()\n\n\n@e.start_run\ndef train():\n\n # Create a temporary directory for tensorboard logs\n output_dir = tempfile.mkdtemp()\n print(\"Writing TensorBoard events locally to %s\\n\" % output_dir)\n tensorboard = TensorBoard(log_dir=output_dir)\n\n # During Experiment execution, tensorboard can be viewed through:\n # tensorboard --logdir=[output_dir]\n\n model.fit(x_train, y_train,\n batch_size=batch_size,\n epochs=epochs,\n verbose=1,\n validation_data=(x_test, y_test),\n callbacks=[tensorboard])\n\n y_prob = model.predict(x_test)\n y_classes = y_prob.argmax(axis=-1)\n actual = y_test.argmax(axis=-1)\n\n accuracy = accuracy_score(y_true=actual, y_pred=y_classes)\n precision = precision_score(y_true=actual, y_pred=y_classes,\n average='macro')\n\n # Log tensorboard\n e.log_artifacts('tensorboard', output_dir)\n\n # Log all metrics\n e.log_metric('accuracy_score', accuracy)\n e.log_metric('precision_score', precision)\n\n # Log parameters\n e.log_parameter('batch_size', batch_size)\n\n # Save model\n e.log_model('mnist-mlp', model)" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Python 3", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.7.5" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 0 54 | } -------------------------------------------------------------------------------- /lab/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | import warnings 4 | import yaml 5 | import sys 6 | import pkg_resources 7 | 8 | from minio import Minio 9 | from urllib3.exceptions import 
MaxRetryError 10 | 11 | from lab.project import cli as lab_project 12 | from lab.experiment import cli as lab_experiment 13 | 14 | working_directory = os.getcwd() 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | @click.group() 19 | def cli(): 20 | """ 21 | Bering's Machine Learning Lab 22 | 23 | Copyright 2020 Bering Limited. https://beringresearch.com 24 | """ 25 | 26 | 27 | # Project 28 | cli.add_command(lab_project.lab_init) 29 | cli.add_command(lab_project.lab_push) 30 | cli.add_command(lab_project.lab_pull) 31 | cli.add_command(lab_project.lab_ls) 32 | cli.add_command(lab_project.lab_update) 33 | cli.add_command(lab_project.lab_notebook) 34 | 35 | # Experiment 36 | cli.add_command(lab_experiment.lab_run) 37 | cli.add_command(lab_experiment.lab_rm) 38 | cli.add_command(lab_experiment.lab_show) 39 | 40 | 41 | # Lab configuration 42 | @click.group() 43 | def config(): 44 | """ Global Lab configuration """ 45 | pass 46 | 47 | 48 | @click.command('info') 49 | def lab_info(): 50 | """ Display system-wide information """ 51 | import multiprocessing 52 | import platform 53 | 54 | lab_version = pkg_resources.require('lab-ml')[0].version 55 | 56 | system_version = str(sys.version_info[0]) + '.' + \ 57 | str(sys.version_info[1]) + \ 58 | '.' + str(sys.version_info[2]) 59 | home_dir = os.path.expanduser('~') 60 | lab_dir = os.path.join(home_dir, '.lab') 61 | 62 | # Count configured minio hosts 63 | if not os.path.exists(lab_dir): 64 | n_minio_hosts = 0 65 | else: 66 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 67 | minio_config = yaml.safe_load(file) 68 | n_minio_hosts = len(minio_config.keys()) 69 | 70 | click.echo('😎 Lab version: '+str(lab_version)) 71 | click.echo('Minio hosts: '+str(n_minio_hosts)+'\n') 72 | click.echo('Operating System: '+platform.system()) 73 | click.echo('Python version: '+system_version) 74 | click.echo('CPUs: '+str(multiprocessing.cpu_count())) 75 | 76 | 77 | @click.command('minio') 78 | @click.option('--tag', type=str, help='helpful minio host tag', required=True) 79 | @click.option('--endpoint', type=str, help='minio endpoint address', 80 | required=True) 81 | @click.option('--accesskey', type=str, help='minio access key', required=True) 82 | @click.option('--secretkey', type=str, help='minio secret key', required=True) 83 | def minio_config(tag, endpoint, accesskey, secretkey): 84 | """ Setup remote minio host """ 85 | home_dir = os.path.expanduser('~') 86 | lab_dir = os.path.join(home_dir, '.lab') 87 | 88 | # Ensure the configuration directory exists, then test the connection 89 | if not os.path.exists(lab_dir): 90 | os.makedirs(lab_dir) 91 | 92 | try: 93 | minioClient = Minio(endpoint, 94 | access_key=accesskey, 95 | secret_key=secretkey, 96 | secure=False) 97 | minioClient.list_buckets() 98 | except MaxRetryError: 99 | click.secho('Cannot connect to minio instance. Check your credentials ' 100 | 'and hostname. Ensure that endpoint is not prefixed with ' 101 | 'http or https.', fg='red') 102 | raise click.Abort() 103 | 104 | # Create configuration 105 | config = {'minio_endpoint': endpoint, 106 | 'minio_accesskey': accesskey, 107 | 'minio_secretkey': secretkey} 108 | 109 | if os.path.exists(os.path.join(lab_dir, 'config.yaml')): 110 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 111 | minio_config = yaml.safe_load(file) 112 | if tag in minio_config.keys(): 113 | click.secho('Host tag '+tag+' already exists in your ' 114 | 'configuration. Try a different name.', fg='red') 115 | raise click.Abort() 116 | 117 | minio_config[tag] = config 118 | else: 119 | minio_config = {} 120 | minio_config[tag] = config 121 | 122 | with open(os.path.join(lab_dir, 'config.yaml'), 'w') as file: 123 | yaml.safe_dump(minio_config, file, default_flow_style=False) 124 | 125 | 
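# Example invocation (the endpoint and keys below are placeholders; note
# that the endpoint is given without an http:// or https:// prefix):
#
#   lab config minio --tag my-minio --endpoint minio.example.com:9000 \
#       --accesskey ACCESSKEY --secretkey SECRETKEY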
126 | cli.add_command(config) 127 | cli.add_command(lab_info) 128 | config.add_command(minio_config) 129 | 130 | if __name__ == '__main__': 131 | cli() 132 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | .. _cli: 2 | 3 | ====================== 4 | Command Line Interface 5 | ====================== 6 | 7 | Lab is invoked through a simple Command Line Interface (CLI). 8 | 9 | .. code:: 10 | 11 | lab --help 12 | 13 | Usage: lab [OPTIONS] COMMAND [ARGS]... 14 | 15 | Bering's Machine Learning Lab 16 | 17 | Copyright 2020 Bering Limited. https://beringresearch.com 18 | 19 | Options: 20 | --help Show this message and exit. 21 | 22 | Commands: 23 | config Global Lab configuration 24 | info Display system-wide information 25 | init Initialise a new Lab Project 26 | ls Compare multiple Lab Experiments 27 | notebook Launch a jupyter notebook 28 | pull Pulls Lab Experiment from minio to current... 29 | push Push Lab Experiment to minio 30 | rm Remove a Lab Experiment 31 | run Run a training script 32 | show Show a Lab Experiment 33 | update Update Lab Environment from Project's... 34 | 35 | 36 | General Parameters 37 | ------------------ 38 | 39 | ``config`` ``minio`` 40 | ^^^^^^^^^^^^^^^^^^^^ 41 | 42 | Setup remote minio host 43 | 44 | .. code:: 45 | 46 | Usage: lab config minio [OPTIONS] 47 | 48 | Setup remote minio host 49 | 50 | Options: 51 | --tag TEXT helpful minio host tag [required] 52 | --endpoint TEXT minio endpoint address [required] 53 | --accesskey TEXT minio access key [required] 54 | --secretkey TEXT minio secret key [required] 55 | --help Show this message and exit. 56 | 57 | The ``tag`` option is a memorable name that identifies a minio endpoint. It can be reused to quickly access the push and pull APIs. 58 | 59 | ``info`` 60 | ^^^^^^^^ 61 | 62 | Display system-wide information, including Lab version, number of CPUs, etc. 63 | 64 | .. code:: 65 | 66 | Usage: lab info [OPTIONS] 67 | 68 | Project 69 | ------- 70 | 71 | ``init`` 72 | ^^^^^^^^ 73 | 74 | Initialise a new Lab Project. 75 | 76 | .. code:: 77 | 78 | Usage: lab init [OPTIONS] 79 | 80 | Options: 81 | --name TEXT environment name 82 | --help Show this message and exit. 83 | 84 | The command should be run in a directory containing a ``requirements.txt`` file that describes the Project environment. Lab will create a dedicated virtual environment in a ``.venv`` directory. 85 | 86 | ``ls`` 87 | ^^^^^^ 88 | 89 | List Lab Experiments and their performance metrics. 90 | 91 | .. code:: 92 | 93 | Usage: lab ls [OPTIONS] [SORT_BY] 94 | 95 | Options: 96 | --help Show this message and exit. 97 | 98 | The optional ``SORT_BY`` argument is a string column name in the results table. For example, if a Lab Experiment logged a metric AUC, calling ``lab ls AUC`` sorts all Experiments by decreasing AUC values. The default is to show the most recently completed Lab run. 99 | 
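For instance, to sort Experiments by a logged ``accuracy_score`` metric (metric names are whatever your own Experiments logged):

.. code::

    lab ls accuracy_score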
100 | ``show`` 101 | ^^^^^^^^ 102 | 103 | Create a PNG diagram linking an Experiment's data, script, hyperparameters and performance. 104 | 105 | .. code:: 106 | 107 | Usage: lab show 108 | 109 | Options: 110 | --help Show this message and exit. 111 | 112 | ``notebook`` 113 | ^^^^^^^^^^^^ 114 | Launches a jupyter notebook, pointing to the ``notebooks`` directory. If this is the first time launching the notebook, Lab will automatically create a jupyter kernel using the ``requirements.txt`` file. Kernel name is stored on your system as TIMESTAMP_PROJECT_NAME. 115 | 116 | ``update`` 117 | ^^^^^^^^^^ 118 | Updates the Lab project. Run it after the local Lab version has been updated or after ``requirements.txt`` has been modified with additional dependencies. 119 | 120 | Experiment 121 | ---------- 122 | 123 | ``run`` 124 | ^^^^^^^ 125 | Execute a Lab Experiment. 126 | 127 | .. code:: 128 | 129 | Usage: lab run [OPTIONS] [SCRIPT]... 130 | 131 | Options: 132 | --help Show this message and exit. 133 | 134 | ``rm`` 135 | ^^^^^^ 136 | 137 | Remove a Lab Experiment 138 | 139 | .. code:: 140 | 141 | Usage: lab rm [OPTIONS] EXPERIMENT_ID 142 | 143 | EXPERIMENT_ID can be obtained by running ``lab ls`` inside the Project directory. 144 | 145 | Model Management 146 | ---------------- 147 | 148 | ``push`` 149 | ^^^^^^^^ 150 | 151 | Push Lab Project to a configured minio repository. 152 | 153 | .. code:: 154 | 155 | lab push --tag [MINIO_TAG] --bucket [TEXT] --force 156 | 157 | 158 | ``pull`` 159 | ^^^^^^^^ 160 | 161 | Pull a Lab Project from a configured minio repository. 162 | 163 | .. code:: 164 | 165 | lab pull --tag [MINIO_TAG] --bucket [TEXT] --project [TEXT] --force 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/bering-ml-lab/badge/?version=latest)](https://bering-ml-lab.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # Machine Learning Lab 4 | 5 | A lightweight command line interface for the management of arbitrary machine learning tasks. 6 | 7 | Documentation is available at: 8 | 9 | NOTE: Lab is in active development - expect a bumpy ride! 10 | 11 | ![alt text](https://github.com/beringresearch/lab/blob/master/docs/source/_static/lab_screenshot.jpeg "Bering's Lab") 12 | 13 | ## Installation 14 | 15 | The latest stable version can be installed directly from PyPi: 16 | 17 | ```bash 18 | pip install lab-ml 19 | ``` 20 | 21 | The development version can be installed from github: 22 | 23 | ```bash 24 | git clone https://github.com/beringresearch/lab 25 | cd lab 26 | pip install --editable . 27 | ``` 28 | 29 | ## Concepts 30 | 31 | Lab employs three concepts: __reproducible environment__, __logging__, and __model persistence__. 32 | A typical machine learning workflow can be turned into a Lab Experiment by adding a single decorator. 33 | 34 | ## Creating a new Lab Project 35 | 36 | ```bash 37 | lab init --name [NAME] 38 | ``` 39 | 40 | Lab will look for a **requirements.txt** file in the working directory to generate a portable virtual environment for ML experiments. 
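For example, a minimal `requirements.txt` for an sklearn-based project might contain (package choice here is purely illustrative):

```
scikit-learn
pandas
```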
41 | 42 | ## Setting up a Lab Experiment 43 | 44 | Here's a simple script that trains an SVM classifier on the iris data set: 45 | 46 | ```python 47 | from sklearn import svm, datasets 48 | from sklearn.model_selection import train_test_split 49 | from sklearn.metrics import accuracy_score, precision_score 50 | 51 | C = 1.0 52 | gamma = 0.7 53 | iris = datasets.load_iris() 54 | X = iris.data 55 | y = iris.target 56 | 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.24, random_state=42) 58 | 59 | clf = svm.SVC(C=C, kernel='rbf', gamma=gamma, probability=True) 60 | clf.fit(X_train, y_train) 61 | 62 | y_pred = clf.predict(X_test) 63 | accuracy = accuracy_score(y_test, y_pred) 64 | precision = precision_score(y_test, y_pred, average='macro') 65 | ``` 66 | 67 | It's trivial to create a Lab Experiment using a simple decorator: 68 | 69 | ```python 70 | from sklearn import svm, datasets 71 | from sklearn.model_selection import train_test_split 72 | from sklearn.metrics import accuracy_score, precision_score 73 | 74 | from lab.experiment import Experiment ## New Line 75 | 76 | e = Experiment() ## New Line 77 | 78 | @e.start_run ## New Line 79 | def train(): 80 | C = 1.0 81 | gamma = 0.7 82 | iris = datasets.load_iris() 83 | X = iris.data 84 | y = iris.target 85 | 86 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.24, random_state=42) 87 | 88 | clf = svm.SVC(C=C, kernel='rbf', gamma=gamma, probability=True) 89 | clf.fit(X_train, y_train) 90 | 91 | y_pred = clf.predict(X_test) 92 | accuracy = accuracy_score(y_test, y_pred) 93 | precision = precision_score(y_test, y_pred, average='macro') 94 | 95 | e.log_metric('accuracy_score', accuracy) ## New Line 96 | e.log_metric('precision_score', precision) ## New Line 97 | 98 | e.log_parameter('C', C) ## New Line 99 | e.log_parameter('gamma', gamma) ## New Line 100 | 101 | e.log_model('svm', clf) ## New Line 102 | ``` 103 | 104 | ## Running an Experiment 105 | 106 | Lab Experiments can be run as: 107 | 108 | ```bash 109 | lab run 110 | ``` 111 | 112 | ## Comparing models 113 | 114 | Lab assumes that all Experiments associated with a Project log consistent performance metrics. We can quickly assess performance of each experiment by running: 115 | 116 | ```bash 117 | lab ls 118 | 119 | Experiment Source Date accuracy_score precision_score 120 | ------------ ------------------ ---------- ---------------- ----------------- 121 | 49ffb76e train_mnist_mlp.py 2019-01-15 0.97: ██████████ 0.97: ██████████ 122 | 261a34e4 train_mnist_cnn.py 2019-01-15 0.98: ██████████ 0.98: ██████████ 123 | ``` 124 | 125 | ## Pushing models to a centralised repository 126 | 127 | Lab experiments can be pushed to a centralised filesystem through integration with [minio](https://minio.io). Lab assumes that you have setup minio on a private cloud. 128 | 129 | Lab can be configured once to interface with a remote minio instance: 130 | 131 | ```bash 132 | lab config minio --tag my-minio --endpoint [URL:PORT] --accesskey [STRING] --secretkey [STRING] 133 | ``` 134 | 135 | To push a local lab experiment to minio: 136 | 137 | ```bash 138 | lab push --tag my-minio --bucket [BUCKETNAME] . 139 | ``` 140 | 
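To retrieve a pushed Project on another machine, the complementary `pull` command can be used; the flags below follow the CLI reference in the documentation:

```bash
lab pull --tag my-minio --bucket [BUCKETNAME] --project [NAME]
```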
141 | Copyright 2020, Bering Limited 142 | -------------------------------------------------------------------------------- /docs/source/auto_examples/keras_mnist_mlp.rst: -------------------------------------------------------------------------------- 1 | .. note:: 2 | :class: sphx-glr-download-link-note 3 | 4 | Click :ref:`here ` to download the full example code 5 | .. rst-class:: sphx-glr-example-title 6 | 7 | .. _sphx_glr_auto_examples_keras_mnist_mlp.py: 8 | 9 | 10 | Running Keras models with Tensorboard 11 | ===================================== 12 | 13 | Lab integrates into a typical keras workflow. 14 | 15 | WARNING: model persistence in Keras can be complicated, especially when 16 | working with complex models. It is recommended to checkpoint each training 17 | epoch independently of Lab's ``log_model`` API. 18 | 19 | Begin by creating a new Lab Project: 20 | 21 | >>> echo "keras" > requirements.txt 22 | >>> lab init --name simple-keras 23 | 24 | 25 | .. code-block:: default 26 | 27 | 28 | import keras 29 | from keras.datasets import mnist 30 | from keras.models import Sequential 31 | from keras.layers import Dense, Dropout 32 | from keras.optimizers import RMSprop 33 | from keras.callbacks import TensorBoard 34 | 35 | import tempfile 36 | 37 | from sklearn.metrics import accuracy_score, precision_score 38 | 39 | from lab.experiment import Experiment 40 | 41 | batch_size = 128 42 | num_classes = 10 43 | epochs = 20 44 | 45 | # the data, split between train and test sets 46 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 47 | 48 | x_train = x_train.reshape(60000, 784) 49 | x_test = x_test.reshape(10000, 784) 50 | x_train = x_train.astype('float32') 51 | x_test = x_test.astype('float32') 52 | x_train /= 255 53 | x_test /= 255 54 | print(x_train.shape[0], 'train samples') 55 | print(x_test.shape[0], 'test samples') 56 | 57 | # convert class vectors to binary class matrices 58 | y_train = keras.utils.to_categorical(y_train, num_classes) 59 | y_test = keras.utils.to_categorical(y_test, num_classes) 60 | 61 | model = Sequential() 62 | model.add(Dense(512, activation='relu', input_shape=(784,))) 63 | model.add(Dropout(0.2)) 64 | model.add(Dense(512, activation='relu')) 65 | model.add(Dropout(0.2)) 66 | model.add(Dense(num_classes, activation='softmax')) 67 | 68 | model.compile(loss='categorical_crossentropy', 69 | optimizer=RMSprop(), 70 | metrics=['accuracy']) 71 | 72 | e = Experiment() 73 | 74 | 75 | @e.start_run 76 | def train(): 77 | 78 | # Create a temporary directory for tensorboard logs 79 | output_dir = tempfile.mkdtemp() 80 | print("Writing TensorBoard events locally to %s\n" % output_dir) 81 | tensorboard = TensorBoard(log_dir=output_dir) 82 | 83 | # During Experiment execution, tensorboard can be viewed through: 84 | # tensorboard --logdir=[output_dir] 85 | 86 | model.fit(x_train, y_train, 87 | batch_size=batch_size, 88 | epochs=epochs, 89 | verbose=1, 90 | validation_data=(x_test, y_test), 91 | callbacks=[tensorboard]) 92 | 93 | y_prob = model.predict(x_test) 94 | y_classes = y_prob.argmax(axis=-1) 95 | actual = y_test.argmax(axis=-1) 96 | 97 | accuracy = accuracy_score(y_true=actual, y_pred=y_classes) 98 | precision = precision_score(y_true=actual, y_pred=y_classes, 99 | average='macro') 100 | 101 | # Log tensorboard 102 | e.log_artifact('tensorboard', output_dir) 103 | 104 | # Log all metrics 105 | e.log_metric('accuracy_score', accuracy) 106 | e.log_metric('precision_score', precision) 107 | 108 | # Log parameters 109 | e.log_parameter('batch_size', batch_size) 110 | 111 | # Save model 112 | e.log_model('mnist-mlp', model) 113 | 114 | 115 | .. rst-class:: sphx-glr-timing 116 | 117 | **Total running time of the script:** ( 0 minutes 0.000 seconds) 118 | 119 | 120 | .. 
_sphx_glr_download_auto_examples_keras_mnist_mlp.py: 121 | 122 | 123 | .. only :: html 124 | 125 | .. container:: sphx-glr-footer 126 | :class: sphx-glr-footer-example 127 | 128 | 129 | 130 | .. container:: sphx-glr-download 131 | 132 | :download:`Download Python source code: keras_mnist_mlp.py ` 133 | 134 | 135 | 136 | .. container:: sphx-glr-download 137 | 138 | :download:`Download Jupyter notebook: keras_mnist_mlp.ipynb ` 139 | 140 | 141 | .. only:: html 142 | 143 | .. rst-class:: sphx-glr-signature 144 | 145 | `Gallery generated by Sphinx-Gallery `_ 146 | -------------------------------------------------------------------------------- /docs/source/logging.rst: -------------------------------------------------------------------------------- 1 | .. _logging: 2 | 3 | Tracking Machine Learning Experiments 4 | ===================================== 5 | 6 | The Lab logging component was designed to interface directly with your training code without disrupting the machine learning workflow. 7 | Currently, users can keep track of the following experiment artifacts: 8 | 9 | - ``e.log_features``: Feature names 10 | - ``e.log_parameter``: Hyperparameters 11 | - ``e.log_metric``: Performance metrics 12 | - ``e.log_artifact``: Experimental artifacts 13 | - ``e.log_model``: Model persistence 14 | 15 | Feature names 16 | ------------- 17 | Data features are simply lists of feature names or column indices. Consider the snippet: 18 | 19 | .. code-block:: python 20 | 21 | from sklearn import datasets 22 | 23 | iris = datasets.load_iris() 24 | feature_names = iris['feature_names'] 25 | 26 | print(feature_names) 27 | 28 | ['sepal length (cm)', 29 | 'sepal width (cm)', 30 | 'petal length (cm)', 31 | 'petal width (cm)'] 32 | 33 | We can log these features by adding a few lines of code: 34 | 35 | .. code-block:: python 36 | 37 | from sklearn import datasets 38 | from lab.experiment import Experiment #import lab Experiment 39 | 40 | e = Experiment() 41 | 42 | # Initialize Lab Experiment 43 | @e.start_run 44 | def train(): 45 | iris = datasets.load_iris() 46 | feature_names = iris['feature_names'] 47 | 48 | # Log features 49 | e.log_features(feature_names) 50 | 
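Once the run completes, the logged names are persisted to ``features.yaml`` inside the experiment's directory. For the iris features above the serialised file would look roughly like this (a sketch of the plain YAML list that the serialisation produces):

.. code-block:: yaml

    - sepal length (cm)
    - sepal width (cm)
    - petal length (cm)
    - petal width (cm)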
51 | Hyperparameters: ``e.log_parameter`` 52 | ------------------------------------- 53 | 54 | Let's carry on with the Iris dataset and consider a Random Forest Classifier with an exhaustive grid search along the number of trees and maximum depth of a tree: 55 | 56 | .. code-block:: python 57 | 58 | from sklearn import datasets 59 | from sklearn.ensemble import RandomForestClassifier 60 | from sklearn.model_selection import GridSearchCV 61 | from lab.experiment import Experiment #import lab Experiment 62 | 63 | e = Experiment() 64 | 65 | # Initialize Lab Experiment 66 | @e.start_run 67 | def train(): 68 | iris = datasets.load_iris() 69 | 70 | feature_names = iris['feature_names'] 71 | 72 | # Log features 73 | e.log_features(feature_names) 74 | 75 | parameters = {'n_estimators': [10, 50, 100], 76 | 'max_depth': [2, 4]} 77 | 78 | rfc = RandomForestClassifier() 79 | 80 | # Run a grid search 81 | clf = GridSearchCV(rfc, parameters) 82 | clf.fit(iris.data, iris.target) 83 | 84 | best_parameters = clf.best_estimator_.get_params() 85 | 86 | # Log parameters 87 | e.log_parameter('n_estimators', best_parameters['n_estimators']) 88 | e.log_parameter('max_depth', best_parameters['max_depth']) 89 | 90 | Performance Metrics: ``e.log_metric`` 91 | ------------------------------------- 92 | 93 | Lab was designed to easily compare multiple machine learning experiments through consistent performance metrics. 94 | Let's expand our example and assess model accuracy and precision. 95 | 96 | .. code-block:: python 97 | 98 | from sklearn import datasets 99 | from sklearn.ensemble import RandomForestClassifier 100 | from sklearn.model_selection import GridSearchCV 101 | from sklearn.model_selection import train_test_split 102 | from sklearn.metrics import accuracy_score, precision_score 103 | from lab.experiment import Experiment 104 | 105 | e = Experiment() 106 | 107 | # Initialize Lab Experiment 108 | @e.start_run 109 | def train(): 110 | iris = datasets.load_iris() 111 | 112 | feature_names = iris['feature_names'] 113 | 114 | # Log features 115 | e.log_features(feature_names) 116 | 117 | parameters = {'n_estimators': [10, 50, 100], 118 | 'max_depth': [2, 4]} 119 | 120 | # Run a grid search 121 | rfc = RandomForestClassifier() 122 | clf = GridSearchCV(rfc, parameters) 123 | clf.fit(iris.data, iris.target) 124 | 125 | best_parameters = clf.best_estimator_.get_params() 126 | 127 | # Log parameters 128 | e.log_parameter('n_estimators', best_parameters['n_estimators']) 129 | e.log_parameter('max_depth', best_parameters['max_depth']) 130 | 131 | X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, 132 | test_size=0.25, random_state=42) 133 | 134 | rfc = RandomForestClassifier(n_estimators=best_parameters['n_estimators'], 135 | max_depth=best_parameters['max_depth']) 136 | rfc.fit(X_train, y_train) 137 | 138 | # Generate predictions 139 | y_pred = rfc.predict(X_test) 140 | accuracy = accuracy_score(y_test, y_pred) 141 | precision = precision_score(y_test, y_pred, average='macro') 142 | 143 | # Log performance metrics 144 | e.log_metric('accuracy_score', accuracy) 145 | e.log_metric('precision_score', precision) 146 | 
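Behind the scenes, these metrics are serialised to ``metrics.yaml`` inside the experiment directory once the run completes; this is the file that ``lab ls`` reads when comparing Experiments. For the example above the file would look something like this (the values shown are illustrative):

.. code-block:: yaml

    accuracy_score: 0.97
    precision_score: 0.97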
147 | Experiment Artifacts: ``e.log_artifact`` 148 | ---------------------------------------- 149 | 150 | In certain cases, it may be desirable for a Lab Experiment to write certain artifacts to a temporary folder - e.g. 151 | ROC curves or a Tensorboard log directory. Lab naturally bundles these artifacts within each respective experiment for subsequent exploration. 152 | 153 | Let's explore an example where Lab logs Tensorboard outputs: 154 | 155 | .. code-block:: python 156 | 157 | # Additional imports would go here 158 | from keras.callbacks import TensorBoard 159 | import tempfile 160 | 161 | from lab.experiment import Experiment 162 | 163 | e = Experiment() 164 | 165 | @e.start_run 166 | def train(): 167 | 168 | # ... Further training code goes here 169 | 170 | # Create a temporary directory for tensorboard logs 171 | output_dir = tempfile.mkdtemp() 172 | print("Writing TensorBoard events locally to %s\n" % output_dir) 173 | 174 | tensorboard = TensorBoard(log_dir=output_dir) 175 | 176 | model.fit(x_train, y_train, 177 | batch_size=batch_size, 178 | epochs=epochs, 179 | verbose=1, 180 | validation_data=(x_test, y_test), 181 | callbacks=[tensorboard]) 182 | 183 | # Log tensorboard artifact 184 | e.log_artifact('tensorboard', output_dir) 185 | 186 | 187 | In this example, Tensorboard logs are written to a temporary folder, which can be tracked in real-time. Once the run is complete, 188 | Lab moves all the directory content into a subdirectory of the current Lab Experiment. 189 | 190 | 191 | Model Persistence: ``e.log_model`` 192 | ---------------------------------- 193 | 194 | Finally, it's useful to store model objects themselves for future use. Consider our fitted GridSearchCV object ``clf`` from an earlier example. 195 | It can now be logged using a simple expression: 196 | 197 | .. code-block:: python 198 | 199 | e.log_model('GridSearchCV', clf) 200 | -------------------------------------------------------------------------------- /lab/experiment/experiment.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import uuid 3 | import os 4 | import sys 5 | import yaml 6 | import numpy 7 | import warnings 8 | import joblib 9 | import graphviz 10 | import json 11 | from distutils.dir_util import copy_tree 12 | 13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 14 | 15 | _DEFAULT_USER_ID = 'unknown' 16 | 17 | 18 | class Experiment(): 19 | def __init__(self, dataset=''): 20 | """ Fundamental Lab class for managing a machine learning experiment. 21 | 22 | :param str dataset: description or tag of a dataset used for training. 23 | """ 24 | 25 | self.dataset = dataset 26 | 27 | def create_run(self, run_uuid=None, user_id=None, home_dir=None, 28 | timestamp=None, metrics=None, parameters=None, 29 | source=None, feature_names=None, models=None, 30 | artifacts=None): 31 | """ Initialise a Lab experiment run 32 | """ 33 | 34 | self.uuid = str(uuid.uuid4())[:8] 35 | self.user_id = _get_user_id() 36 | self.timestamp = timestamp 37 | self.metrics = metrics 38 | self.parameters = parameters 39 | self.feature_names = feature_names 40 | self.source = ' '.join(sys.argv) 41 | self.home_dir = os.path.dirname( 42 | os.path.dirname( 43 | os.path.dirname(sys.argv[0]))) 44 | self.models = models if models is not None else {} 45 | self.artifacts = artifacts if artifacts is not None else {} 46 | 47 | def start_run(self, fun): 48 | """ Start run and log experiment data as it becomes available. 
49 | """ 50 | self.create_run(user_id=_get_user_id(), 51 | timestamp=datetime.datetime.now()) 52 | run_uuid = self.uuid 53 | 54 | models_directory = os.path.join(self.home_dir, 'experiments', run_uuid) 55 | logs_directory = os.path.join(self.home_dir, 'logs', run_uuid) 56 | 57 | fun() 58 | 59 | os.makedirs(logs_directory) 60 | os.makedirs(models_directory) 61 | 62 | # Log run metadata 63 | meta_file = os.path.join(logs_directory, 'meta.yaml') 64 | with open(meta_file, 'w') as file: 65 | meta = {'artifact_uri': os.path.dirname( 66 | os.path.abspath(models_directory)), 67 | 'source': self.source, 68 | 'start_time': self.timestamp, 69 | 'end_time': datetime.datetime.now(), 70 | 'experiment_uuid': self.uuid, 71 | 'dataset': self.dataset, 72 | 'user_id': self.user_id} 73 | yaml.dump(meta, file, default_flow_style=False) 74 | 75 | # Log metrics 76 | metrics_file = os.path.join(models_directory, 'metrics.yaml') 77 | with open(metrics_file, 'w') as file: 78 | yaml.dump(self.metrics, file, default_flow_style=False) 79 | 80 | # Log parameters 81 | parameters_file = os.path.join(models_directory, 'parameters.yaml') 82 | with open(parameters_file, 'w') as file: 83 | yaml.dump(self.parameters, file, default_flow_style=False) 84 | 85 | # Log features 86 | feature_file = os.path.join(models_directory, 'features.yaml') 87 | with open(feature_file, 'w') as file: 88 | yaml.dump(self.feature_names, file, default_flow_style=False) 89 | 90 | # Log models 91 | for filename in self.models.keys(): 92 | model_file = os.path.join(models_directory, filename+'.joblib') 93 | joblib.dump(self.models[filename], model_file) 94 | 95 | # Log artifacts 96 | for artifact in self.artifacts.keys(): 97 | destination = os.path.join(models_directory, artifact) 98 | copy_tree(self.artifacts[artifact], destination) 99 | 100 | def log_artifact(self, key, value): 101 | """ Log model artifacts 102 | """ 103 | self.artifacts[key] = value 104 | 105 | def log_features(self, feature_names): 106 | """ Log feature names 107 | """ 108 | self.feature_names = list(feature_names) 109 | 110 | def log_metric(self, key, value): 111 | """ Log performance metrics 112 | """ 113 | value = numpy.array(value) 114 | logged_metric = {} 115 | logged_metric[key] = value.tolist() 116 | 117 | if self.metrics is None: 118 | self.metrics = logged_metric 119 | else: 120 | self.metrics[key] = value.tolist() 121 | 122 | def log_parameter(self, key, value): 123 | """ Log model hyperparameters 124 | """ 125 | value = numpy.array(value) 126 | logged_parameter = {} 127 | logged_parameter[key] = value.tolist() 128 | 129 | if self.parameters is None: 130 | self.parameters = logged_parameter 131 | else: 132 | self.parameters[key] = value.tolist() 133 | 134 | def log_model(self, key, value): 135 | """Serialize the model 136 | """ 137 | self.models[key] = value 138 | 139 | def view(self): 140 | """ View lab project as a graphviz graph. 
141 | """ 142 | return show_experiment(self.uuid) 143 | 144 | 145 | def show_experiment(experiment_id): 146 | try: 147 | logs = yaml.load(open(os.path.join('logs', experiment_id, 148 | 'meta.yaml'), 'r')) 149 | if logs['dataset'] is None: 150 | logs['dataset'] = 'N/A' 151 | except FileNotFoundError: 152 | sys.exit('Not a valid lab experiment') 153 | 154 | col = _get_graphviz_colour() 155 | 156 | try: 157 | metrics = yaml.load(open(os.path.join('experiments', experiment_id, 158 | 'metrics.yaml'), 'r')) 159 | except FileNotFoundError: 160 | metrics = {'Metrics': 'None'} 161 | 162 | try: 163 | parameters = yaml.load(open(os.path.join('experiments', experiment_id, 164 | 'parameters.yaml'), 'r')) 165 | except FileNotFoundError: 166 | parameters = {'Parameter': 0.0} 167 | 168 | # Set defaults for empty values 169 | if parameters is None: 170 | parameters = {'Parameter': 0.0} 171 | 172 | # Extract only the source file name without arguments 173 | source = logs['source'].split(' ')[0] 174 | 175 | dot = graphviz.Digraph(format='png', 176 | name=logs['experiment_uuid'], 177 | node_attr={'shape': 'record'}) 178 | 179 | dot.attr('node', color=col) 180 | dot.attr('edge', color=col) 181 | 182 | dataset_id = logs['dataset'] 183 | source_id = source 184 | parameters_id = 'struct_'+experiment_id+'_parameters' 185 | metrics_id = experiment_id+'_performance' 186 | 187 | dot.node(experiment_id, logs['experiment_uuid'], shape='Mdiamond') 188 | dot.node(dataset_id, logs['dataset'], shape='Msquare') 189 | dot.node(source_id, source, shape='rectangle') 190 | 191 | dot.edge(experiment_id, dataset_id) 192 | dot.edge(dataset_id, source_id) 193 | 194 | with dot.subgraph(name='cluster_hyperparameters_'+experiment_id) as c: 195 | c.attr(label='Hyperparameters') 196 | c.attr('node', shape='Mrecord') 197 | c.attr(color='transparent') 198 | text = '{'+json.dumps(parameters).replace(',', '|')+'}' 199 | text = text.replace('"', '') 200 | c.node(parameters_id, text) 201 | 202 | with dot.subgraph(name='cluster_performance_'+experiment_id) as c: 203 | c.attr(label='Metrics') 204 | c.attr('node', shape='Mrecord') 205 | c.attr(color='transparent') 206 | text = '{'+json.dumps(metrics).replace(',', '|')+'}' 207 | text = text.replace('"', '') 208 | c.node(metrics_id, text) 209 | 210 | dot.edge(source_id, parameters_id) 211 | dot.edge(parameters_id, metrics_id) 212 | 213 | return dot 214 | 215 | 216 | def _get_user_id(): 217 | """Get the ID of the user for the current run.""" 218 | try: 219 | import pwd 220 | import os 221 | return pwd.getpwuid(os.getuid())[0] 222 | except ImportError: 223 | return _DEFAULT_USER_ID 224 | 225 | 226 | def _get_graphviz_colour(): 227 | colour_list = ['antiquewhite4', 'aquamarine4', 'azure4', 'bisque4', 228 | 'black', 'blue', 'blueviolet', 'brown', 'burlywood', 229 | 'cadetblue', 'chartreuse3', 'chartreuse4', 'chocolate4', 230 | 'coral', 'coral3', 'cornflowerblue', 'cornsilk4', 231 | 'crimson', 'cyan', 'darkgreen', 'darkorange1', 'deeppink1', 232 | 'deepskyblue1', 'dodgerblue', 'firebrick', 'forestgreen', 233 | 'goldenrod', 'goldenrod4', 'hotpink', 'indigo', 234 | 'khaki4', 'lightcoral', 'lightslateblue', 'lightsteelblue4', 235 | 'maroon', 'midnightblue', 'orangered4', 'palevioletred', 236 | 'sienna3', 'tomato', 'violetred1'] 237 | choice = numpy.random.choice(list(range(len(colour_list))), 238 | replace=False) 239 | return colour_list[choice] 240 | -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | Copyright 2020 Bering Limited. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 
63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /lab/project/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import uuid 3 | import glob 4 | import os 5 | import sys 6 | import datetime 7 | import yaml 8 | import subprocess 9 | import shutil 10 | from minio import Minio 11 | from minio.error import S3Error 12 | 13 | import tabulate 14 | import pandas as pd 15 | import numpy as np 16 | 17 | from lab import is_empty_project, is_lab_project, create_venv,\ 18 | check_minio_config 19 | 20 | 21 | @click.command('ls') 22 | @click.argument('sort_by', required=False) 23 | def lab_ls(sort_by=None): 24 | """ Compare multiple Lab Experiments """ 25 | models_directory = 'experiments' 26 | logs_directory = 'logs' 27 | TICK = '█' 28 | 29 | is_lab_project() 30 | is_empty_project() 31 | 32 | experiments = next(os.walk(models_directory))[1] 33 | comparisons = [] 34 | 35 | # Get unique metric names 36 | metrics_names = [] 37 | for e in experiments: 38 | metrics_file = os.path.join(models_directory, e, 'metrics.yaml') 39 | with open(metrics_file, 'r') as file: 40 | metrics = yaml.load(file) 41 | metrics_names.append(list(metrics.keys())) 42 | 43 | metrics_names = list(set(metrics_names[0]).intersection(*metrics_names)) 44 | 45 | 46 | # Get all experiments 47 | for e in experiments: 48 | metrics_file = os.path.join(models_directory, e, 'metrics.yaml') 49 | try: 50 | with open(metrics_file, 'r') as file: 51 | metrics = yaml.load(file) 52 | for k, v in metrics.items(): 53 | metrics[k] = round(v, 2) 54 | 55 | metrics = {k: metrics[k] for k in metrics_names} 56 | metrics_list = list(metrics.values()) 57 | 58 | meta_file = os.path.join(logs_directory, e, 'meta.yaml') 59 | with open(meta_file, 'r') as file: 60 | meta = yaml.load(file) 61 | 62 | # Truncate source name if too long 63 | source_name = meta['source'] 64 | meta['source'] = (source_name[:20] + 65 | '..') if len(source_name) > 20 else source_name 66 | 67 | record = [meta['experiment_uuid'], meta['source'], 68 | meta['start_time'].strftime("%m/%d/%Y, %H:%M:%S")] + \ 69 | metrics_list 70 | comparisons.append(record) 71 | except FileNotFoundError: 72 | pass 73 | 74 | # Create visualisation of numeric metrics 75 | A = pd.DataFrame(comparisons) 76 | meta_data = A[[0, 1, 2]] 77 | metrics_data = A.drop([0, 1, 2], 
axis=1) 78 | 79 | row_max = metrics_data.abs().max(axis=0) 80 | scaled_metrics_data = metrics_data.abs().divide(row_max, axis=1) 81 | scaled_metrics_data = scaled_metrics_data.fillna(value=0) 82 | 83 | sparklines = np.empty(shape=metrics_data.shape, dtype=object) 84 | for row in range(metrics_data.shape[0]): 85 | for column in range(metrics_data.shape[1]): 86 | value = metrics_data.iloc[row, column] 87 | scaled_value = scaled_metrics_data.iloc[row, column] 88 | scaled_value = scaled_value 89 | spark = (format(value, '.2f') + ': ' + 90 | TICK * int(round(scaled_value*10))) 91 | sparklines[row, column] = spark 92 | 93 | result = pd.concat([meta_data, pd.DataFrame(sparklines)], axis=1) 94 | result.columns = (['Experiment', 'Source', 'Date'] + 95 | list(metrics.keys())) 96 | 97 | result.sort_values(by=['Date'], axis=0, ascending=False, 98 | inplace=True) 99 | 100 | if sort_by is not None: 101 | result.sort_values(by=[sort_by], axis=0, 102 | ascending=False, inplace=True) 103 | 104 | header = ['Experiment', 'Source', 'Date'] + list(metrics.keys()) 105 | click.echo('') 106 | click.echo(tabulate.tabulate(result.values, headers=header)) 107 | 108 | # Check the last time lab project was synced with minio 109 | with open(os.path.join('config', 'runtime.yaml'), 'r') as file: 110 | minio_config = yaml.load(file) 111 | push_time = datetime.datetime.fromtimestamp(0) 112 | try: 113 | push_time = \ 114 | datetime.datetime.strptime( 115 | minio_config['last_push'], 116 | '%Y-%m-%d %H:%M:%S.%f') 117 | 118 | now_time = datetime.datetime.now() 119 | td = now_time-push_time 120 | (days, hours) = (td.days, td.seconds//3600) 121 | except Exception: 122 | (days, hours) = (0, 0) 123 | 124 | click.secho('\nLast push: '+str(days)+'d, ' + str(hours)+'h ago', 125 | fg='yellow') 126 | 127 | # Find the latest file and print its timestamp 128 | list_of_files = glob.glob(os.path.join(os.getcwd(), '*')) 129 | latest_file = max(list_of_files, key=os.path.getctime) 130 | latest_file_timestamp = \ 131 | datetime.datetime.fromtimestamp(os.path.getmtime(latest_file)) 132 | 133 | recommend = '| Project is in sync with remote' 134 | if latest_file_timestamp > push_time: 135 | recommend = ' | Recommend to run ' 136 | click.secho('Last modified: '+str(latest_file_timestamp)+recommend, 137 | fg='yellow') 138 | 139 | 140 | @click.command(name='notebook') 141 | def lab_notebook(): 142 | """ Publish Lab project as a jupyter kernel """ 143 | is_lab_project() 144 | 145 | with open(os.path.join(os.getcwd(), 146 | 'config', 'runtime.yaml'), 'r') as file: 147 | config = yaml.load(file) 148 | project_name = config['name'] + '_' +\ 149 | ''.join(e for e in config['timestamp'] if e.isalnum()) 150 | 151 | click.secho('Generating jupyter kernel for ' + config['name'] + '...', 152 | fg='cyan') 153 | 154 | try: 155 | _install_jupyter_kernel(project_name) 156 | click.secho('Kernel generated: ' + project_name) 157 | except Exception as e: 158 | print(e) 159 | click.secho('Failed to generate kernel.', fg='red') 160 | 161 | 162 | def _install_jupyter_kernel(project_name): 163 | 164 | venv_dir = os.path.join(os.getcwd(), '.venv') 165 | subprocess.call([venv_dir + '/bin/pip', 'install', 'ipykernel']) 166 | subprocess.call([venv_dir + '/bin/ipython', 'kernel', 'install', 167 | '--user', '--name='+project_name]) 168 | 169 | 170 | @click.command(name='init') 171 | @click.option('--name', type=str, default=str(uuid.uuid4()), 172 | help='environment name') 173 | def lab_init(name): 174 | """ Initialise a new Lab Project """ 175 | if not 
os.path.isfile('requirements.txt'): 176 | click.secho('requirements.txt is not found in the ' 177 | 'current working directory.', fg='red') 178 | raise click.Abort() 179 | 180 | if os.path.isdir(name): 181 | click.secho('Project '+name+' already exists.', fg='red') 182 | raise click.Abort() 183 | else: 184 | try: 185 | _project_init(name) 186 | except Exception as e: 187 | print(e) 188 | click.secho('Errors encountered during project initialisation.' 189 | 'Rolling back..', fg='red') 190 | raise click.Abort() 191 | 192 | 193 | @click.command(name='update') 194 | def lab_update(): 195 | """ Update Lab Environment from Project's requirements.txt """ 196 | if not os.path.isfile('requirements.txt'): 197 | click.secho('requirements.txt file is missing.', fg='red') 198 | raise click.Abort() 199 | 200 | # Update project directory if it hasn't been updated 201 | try: 202 | with open(os.path.join(os.getcwd(), 203 | 'config', 'runtime.yaml'), 'r') as file: 204 | config = yaml.load(file) 205 | home_dir = config['path'] 206 | 207 | if home_dir != os.getcwd(): 208 | config['path'] = os.getcwd() 209 | with open(os.path.join(os.getcwd(), 210 | 'config', 'runtime.yaml'), 'w') as file: 211 | yaml.dump(config, file, default_flow_style=False) 212 | except FileNotFoundError: 213 | click.secho('Having trouble parsing configuration file for this ' 214 | "project. It's likely that this is either not a " 215 | 'Lab Project or the Project was created with an older ' 216 | 'version of Lab.\n', 217 | fg='red') 218 | raise click.Abort() 219 | 220 | if not os.path.isdir('.venv'): 221 | click.secho("Couldn't find .venv. Creating one for you...", 222 | fg='blue') 223 | create_venv('') 224 | 225 | home_dir = os.getcwd() 226 | venv_dir = os.path.join(home_dir, '.venv') 227 | 228 | click.secho('Updating lab', fg='cyan') 229 | subprocess.call([venv_dir + '/bin/pip', 230 | 'install', '--upgrade', 'lab-ml']) 231 | 232 | click.secho('Updating environment using requirements.txt', fg='cyan') 233 | subprocess.call([venv_dir + '/bin/pip', 'install', '--upgrade', 234 | '-r', 'requirements.txt']) 235 | 236 | 237 | @click.command('pull') 238 | @click.option('--tag', type=str, 239 | help='minio host nickname', required=False, default=None) 240 | @click.option('--bucket', type=str, required=False, default=None, 241 | help='minio bucket name') 242 | @click.option('--project', type=str, required=False, default=None, 243 | help='Lab Project name') 244 | @click.option('--force', is_flag=True) 245 | def lab_pull(tag, bucket, project, force): 246 | """ Pulls Lab Experiment from minio to current directory """ 247 | home_dir = os.path.expanduser('~') 248 | 249 | lab_dir = os.path.join(home_dir, '.lab') 250 | 251 | if not os.path.exists(lab_dir): 252 | click.secho('Lab is not configured to connect to minio. 
' 253 | 'Run to set up access points.', 254 | fg='red') 255 | raise click.Abort() 256 | 257 | if project is not None: 258 | if os.path.exists(project): 259 | click.secho('Directory '+project+' already exists.', fg='red') 260 | raise click.Abort() 261 | 262 | _pull_from_minio(tag, bucket, project, force) 263 | 264 | 265 | @click.command('push') 266 | @click.option('--info', is_flag=True) 267 | @click.option('--tag', type=str, help='minio host nickname', default=None) 268 | @click.option('--bucket', type=str, default=None, 269 | help='minio bucket name') 270 | @click.option('--force', is_flag=True) 271 | @click.argument('path', type=str, default='.') 272 | def lab_push(info, tag, bucket, path, force): 273 | """ Push Lab Experiment to minio """ 274 | models_directory = 'experiments' 275 | logs_directory = 'logs' 276 | config_directory = 'config' 277 | 278 | home_dir = os.path.expanduser('~') 279 | lab_dir = os.path.join(home_dir, '.lab') 280 | if not os.path.exists(lab_dir): 281 | click.secho('Lab is not configured to connect to minio. ' 282 | 'Run to set up access points.', 283 | fg='red') 284 | raise click.Abort() 285 | 286 | if not (os.path.exists(models_directory) & 287 | os.path.exists(logs_directory) & os.path.exists(config_directory)): 288 | click.secho('This directory lacks a valid Lab Project directory ' 289 | 'structure. Run to create one.', 290 | fg='blue') 291 | raise click.Abort() 292 | 293 | if info: 294 | with open(os.path.join(config_directory, 'runtime.yaml'), 'r') as file: 295 | minio_config = yaml.load(file) 296 | click.secho('Last push: '+minio_config['last_push'], fg='blue') 297 | else: 298 | if (tag is None) & (bucket is None): 299 | try: 300 | with open(os.path.join(config_directory, 'runtime.yaml'), 301 | 'r') as file: 302 | minio_config = yaml.load(file) 303 | tag = minio_config['tag'] 304 | bucket = minio_config['bucket'] 305 | except KeyError: 306 | click.secho( 307 | 'Lab project does not have default tag and bucket configuration. 
' 308 | 'Supply --tag and --bucket options and run lab push again.', 309 | fg='red') 310 | raise click.Abort() 311 | else: 312 | with open(os.path.join(config_directory, 'runtime.yaml'), 313 | 'r') as file: 314 | minio_config = yaml.load(file) 315 | minio_config['tag'] = tag 316 | minio_config['bucket'] = bucket 317 | with open(os.path.join(config_directory, 'runtime.yaml'), 318 | 'w') as file: 319 | yaml.safe_dump(minio_config, file, default_flow_style=False) 320 | 321 | _push_to_minio(tag, bucket, path, force) 322 | 323 | 324 | def _pull_from_minio(tag, bucket, project_name, force): 325 | click.secho('Looking up remote..', fg='cyan') 326 | 327 | home_dir = os.path.expanduser('~') 328 | lab_dir = os.path.join(home_dir, '.lab') 329 | project_dir = os.getcwd() 330 | 331 | _clone = True 332 | 333 | # Extract bucket name and project name from config if they are present 334 | if (tag is None) & (bucket is None) & (project_name is None): 335 | _clone = False 336 | 337 | with open(os.path.join(project_dir, 338 | 'config', 'runtime.yaml'), 'r') as file: 339 | project_config = yaml.load(file) 340 | bucket = project_config['bucket'] 341 | project_name = project_config['name'] 342 | tag = project_config['tag'] 343 | 344 | check_minio_config(tag) 345 | 346 | # Extract minio configuration 347 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 348 | minio_config = yaml.load(file)[tag] 349 | 350 | hostname = minio_config['minio_endpoint'] 351 | accesskey = minio_config['minio_accesskey'] 352 | secretkey = minio_config['minio_secretkey'] 353 | 354 | minioClient = Minio(hostname, 355 | access_key=accesskey, 356 | secret_key=secretkey, 357 | secure=False) 358 | 359 | if not minioClient.bucket_exists(bucket): 360 | click.secho('Bucket ' + bucket + ' is not found on remote', fg='red') 361 | raise click.Abort() 362 | try: 363 | objects = minioClient.list_objects(bucket, prefix=project_name+'/', 364 | recursive=True) 365 | 366 | remote_objects = [o.object_name for o in objects] 367 | 368 | if _clone is False: 369 | if force: 370 | local_objects = [] 371 | else: 372 | local_objects = _list_dir('.') 373 | 374 | local_objects = [l.replace('./', project_name+'/') 375 | for l in local_objects] 376 | 377 | remote_objects = list(set(remote_objects) - set(local_objects)) 378 | 379 | if len(remote_objects) == 0: 380 | click.secho('Project is in sync with remote. 
' 381 | 'Use to do a hard pull.', 382 | fg='yellow') 383 | raise click.Abort() 384 | 385 | click.secho('Fetching '+str(len(remote_objects))+' remote objects.', 386 | fg='cyan') 387 | 388 | for obj in remote_objects: 389 | if _clone: 390 | object_name = obj 391 | else: 392 | object_name = ''.join(obj.split(project_name + '/')[1:]) 393 | print('Downloading ' + object_name) 394 | minioClient.fget_object(bucket, obj, 395 | os.path.join(os.getcwd(), object_name)) 396 | except S3Error as err: 397 | print(err) 398 | 399 | 400 | def _list_dir(path): 401 | files = [] 402 | # r=root, d=directories, f = files 403 | for r, d, f in os.walk(path): 404 | for file in f: 405 | files.append(os.path.join(r, file)) 406 | return(files) 407 | 408 | 409 | def _push_to_minio(tag, bucket, path, force): 410 | home_dir = os.path.expanduser('~') 411 | lab_dir = os.path.join(home_dir, '.lab') 412 | 413 | try: 414 | with open('.labignore') as f: 415 | exclude = set(f.read().splitlines()) 416 | except Exception: 417 | exclude = set(['.venv']) 418 | 419 | try: 420 | with open(os.path.join(lab_dir, 'config.yaml'), 'r') as file: 421 | minio_config = yaml.load(file)[tag] 422 | except KeyError as e: 423 | print(str(e)) 424 | click.secho('Unable to connect to host '+tag, fg='red') 425 | raise click.Abort() 426 | 427 | with open(os.path.join(path, 'config/runtime.yaml'), 'r') as file: 428 | config = yaml.load(file) 429 | 430 | project_name = config['name'] 431 | 432 | hostname = minio_config['minio_endpoint'] 433 | accesskey = minio_config['minio_accesskey'] 434 | secretkey = minio_config['minio_secretkey'] 435 | 436 | input_objects = [] 437 | output_objects = [] 438 | 439 | for root, d_names, f_names in os.walk(path, topdown=True): 440 | d_names[:] = [d for d in d_names if d not in exclude] 441 | for f in f_names: 442 | input_objects.append(os.path.join(root, f)) 443 | output_objects.append(os.path.join(project_name, 444 | root.strip('./'), f)) 445 | 446 | minioClient = Minio(hostname, 447 | access_key=accesskey, 448 | secret_key=secretkey, 449 | secure=False) 450 | 451 | if not minioClient.bucket_exists(bucket): 452 | minioClient.make_bucket(bucket, location='eu-west-1') 453 | 454 | # Prune remote if needed 455 | if force: 456 | if click.confirm( 457 | 'WARNING: force push will remove all remote files not ' 458 | 'found in your current project. 
Do you want to continue?', 459 | abort=True): 460 | try: 461 | remote_objects = minioClient.list_objects(bucket, 462 | prefix=project_name, 463 | recursive=True) 464 | remote_objects = [obj.object_name for obj in remote_objects] 465 | for del_err in minioClient.remove_objects(bucket, 466 | remote_objects): 467 | print("Deletion Error: {}".format(del_err)) 468 | except S3Error as err: 469 | print(err) 470 | 471 | try: 472 | for i in range(len(input_objects)): 473 | minioClient.fput_object(bucket, output_objects[i], 474 | input_objects[i]) 475 | print('Successfully processed '+input_objects[i]) 476 | 477 | with open(os.path.join('config', 'runtime.yaml'), 'r') as file: 478 | minio_config = yaml.load(file) 479 | minio_config['last_push'] = str(datetime.datetime.now()) 480 | 481 | with open(os.path.join('config', 'runtime.yaml'), 'w') as file: 482 | yaml.safe_dump(minio_config, file, default_flow_style=False) 483 | 484 | except S3Error as err: 485 | print(err) 486 | 487 | 488 | def _project_init(project_name): 489 | pyversion = '%s.%s' % (sys.version_info[0], sys.version_info[1]) 490 | 491 | # Create project structure 492 | os.mkdir(project_name) 493 | os.mkdir(os.path.join(project_name, 'notebooks')) 494 | os.mkdir(os.path.join(project_name, 'data')) 495 | os.mkdir(os.path.join(project_name, 'logs')) 496 | os.mkdir(os.path.join(project_name, 'experiments')) 497 | os.mkdir(os.path.join(project_name, 'config')) 498 | 499 | open(os.path.join(project_name, 'README.md'), 'a').close() 500 | open(os.path.join(project_name, 'notebooks', 'README.md'), 'a').close() 501 | 502 | file = open(os.path.join(project_name, '.gitignore'), 'w') 503 | file.write('.venv') 504 | file.close() 505 | 506 | # ignore these files when pushing lab repo to minio 507 | file = open(os.path.join(project_name, '.labignore'), 'w') 508 | file.write('.venv\n') 509 | file.write('.ipynb_checkpoints') 510 | file.close() 511 | 512 | # Copy requirements.txt file 513 | shutil.copyfile('requirements.txt', project_name+'/requirements.txt') 514 | 515 | # Create a virtual environment 516 | create_venv(project_name) 517 | 518 | # Create runtime configuration 519 | runtime = {'name': project_name, 520 | 'path': os.path.join(os.getcwd(), project_name), 521 | 'description': None, 522 | 'python': pyversion, 523 | 'timestamp': str(datetime.datetime.now()), 524 | 'last_push': '', 525 | 'venv': '.venv'} 526 | 527 | with open(os.path.join(project_name, 528 | 'config', 'runtime.yaml'), 'w') as file: 529 | yaml.dump(runtime, file, default_flow_style=False) 530 | --------------------------------------------------------------------------------