├── {{cookiecutter.repo_name}} ├── src │ └── {{cookiecutter.package_name}} │ │ ├── __init__.py │ │ ├── pipeline.py │ │ ├── features.py │ │ ├── examplemodule.py │ │ └── io.py ├── docs │ ├── writeup │ │ ├── info │ │ │ ├── code_of_conduct.rst │ │ │ ├── process_documentation.rst │ │ │ └── setup.rst │ │ ├── results │ │ │ └── eda.rst │ │ ├── api-{{cookiecutter.package_name}}.rst │ │ ├── Makefile │ │ ├── make.bat │ │ ├── index.rst │ │ └── conf.py │ ├── resources │ │ ├── DS_FlowChart.jpg │ │ ├── ReviewTable.jpg │ │ └── DS_ProcessFlow.jpg │ ├── data_science_code_of_conduct.md │ └── process_documentation.md ├── requirements.txt ├── data │ ├── temp │ │ └── .gitignore │ ├── raw │ │ └── .gitignore │ ├── processed │ │ └── .gitignore │ ├── training │ │ └── .gitignore │ └── interim_[desc] │ │ └── .gitignore ├── conda_env.yml ├── scripts │ ├── train │ │ ├── amlrun.py │ │ ├── submit-train-local.py │ │ ├── submit-train.py │ │ └── train.py │ ├── example.py │ └── deploy │ │ └── score.py ├── tests │ ├── {{cookiecutter.package_name}} │ │ └── examplemodule │ │ │ ├── test_hello_world.py │ │ │ └── test_add_value_to_numpy.py │ └── test_notebook.py ├── azure-pipelines.yml ├── setup.py ├── extras │ └── add_explorer_context_shortcuts.reg ├── notebooks │ └── example.ipynb ├── .gitignore ├── README.md └── LICENSE ├── requirements.txt ├── data └── training │ └── .gitignore ├── cookiecutter.json ├── LICENSE ├── azure-pipelines.yml ├── hooks └── post_gen_project.py ├── .travis.yml ├── tests └── test_create.py ├── .gitignore └── README.md /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cookiecutter 2 | flake8 3 | pytest 4 | pytest-cookies 5 | pytest-cov -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../../data_science_code_of_conduct.md -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/process_documentation.rst: -------------------------------------------------------------------------------- 1 | .. 
mdinclude:: ../../process_documentation.md -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | m2r 3 | nbformat 4 | numpy 5 | pandas 6 | pytest 7 | pytest-cookies 8 | pytest-cov 9 | sphinx -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/DS_FlowChart.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/DS_FlowChart.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/ReviewTable.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/ReviewTable.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/DS_ProcessFlow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/DS_ProcessFlow.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/temp/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Temporary folder for your own usage 4 | # 5 | # Git should typically ignore everything in this directory (except for this file) 6 | 7 | * 8 | !.gitignore 9 | #!SomeOtherFileToInclude 10 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/setup.rst: -------------------------------------------------------------------------------- 1 | Setup 2 | ===== 3 | 4 | Introduction 5 | ------------ 6 | 7 | .. note:: 8 | These documentation page are for your own use as you best see fit for your project. 9 | 10 | Here you could add setup information, or details on how you run things from an 11 | operational perspective. 12 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: {{cookiecutter.conda_name}} 2 | dependencies: 3 | - pandas=0.23.4 4 | - pytest=5.3.1 5 | - pytest-cov=2.8.1 6 | - numpy=1.17.4 7 | - nbconvert=5.6.1 8 | - nbformat=4.4.0 9 | - pip: 10 | - azureml-sdk 11 | - joblib==0.14.1 12 | - matplotlib==3.1.2 13 | - scikit-learn==0.22.1 14 | -------------------------------------------------------------------------------- /data/training/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 
5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/raw/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/processed/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/training/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/interim_[desc]/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/results/eda.rst: -------------------------------------------------------------------------------- 1 | Exploratory Analysis 2 | ==================== 3 | 4 | Introduction 5 | ------------ 6 | 7 | .. note:: 8 | These documentation page are for your own use as you best see fit for your project. 9 | 10 | Here you could add information and links to present results from EDA or 11 | one of your other experiments. This might include links back to notebooks 12 | or other artifacts. 
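For instance, a link back to the bundled example notebook can be added with a
plain reStructuredText link (a sketch only -- the relative path below assumes the
default template layout and may need adjusting):

`Example notebook <../../../notebooks/example.ipynb>`_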
13 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/scripts/train/amlrun.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from azureml.core import Run
3 | except ImportError:
4 |     pass
5 | 
6 | 
7 | # access the Azure ML run
8 | # (returns None if the script is not running within AML)
9 | def get_AMLRun():
10 |     """Try to get the Azure Machine Learning run.
11 | 
12 |     Returns:
13 |         Run: The Experiment run, or None if there is no active run.
14 |     """
15 |     try:
16 |         run = Run.get_context(allow_offline=False)
17 |         return run
18 |     except Exception as e:
19 |         print("Caught = {}".format(e))
20 |         return None
21 | 
--------------------------------------------------------------------------------
/cookiecutter.json:
--------------------------------------------------------------------------------
1 | {
2 |     "project_name": "project_name",
3 |     "project_description": "A short description of the project.",
4 |     "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
5 |     "conda_name": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}",
6 |     "package_name": "{{ cookiecutter.project_name.lower().replace(' ', '').replace('-', '') }}",
7 |     "mlops_name": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}",
8 |     "mlops_compute_name": "cpu-compute",
9 |     "author": "Equinor ASA",
10 |     "open_source_license": ["Not open source", "MIT", "LGPL3", "GPL3"],
11 |     "devops_organisation": ""
12 | }
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/docs/writeup/api-{{cookiecutter.package_name}}.rst:
--------------------------------------------------------------------------------
1 | api-{{cookiecutter.package_name}} package
2 | ================================================================================
3 | 
4 | This page contains information about the {{cookiecutter.package_name}} package.
5 | 
6 | .. note::
7 |     Edit this page to add additional description above and list whatever modules
8 |     you would like included below. Documentation pages for the individual modules
9 |     will be automatically generated when you 'make' the documentation.
10 | 
11 | .. autosummary::
12 |     :toctree: _generated
13 | 
14 |     {{cookiecutter.package_name}}.examplemodule
15 |     {{cookiecutter.package_name}}.features
16 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/docs/writeup/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
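# For example (illustrative only), extra options can be passed to sphinx-build
# through the $(O) shortcut when invoking a target, e.g.:
#
#     make html O="-W --keep-going"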
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """An example python script template. 4 | 5 | Argv: 6 | infile: Input file 7 | outfile: Output file 8 | 9 | """ 10 | 11 | import sys 12 | import argparse 13 | 14 | 15 | def main(arguments): 16 | 17 | parser = argparse.ArgumentParser( 18 | description="Put your description here", 19 | formatter_class=argparse.RawDescriptionHelpFormatter) 20 | parser.add_argument('infile', help="Input file", type=argparse.FileType('r')) 21 | parser.add_argument('-o', '--outfile', help="Output file", 22 | default=sys.stdout, type=argparse.FileType('w')) 23 | 24 | args = parser.parse_args(arguments) 25 | 26 | print(args) 27 | 28 | # Add your code here 29 | 30 | 31 | if __name__ == '__main__': 32 | sys.exit(main(sys.argv[1:])) 33 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain your main project pipeline(s). 3 | 4 | Whilst the pipeline may change during the analysis phases, any more stable pipeline should be implemented here so 5 | that it can be reused and easily reproduced. 6 | """ 7 | # import pandas as pd 8 | 9 | # from examplepackage import features 10 | # from examplepackage.io import IO 11 | 12 | 13 | def run_pipeline(local_data_path: str): 14 | """ 15 | Run the main processing pipeline. 16 | 17 | Returns: 18 | A dataframe containing the output of the pipeline 19 | """ 20 | 21 | # io = IO(path) 22 | # df = io.load_cleaned_file(download_always=False) 23 | # df = add_choke_events(df) 24 | 25 | # Add calls to features.Xxx here 26 | 27 | # save (or return) dataframe here? 28 | 29 | 30 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/{{cookiecutter.package_name}}/examplemodule/test_hello_world.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | # Explicitly set path so don't need to run setup.py - if we have multiple copies of the code we would otherwise need 5 | # to setup a separate environment for each to ensure the code pointers are correct. 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src'))) # noqa 7 | 8 | # from pandas.util.testing import assert_frame_equal 9 | from {{cookiecutter.package_name}} import examplemodule 10 | 11 | 12 | def test_something(): 13 | # print(os.getcwd()) 14 | assert True, "A comment to show if the test fails" 15 | 16 | 17 | # def test_that_fails(): 18 | # assert False, "We expected this to fail" 19 | 20 | 21 | def test_hello_world(): 22 | assert examplemodule.hello_world() == "Hello World", "The Hello World strings should be the same" 23 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/features.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain project specific feature engineering functionality. 3 | 4 | You should avoid engineering features in a notebook as it is not transferable later if you want to automate the 5 | process. Add functions here to create your features, such functions should include those to generate specific features 6 | along with any more generic functions. 7 | 8 | Consider moving generic functions into the shared statoilds package. 9 | """ 10 | import pandas as pd 11 | 12 | 13 | def my_feature_xxx(df: pd.DataFrame): 14 | """ 15 | Description goes here. 16 | You might also add additional arguments such as column etc... 17 | Would be nice with some test cases also :) 18 | 19 | Args: 20 | df: Dataframe upon which to operate 21 | 22 | Returns: 23 | A dataframe with the Xxx feature appended 24 | """ 25 | 26 | # CODE HERE 27 | 28 | return df 29 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/{{cookiecutter.package_name}}/examplemodule/test_add_value_to_numpy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import pytest 5 | 6 | # Explicitly set path so don't need to run setup.py - if we have multiple copies of the code we would otherwise need 7 | # to setup a separate environment for each to ensure the code pointers are correct. 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src'))) # noqa 9 | 10 | # from pandas.util.testing import assert_frame_equal 11 | from {{cookiecutter.package_name}} import examplemodule 12 | 13 | 14 | def test_add_value_to_numpy(): 15 | array = np.array([1, 1, 1, 1, 1]) 16 | expected_result = np.array([2, 2, 2, 2, 2]) 17 | result_array = examplemodule.add_value_to_numpy(array, 1) 18 | assert np.array_equal(expected_result, result_array), "The Hello World strings should be the same" 19 | 20 | 21 | def test_add_value_to_numpy_wrong_type(): 22 | with pytest.raises(ValueError) as _: 23 | examplemodule.add_value_to_numpy([1, 1], 1) 24 | 25 | 26 | def test_add_value_to_numpy_empty(): 27 | with pytest.raises(ValueError) as _: 28 | examplemodule.add_value_to_numpy(None, 1) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 | 
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/examplemodule.py:
--------------------------------------------------------------------------------
1 | """
2 | This example module shows some simple methods and best practices for documentation.
3 | """
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def hello_world() -> str:
9 |     """
10 |     A simple method to get the hello world string.
11 | 
12 |     Returns:
13 |         The string "Hello World"
14 |     """
15 |     return "Hello World"
16 | 
17 | 
18 | def add_value_to_numpy(array: np.ndarray, amount: float = 1) -> np.ndarray:
19 |     """
20 |     A sample method to add a value to every element in a NumPy array.
21 | 
22 |     Args:
23 |         array: The source array to work on.
24 |         amount: The amount to add to each element in the array.
25 | 
26 |     Returns:
27 |         A new array with each value increased by amount.
28 | 
29 |     Examples:
30 |         Examples should be written in doctest format, and should illustrate how
31 |         to use the function.
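        For doctest to actually verify a result, include the expected output
        after the call, for example (using the module-level numpy import):

        >>> add_value_to_numpy(np.array([1, 2, 3]), 1)
        array([2, 3, 4])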
32 | >>> array = np.array([1, 1, 1 ,1, 1]) 33 | >>> result_array = add_value_to_numpy(array, 1) 34 | 35 | """ 36 | if array is None or \ 37 | not isinstance(array, np.ndarray): 38 | raise ValueError("array must be a valid ndarray") 39 | # if isinstance(a, np.ndarray): 40 | 41 | return array + amount 42 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: Build_and_Test 3 | displayName: Build and Test 4 | condition: succeeded() 5 | pool: 6 | name: Hosted Ubuntu 1604 7 | strategy: 8 | matrix: 9 | Python36: 10 | python.version: '3.6' 11 | Python37: 12 | python.version: '3.7' 13 | maxParallel: 3 14 | 15 | steps: 16 | - task: UsePythonVersion@0 17 | displayName: 'Use Python $(python.version)' 18 | inputs: 19 | versionSpec: '$(python.version)' 20 | 21 | - script: python -m pip install --upgrade pip 22 | displayName: 'Upgrade pip' 23 | 24 | - script: pip install -r requirements.txt 25 | displayName: 'Install requirements' 26 | 27 | - script: | 28 | flake8 --max-line-length=120 *.py hooks/ tests/ 29 | displayName: 'Run lint (flake8) tests' 30 | 31 | - script: | 32 | pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html 33 | displayName: pytest 34 | 35 | - task: PublishTestResults@2 36 | displayName: 'Publish Test Results **/test-results.xml' 37 | inputs: 38 | testResultsFiles: '**/test-results.xml' 39 | testRunTitle: 'Python $(python.version)' 40 | 41 | - task: PublishCodeCoverageResults@1 42 | inputs: 43 | codeCoverageTool: Cobertura 44 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 45 | reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov' 46 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: Build_and_Test 3 | displayName: Build and Test 4 | condition: succeeded() 5 | pool: 6 | name: Hosted Ubuntu 1604 7 | strategy: 8 | matrix: 9 | Python36: 10 | python.version: '3.6' 11 | Python37: 12 | python.version: '3.7' 13 | maxParallel: 3 14 | 15 | steps: 16 | - task: UsePythonVersion@0 17 | displayName: 'Use Python $(python.version)' 18 | inputs: 19 | versionSpec: '$(python.version)' 20 | 21 | - script: python -m pip install --upgrade pip 22 | displayName: 'Upgrade pip' 23 | 24 | - script: pip install -r requirements.txt 25 | displayName: 'Install requirements' 26 | 27 | - script: | 28 | flake8 --max-line-length=120 *.py tests/ scripts/ 29 | displayName: 'Run lint (flake8) tests' 30 | 31 | - script: | 32 | pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html 33 | displayName: pytest 34 | 35 | - task: PublishTestResults@2 36 | displayName: 'Publish Test Results **/test-results.xml' 37 | inputs: 38 | testResultsFiles: '**/test-results.xml' 39 | testRunTitle: 'Python $(python.version)' 40 | 41 | - task: PublishCodeCoverageResults@1 42 | inputs: 43 | codeCoverageTool: Cobertura 44 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 45 | reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov' 46 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/test_notebook.py: -------------------------------------------------------------------------------- 
1 | import os 2 | import subprocess 3 | import tempfile 4 | import nbformat 5 | 6 | 7 | def run_notebook(filename): 8 | """ 9 | Execute the specified notebook via jupyter nbconvert and collect output. 10 | :returns (parsed nb object, execution errors) 11 | """ 12 | os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 13 | 14 | # get temporary file ( and then close to avoid multiple write problems 15 | with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: 16 | temp_name = fout.name 17 | 18 | # run jupyter nbconvert 19 | args = ["jupyter", "nbconvert", "--to", "notebook", "--execute", 20 | "--ExecutePreprocessor.timeout=60", "--output", fout.name, filename] 21 | subprocess.check_call(args, shell=True) 22 | 23 | # read and parse notebook 24 | with open(temp_name, "r") as fout: 25 | fout.seek(0) 26 | nb = nbformat.read(fout, nbformat.current_nbformat) 27 | 28 | errors = [output for cell in nb.cells if "outputs" in cell 29 | for output in cell["outputs"] 30 | if output.output_type == "error"] 31 | 32 | return nb, errors 33 | 34 | 35 | # Commented out for now pending an update to automatically set the conda environment. 36 | # This will work, but only if all libraries are in your default python environment. As I am using miniconda and the 37 | # example notebook uses numpy from within a separate environment this doesn't work for me. 38 | # def test_notebook(): 39 | # nb, errors = run_notebook('notebooks\example.ipynb') 40 | # assert errors == [] 41 | -------------------------------------------------------------------------------- /hooks/post_gen_project.py: -------------------------------------------------------------------------------- 1 | # import datetime 2 | import os 3 | # import shutil 4 | # from os.path import join 5 | 6 | 7 | def replace_contents(filename: str, what: str, replacement: str) -> None: 8 | """ 9 | Replace instances of a given string in a file 10 | 11 | Args: 12 | filename: The filename to replace within 13 | what: The text that should be matched 14 | replacement: The text that what should be replaced with 15 | """ 16 | with open(filename) as fh: 17 | changelog = fh.read() 18 | with open(filename, 'w') as fh: 19 | fh.write(changelog.replace(what, replacement)) 20 | 21 | 22 | if __name__ == "__main__": 23 | # today = datetime.date.today() 24 | # replace_contents('LICENSE', '', today.strftime("%Y")) 25 | 26 | if '{{ cookiecutter.open_source_license }}' == "Not open source": 27 | os.remove('LICENSE') 28 | # shutil.rmtree('LICENSE') 29 | 30 | # Print out some information on setup and next steps 31 | print(""" 32 | 33 | Data Science Project '{{ cookiecutter.repo_name }}' created using the following 34 | parameters: 35 | 36 | {% for key, value in cookiecutter.items()|sort %} 37 | {{ "{0:26}".format(key + ":") }} {{ "{0!r}".format(value).strip("u") }} 38 | {%- endfor %} 39 | 40 | You are now ready to get started, however you should create a new github 41 | repository for your new project and add your project using the following 42 | commands (substitute REMOTE-REPOSITORY-URL with the remote repository url). 
43 | 44 | cd {{ cookiecutter.repo_name }} 45 | git init 46 | git add --all 47 | git commit -m "Initial commit" 48 | git remote add origin REMOTE-REPOSITORY-URL 49 | git push -u origin master 50 | """) 51 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/io.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain project specific io functionality. 3 | 4 | Loading and saving of files should be deferred to this class for easy and consistent file handling between different 5 | sources and to have a single location where file references are held. 6 | """ 7 | import os 8 | import pandas as pd 9 | from statoilds import datalake 10 | 11 | 12 | class IO: 13 | local_data_path = '.' 14 | 15 | def __init__(self, local_data_path: str): 16 | """ 17 | Constructor that can set the data path from where we will access local data.. 18 | 19 | Args: 20 | path: Path to the data folder. 21 | """ 22 | self.local_data_path = local_data_path 23 | 24 | def load_cleaned_file(self, download_always: bool = True): 25 | """ 26 | Load the cleaned file, optionally logging into to Azure to download. 27 | 28 | If token is passed then this will only login if token isn't already valid 29 | 30 | Args: 31 | download_always: Whether to always download the file even if it exists locally 32 | 33 | Returns: 34 | A dataframe used for logging in and the login token 35 | """ 36 | local_path = os.path.join(self.local_data_path, self.cleaned_file_local) 37 | 38 | token = datalake.login_and_download_file(self.cleaned_file_remote, 39 | local_path, 40 | download_always=download_always) 41 | 42 | df = pd.read_csv(local_path, 43 | dtype={'Well_name': 'category'}, 44 | parse_dates=['start']) 45 | return df 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file contains build instructions for Travis CI. 2 | 3 | # language and versions (note if using conda we don't use Travis python but add here for consistency 4 | language: python 5 | python: 6 | - "3.6" 7 | 8 | # install dependencies 9 | install: 10 | # if using pip then either install dependencies directly or add on seperate lines in a pip_requirements.txt file 11 | #- pip install pytest pytest-cov 12 | #- pip install coveralls 13 | #- pip install -r pip_requirements.txt 14 | 15 | # if using conda then we setup an environment from an conda_env.yml file 16 | # first install miniconda 17 | - sudo apt-get update 18 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 19 | - bash miniconda.sh -b -p $HOME/miniconda 20 | - export PATH="$HOME/miniconda/bin:$PATH" 21 | - hash -r 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | - conda info -a # Useful for debugging any issues with conda 25 | # now create the environment and install any extra packages 26 | - conda env create -n test-environment -f conda_env.yml 27 | - source activate test-environment 28 | # pip install and non conda packages 29 | - pip install coveralls 30 | 31 | # setup packages 32 | - python setup.py install 33 | 34 | # command to run tests 35 | script: 36 | # - py.test --cov-report term-missing --cov=maths 37 | # - py.test --doctest-modules --cov=maths3 --cov-report term-missing 38 | - py.test --cov . 
--cov-report term-missing 39 | branches: 40 | only: 41 | - master 42 | after_success: 43 | - coveralls 44 | #notifications: 45 | # email: 46 | # recipients: 47 | # - xxx@statoil.com 48 | # on_success: always 49 | # on_failure: always -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import find_packages, setup 3 | 4 | 5 | # Utility function to read the README file. 6 | # Used for the long_description. It's nice, because now 1) we have a top level 7 | # README file and 2) it's easier to type in the README file than to put a raw 8 | # string in below ... 9 | def read(file_name): 10 | return open(os.path.join(os.path.dirname(__file__), file_name)).read() 11 | 12 | {%- set license_classifiers = { 13 | 'MIT': 'License :: OSI Approved :: MIT License', 14 | 'LGPL3': 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', 15 | 'GPL3': 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)' 16 | } %} 17 | 18 | 19 | install_requires = [ 20 | 'numpy', 21 | 'pandas', 22 | 'pytest' 23 | ] 24 | 25 | setup_requirements = [ 26 | 'pytest-runner', 27 | 'better-setuptools-git-version' 28 | ] 29 | 30 | test_requirements = [ 31 | 'pytest', 32 | 'nbformat' 33 | ] 34 | 35 | setup( 36 | author='{{cookiecutter.author}}', 37 | author_email="Name@equinor.com", 38 | classifiers=[ 39 | "Development Status :: 3 - Alpha", 40 | "Topic :: Utilities", 41 | {%- if cookiecutter.open_source_license in license_classifiers %} 42 | '{{ license_classifiers[cookiecutter.open_source_license] }}', 43 | {%- endif %} 44 | ], 45 | 46 | name="{{cookiecutter.project_name}}", 47 | # version="0.0.1", 48 | version_config={ 49 | "version_format": "{tag}.dev{sha}", 50 | "starting_version": "0.0.1" 51 | }, 52 | description="{{cookiecutter.project_description}}", 53 | long_description=open('README.md').read(), 54 | packages=find_packages('src'), 55 | package_dir={'': 'src'}, 56 | setup_requires=setup_requirements, 57 | test_suite='tests', 58 | tests_require=test_requirements, 59 | install_requires=install_requires 60 | ) 61 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/extras/add_explorer_context_shortcuts.reg: -------------------------------------------------------------------------------- 1 | Windows Registry Editor Version 5.00 2 | ; 3 | ; This adds additional explorer menus. 
Verify any path's below and add / remove conda environments as needed 4 | ; 5 | 6 | ; 7 | ; Right click on folder commands 8 | ; 9 | 10 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science] 11 | "MUIVerb"="Data Science" 12 | "SubCommands"="" 13 | "Position"=- 14 | 15 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science\Shell\Run Jupyter Here] 16 | @="Run Jupyter Here" 17 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 18 | 19 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science\Shell\Run Jupyter Here\command] 20 | @="cmd.exe /K jupyter notebook" 21 | 22 | ; 23 | ; Right click on background commands 24 | ; 25 | 26 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science] 27 | "MUIVerb"="Data Science" 28 | "SubCommands"="" 29 | "Position"=- 30 | 31 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here] 32 | @="Run Jupyter Here" 33 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 34 | 35 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here\command] 36 | @="cmd.exe /K jupyter notebook" 37 | 38 | 39 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (deeplearning env)] 40 | @="Run Jupyter Here (deeplearning env)" 41 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 42 | 43 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (deeplearning env)\command] 44 | @="cmd.exe /K activate deeplearning & jupyter notebook" 45 | 46 | 47 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (anaconda env)] 48 | @="Run Jupyter Here (anaconda env)" 49 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 50 | 51 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (anaconda env)\command] 52 | @="cmd.exe /K activate anaconda & jupyter notebook" 53 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/deploy/score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Model scoring (WIP) - Contributions welcome!! 
4 | """ 5 | # import argparse 6 | import joblib 7 | import json 8 | import numpy 9 | 10 | from azureml.core.model import Model 11 | 12 | from sklearn import datasets 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.model_selection import train_test_split 15 | 16 | 17 | def init(): 18 | global model 19 | model_path = Model.get_model_path('{{cookiecutter.mlops_name}}') 20 | # deserialize the model file back into a sklearn model 21 | model = joblib.load(model_path) 22 | 23 | 24 | # note you can pass in multiple rows for scoring 25 | def run(raw_data): 26 | try: 27 | data = json.loads(raw_data)['data'] 28 | data = numpy.array(data) 29 | result = model.predict(data) 30 | # you can return any datatype if it is JSON-serializable 31 | return result.tolist() 32 | except Exception as e: 33 | error = str(e) 34 | return error 35 | 36 | 37 | def main(): 38 | # parser = argparse.ArgumentParser() 39 | # environment parameters 40 | # parser.add_argument( 41 | # '--data-folder', 42 | # help="local path to training data", 43 | # required=True 44 | # ) 45 | # parser.add_argument( 46 | # "--output-dir", type=str, default=os.path.join('..', 'outputs'), 47 | # help='location to writeoutput relative to this script' 48 | # ) 49 | 50 | # parse the arguments 51 | # args = parser.parse_args() 52 | 53 | # ws = Workspace.from_config() 54 | # model = Model(ws, 'sklearn_mnist') 55 | 56 | # model.download(target_dir=os.getcwd(), exist_ok=True) 57 | 58 | # verify the downloaded model file 59 | file_path = "ml-service/{{cookiecutter.mlops_name}}.joblib" 60 | model = joblib.load(file_path) 61 | 62 | # loading the iris dataset 63 | iris = datasets.load_iris() 64 | 65 | # X -> features, y -> label 66 | X = iris.data 67 | y = iris.target 68 | 69 | # dividing X, y into train and test data 70 | _, X_test, _, y_test = train_test_split(X, y, random_state=0) 71 | 72 | # training a linear SVM classifier 73 | y_pred = model.predict(X_test) 74 | 75 | # model accuracy 76 | accuracy = accuracy_score(y_test, y_pred) 77 | print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/index.rst: -------------------------------------------------------------------------------- 1 | {{cookiecutter.project_name}} 2 | ====================================================================== 3 | 4 | .. note:: 5 | This documentation page is for your own use as you best see fit for your project. 6 | 7 | In some cases it might be enough with the README.md in the project root, however 8 | you might use this if you want to publish API documentation, or have a website 9 | where you want to make detailed project information available (you can e.g. 10 | publish direct from blob storage). 11 | 12 | To generate documentation install make and from the docs folder run: 13 | 14 | .. code-block:: 15 | 16 | make html 17 | 18 | On Windows you can use the .bat file so from the docs folder just run: 19 | 20 | .. code-block:: 21 | 22 | make html 23 | 24 | Usage and setup 25 | --------------- 26 | 27 | Information about this project including steps on how to setup, run examples 28 | to reproduce results, and other guidelines. 29 | 30 | .. note:: 31 | Here you might include information about this project including steps on how to 32 | setup and reproduce results and findings, and other guidelines. 
As default we 33 | include the Equinor code of conduct, process documentation and any .rst files 34 | under the info folder. Edit / add / remove as needed. The table of contents is 35 | generated automatically based upon the referenced document headings. 36 | 37 | .. toctree:: 38 | :glob: 39 | :maxdepth: 2 40 | 41 | info/* 42 | 43 | Results and findings 44 | -------------------- 45 | 46 | Results and findings generated during the course of this project. 47 | 48 | .. note:: 49 | Here you might include a write up of results or links to notebooks or other 50 | information that contain results or other findings. As default we 51 | include any .rst files under the results folder. Edit / add / remove as needed. 52 | The table of contents is generated automatically based upon the referenced 53 | document headings. 54 | 55 | .. toctree:: 56 | :glob: 57 | :maxdepth: 2 58 | 59 | results/* 60 | 61 | API Documentation 62 | ----------------- 63 | 64 | Information on the underlying API including function, class and method 65 | documentation. 66 | 67 | .. note:: 68 | If you don't want this, then your project probably isn't written according 69 | to best practices and likely not production ready. If you disagree, just 70 | edit and remove this section. 71 | 72 | .. toctree:: 73 | :maxdepth: 2 74 | 75 | api-{{cookiecutter.package_name}} 76 | 77 | Indices and tables 78 | ------------------ 79 | 80 | * :ref:`genindex` 81 | * :ref:`modindex` 82 | * :ref:`search` 83 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/train/submit-train-local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Train a model locally using Azure ML. 4 | 5 | This will re-use the current python environment. 6 | 7 | Argv: 8 | output-dir: A folder to store any output to 9 | kernel: Kernel type to be used in the algorithm 10 | penalty: Penalty parameter of the error term 11 | """ 12 | import argparse 13 | import sys 14 | 15 | import azureml.core 16 | from azureml.core import Experiment, ScriptRunConfig, Workspace 17 | 18 | 19 | def submit(experiment_name: str, 20 | kernal: str, 21 | penalty: float): 22 | 23 | print("This notebook was created using version 1.0.83 of the Azure ML SDK") 24 | print("You are using version", azureml.core.VERSION, "of the SDK") 25 | 26 | # Get a reference to the workspace. Be sure to download the config.json 27 | # from your workspace and place in the parent folder. 28 | ws = Workspace.from_config() 29 | print('Loaded workspace', ws.name) 30 | 31 | # Reference the experiment 32 | experiment = Experiment(workspace=ws, name=experiment_name) 33 | print('Logging to experiment', experiment_name) 34 | 35 | # Create the RunConfiguration that will be used 36 | arguments = [ 37 | '--output-dir', "outputs", 38 | '--kernel', kernal, 39 | '--penalty', penalty, 40 | ] 41 | script_run_config = ScriptRunConfig(source_directory='.', 42 | script='train.py', 43 | arguments=arguments) 44 | 45 | # As we will run locally we can use our existing python environment 46 | script_run_config.run_config.environment. 
\ 47 | python.user_managed_dependencies = True 48 | 49 | # Submit the experiment to get a run and wait for completion 50 | run = experiment.submit(script_run_config) 51 | print('Submitted please wait...') 52 | run.wait_for_completion(show_output=True) 53 | 54 | # register the trained model 55 | model = run.register_model( 56 | model_name='{{cookiecutter.mlops_name}}', 57 | model_path='outputs/model/{{cookiecutter.mlops_name}}.joblib') 58 | 59 | print('Run number:', run.number) 60 | print('Run id:', run.id) 61 | print("Run details are available at:", run.get_portal_url()) 62 | print("Model: {} v{}".format(model.name, model.version)) 63 | 64 | if 'azureml.git.dirty' in run.properties: 65 | if run.properties['azureml.git.dirty']: 66 | print("WARNNG: You have uncomitted changes. To ensure " 67 | "reproducability check in your code before you train.") 68 | else: 69 | print('WARNNG: To ensure reproducability you should be using git!') 70 | 71 | 72 | def main(arguments: list): 73 | parser = argparse.ArgumentParser( 74 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 75 | 76 | # environment parameters 77 | parser.add_argument('--experiment', type=str, 78 | default='{{cookiecutter.mlops_name}}-local', 79 | help='The name of the Azure ML Experiment') 80 | 81 | # training specific parameters 82 | parser.add_argument('--kernel', type=str, default='linear', 83 | help='Kernel type to be used in the algorithm') 84 | parser.add_argument('--penalty', type=float, default=1.0, 85 | help='Penalty parameter of the error term') 86 | 87 | # parse the arguments 88 | args = parser.parse_args(arguments) 89 | 90 | # submit the job 91 | submit(args.experiment, 92 | args.kernel, 93 | args.penalty) 94 | 95 | 96 | if __name__ == '__main__': 97 | sys.exit(main(sys.argv[1:])) 98 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/train/submit-train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Train a model remotely using Azure ML compute. 4 | 5 | This will re-use the current python environment. 6 | 7 | Argv: 8 | output-dir: A folder to store any output to 9 | kernel: Kernel type to be used in the algorithm 10 | penalty: Penalty parameter of the error term 11 | """ 12 | import argparse 13 | import sys 14 | 15 | import azureml.core 16 | from azureml.core import Experiment, Workspace 17 | from azureml.train.sklearn import SKLearn 18 | 19 | 20 | def submit(experiment_name: str, 21 | compute_name: str, 22 | kernal: str, 23 | penalty: float): 24 | 25 | print("This notebook was created using version 1.0.83 of the Azure ML SDK") 26 | print("You are using version", azureml.core.VERSION, "of the SDK") 27 | 28 | # Get a reference to the workspace. Be sure to download the config.json 29 | # from your workspace and place in the parent folder. 30 | ws = Workspace.from_config() 31 | print('Loaded workspace', ws.name) 32 | 33 | # Reference the experiment 34 | experiment = Experiment(workspace=ws, name=experiment_name) 35 | print('Logging to experiment', experiment_name) 36 | 37 | # Get a reference to an existing the compute target. 38 | compute_target = ws.compute_targets[compute_name] 39 | 40 | # Setup an Estimator for submitting the job. An Estimator further wraps 41 | # RunConfig with additional configuration for specific cases. There are 42 | # Estimators provided for many common runtimes such as PyTorch and 43 | # Tensorflow. In this case we use the SKLearn specific estimator. 
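    # (Sketch only: for a runtime without a dedicated estimator class, the
    # generic azureml.train.estimator.Estimator could be used instead, passing
    # the entry script and its dependencies explicitly, e.g.
    #
    #     from azureml.train.estimator import Estimator
    #     estimator = Estimator(source_directory='.', entry_script='train.py',
    #                           compute_target=compute_target,
    #                           pip_packages=['matplotlib', 'scikit-learn'])
    #
    # The SKLearn estimator used below remains the default here.)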
44 | script_params = { 45 | '--output-dir': "outputs", 46 | '--kernel': kernal, 47 | '--penalty': penalty, 48 | } 49 | 50 | # NOTE: scikit-learn added below until default image includes v22.1+ 51 | estimator = SKLearn(source_directory=".", 52 | entry_script='train.py', 53 | script_params=script_params, 54 | compute_target=compute_target, 55 | pip_packages=['matplotlib', 'scikit-learn']) 56 | 57 | # Submit the experiment to get a run and wait for completion 58 | run = experiment.submit(estimator) 59 | print('Submitted please wait...') 60 | run.wait_for_completion(show_output=True) 61 | 62 | # register the trained model 63 | model = run.register_model( 64 | model_name='{{cookiecutter.mlops_name}}', 65 | model_path='outputs/model/{{cookiecutter.mlops_name}}.joblib') 66 | 67 | print('Run number:', run.number) 68 | print('Run id:', run.id) 69 | print("Run details are available at:", run.get_portal_url()) 70 | print("Model: {} v{}".format(model.name, model.version)) 71 | 72 | if 'azureml.git.dirty' in run.properties: 73 | if run.properties['azureml.git.dirty']: 74 | print("WARNNG: You have uncomitted changes. To ensure " 75 | "reproducability check in your code before you train.") 76 | else: 77 | print('WARNNG: To ensure reproducability you should be using git!') 78 | 79 | 80 | def main(arguments: list): 81 | parser = argparse.ArgumentParser( 82 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 83 | 84 | # environment parameters 85 | parser.add_argument('--experiment', type=str, 86 | default='{{cookiecutter.mlops_name}}', 87 | help='The name of the Azure ML Experiment') 88 | parser.add_argument('--compute-name', type=str, 89 | default='{{cookiecutter.mlops_compute_name}}', 90 | help='The name of the Azure ML compute cluster') 91 | 92 | # training specific parameters 93 | parser.add_argument('--kernel', type=str, default='linear', 94 | help='Kernel type to be used in the algorithm') 95 | parser.add_argument('--penalty', type=float, default=1.0, 96 | help='Penalty parameter of the error term') 97 | 98 | # parse the arguments 99 | args = parser.parse_args(arguments) 100 | 101 | # submit the job 102 | submit(args.experiment, 103 | args.compute_name, 104 | args.kernel, 105 | args.penalty) 106 | 107 | 108 | if __name__ == '__main__': 109 | sys.exit(main(sys.argv[1:])) 110 | -------------------------------------------------------------------------------- /tests/test_create.py: -------------------------------------------------------------------------------- 1 | # Some original code Copyright (c) Audrey Roy Greenfeld and individual contributors - 2 | # see https://github.com/audreyr/cookiecutter-pypackage/blob/master/LICENSE 3 | 4 | from contextlib import contextmanager 5 | import shlex 6 | import os 7 | import subprocess 8 | import datetime 9 | from cookiecutter.utils import rmtree 10 | 11 | 12 | @contextmanager 13 | def inside_dir(dir_path): 14 | """ 15 | Execute code from inside the given directory 16 | :param dir_path: String, path of the directory the command is being run. 
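    Example (illustrative sketch):
        with inside_dir('/tmp'):
            print(os.getcwd())  # prints /tmp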
17 | """ 18 | old_path = os.getcwd() 19 | try: 20 | os.chdir(dir_path) 21 | yield 22 | finally: 23 | os.chdir(old_path) 24 | 25 | 26 | @contextmanager 27 | def bake_in_temp_dir(cookies, *args, **kwargs): 28 | """ 29 | Delete the temporal directory that is created when executing the tests 30 | :param cookies: pytest_cookies.Cookies, 31 | cookie to be baked and its temporal files will be removed 32 | """ 33 | result = cookies.bake(*args, **kwargs) 34 | try: 35 | yield result 36 | finally: 37 | rmtree(str(result.project)) 38 | 39 | 40 | def run_inside_dir(command, dir_path): 41 | """ 42 | Run a command from inside a given directory, returning the exit status 43 | :param command: Command that will be executed 44 | :param dir_path: String, path of the directory the command is being run. 45 | """ 46 | with inside_dir(dir_path): 47 | return subprocess.check_call(shlex.split(command)) 48 | 49 | 50 | def check_output_inside_dir(command, dir_path): 51 | """Run a command from inside a given directory, returning the command output""" 52 | with inside_dir(dir_path): 53 | return subprocess.check_output(shlex.split(command)) 54 | 55 | 56 | def test_bake_with_defaults(cookies): 57 | with bake_in_temp_dir(cookies) as result: 58 | assert result.project.isdir() 59 | assert result.exit_code == 0 60 | assert result.exception is None 61 | 62 | found_top_level_files = [f.basename for f in result.project.listdir()] 63 | assert '.gitignore' in found_top_level_files 64 | assert 'conda_env.yml' in found_top_level_files 65 | assert 'README.md' in found_top_level_files 66 | assert 'setup.py' in found_top_level_files 67 | 68 | assert os.path.isdir(os.path.join(result.project, 'src', 'project_name')) 69 | assert os.path.isdir(os.path.join(result.project, 'tests', 'project_name')) 70 | 71 | 72 | def test_bake_and_run_tests(cookies): 73 | with bake_in_temp_dir(cookies) as result: 74 | assert result.project.isdir() 75 | assert run_inside_dir('python setup.py pytest', str(result.project)) == 0 76 | print("test_bake_and_run_tests path", str(result.project)) 77 | 78 | 79 | def test_bake_selecting_license(cookies): 80 | license_strings = { 81 | 'MIT': ('MIT ', 'MIT License', True), 82 | 'LGPL3': ('GNU LESSER GENERAL PUBLIC LICENSE', 'GNU Lesser General Public License v3 (LGPLv3)', False), 83 | 'GPL3': ('GNU GENERAL PUBLIC LICENSE', 'GNU General Public License v3 (GPLv3)', False) 84 | } 85 | for project_license, (license_subtext, setup_subtext, should_contain_year) in license_strings.items(): 86 | with bake_in_temp_dir(cookies, extra_context={'open_source_license': project_license}) as result: 87 | license_file_path = result.project.join('LICENSE') 88 | print(license_file_path) 89 | assert license_subtext in license_file_path.read() 90 | if should_contain_year: 91 | now = datetime.datetime.now() 92 | assert str(now.year) in license_file_path.read() 93 | assert setup_subtext in result.project.join('setup.py').read() 94 | 95 | 96 | def test_bake_not_open_source(cookies): 97 | with bake_in_temp_dir(cookies, extra_context={'open_source_license': 'Not open source'}) as result: 98 | found_top_level_files = [f.basename for f in result.project.listdir()] 99 | assert 'setup.py' in found_top_level_files 100 | assert 'LICENSE' not in found_top_level_files 101 | 102 | 103 | def test_bake_package_name(cookies): 104 | with bake_in_temp_dir(cookies, extra_context={'package_name': 'my_package'}) as result: 105 | with inside_dir(result.project): 106 | assert os.path.isdir(os.path.join('src', 'my_package')) 107 | assert 
os.path.isdir(os.path.join('tests', 'my_package'))
108 | 
109 |             assert 'from my_package import examplemodule' in \
110 |                 open(os.path.join('tests', 'my_package', 'examplemodule', 'test_add_value_to_numpy.py')).read()
111 |             assert 'from my_package import examplemodule' in \
112 |                 open(os.path.join('tests', 'my_package', 'examplemodule', 'test_hello_world.py')).read()
113 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/scripts/train/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """Train a model.
4 | 
5 | Argv:
6 |     output-dir: A folder to store any output to
7 |     kernel: Kernel type to be used in the algorithm
8 |     penalty: Penalty parameter of the error term
9 | """
10 | import argparse
11 | import joblib
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | import os
15 | import sys
16 | 
17 | from sklearn import datasets
18 | from sklearn.metrics import classification_report
19 | from sklearn.metrics import plot_confusion_matrix
20 | from sklearn.model_selection import train_test_split
21 | from sklearn.model_selection import cross_val_score
22 | from sklearn.model_selection import StratifiedKFold
23 | from sklearn.svm import SVC
24 | 
25 | from amlrun import get_AMLRun
26 | 
27 | 
28 | def train(output_dir='outputs', kernel='linear', penalty=1.0):
29 |     # make sure the output directory exists
30 |     os.makedirs(output_dir, exist_ok=True)
31 | 
32 |     # Safely get the Azure ML run
33 |     run = get_AMLRun()
34 | 
35 |     # loading the iris dataset
36 |     iris = datasets.load_iris()
37 | 
38 |     # X -> features, y -> label
39 |     X = iris.data
40 |     y = iris.target
41 |     class_names = iris.target_names
42 | 
43 |     # dividing X, y into train and test data.
Random seed for reproducability 44 | X_train, X_test, y_train, y_test = \ 45 | train_test_split(X, y, test_size=0.20, random_state=0) 46 | 47 | # create our model - a linear SVM classifier 48 | svm_model_linear = SVC(kernel=kernel, C=penalty) 49 | 50 | # evaluate each model in turn 51 | kfold = StratifiedKFold(n_splits=10, random_state=1) 52 | cv_results = cross_val_score(svm_model_linear, X_train, y_train, 53 | cv=kfold, scoring='accuracy') 54 | 55 | print('Cross Validation Mean: ', cv_results.mean()) 56 | print('Cross Validation Std: ', cv_results.std()) 57 | if run is not None: 58 | run.log_list('Cross Validation Accuracies', cv_results) 59 | run.log('Cross Validation Mean', cv_results.mean()) 60 | run.log('Cross Validation Std', cv_results.std()) 61 | 62 | # now training on the full dataset 63 | svm_model_linear.fit(X_train, y_train) 64 | y_pred = svm_model_linear.predict(X_test) 65 | 66 | # model accuracy for X_test 67 | accuracy = svm_model_linear.score(X_test, y_test) 68 | print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) 69 | if run is not None: 70 | run.log('Accuracy', np.float(accuracy)) 71 | 72 | # Plot non-normalized confusion matrix 73 | title = 'Test confusion matrix' 74 | disp = plot_confusion_matrix(svm_model_linear, X_test, y_test, 75 | display_labels=class_names, 76 | cmap=plt.cm.Blues) 77 | disp.ax_.set_title(title) 78 | print(title) 79 | print(disp.confusion_matrix) 80 | 81 | if run is not None: 82 | run.log_image(title, plot=plt) 83 | else: 84 | plt.savefig(os.path.join(output_dir, 'confusion_matrix.png')) 85 | 86 | # Plot normalized confusion matrix 87 | title = 'Normalized test confusion matrix' 88 | disp = plot_confusion_matrix(svm_model_linear, X_test, y_test, 89 | display_labels=class_names, 90 | cmap=plt.cm.Blues, 91 | normalize='true') 92 | disp.ax_.set_title(title) 93 | print(title) 94 | print(disp.confusion_matrix) 95 | 96 | if run is not None: 97 | run.log_image(title, plot=plt) 98 | else: 99 | plt.savefig( 100 | os.path.join(output_dir, 'confusion_matrix_normalised.png')) 101 | 102 | # Print classification report 103 | print(classification_report(y_test, y_pred)) 104 | 105 | # files saved in the "outputs" folder are automatically uploaded into 106 | # Azure ML Service run history 107 | model_folder = os.path.join(output_dir, 'model') 108 | model_path = os.path.join(model_folder, '{{cookiecutter.mlops_name}}.joblib') 109 | os.makedirs(model_folder, exist_ok=True) 110 | joblib.dump(svm_model_linear, model_path) 111 | print('Output saved to', output_dir) 112 | 113 | 114 | def main(arguments): 115 | parser = argparse.ArgumentParser( 116 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 117 | # environment parameters 118 | # parser.add_argument( 119 | # '--data-folder', 120 | # help="local path to training data", 121 | # required=True 122 | # ) 123 | 124 | parser.add_argument( 125 | "--output-dir", type=str, 126 | default=os.path.join('..', '..', 'data', 'training', 'outputs'), 127 | help='location to write output' 128 | ) 129 | 130 | # training specific parameters 131 | parser.add_argument('--kernel', type=str, default='linear', 132 | help='Kernel type to be used in the algorithm') 133 | parser.add_argument('--penalty', type=float, default=1.0, 134 | help='Penalty parameter of the error term') 135 | 136 | # parse the arguments 137 | args = parser.parse_args(arguments) 138 | 139 | # setup output directory 140 | # model_output_dir = os.path.join( 141 | # os.path.dirname(os.path.realpath(__file__)), 142 | # args.output_dir) 143 | # 
os.makedirs(args.output-dir, exist_ok=True) 144 | 145 | train(args.output_dir, args.kernel, args.penalty) 146 | 147 | 148 | if __name__ == '__main__': 149 | sys.exit(main(sys.argv[1:])) 150 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath(os.path.join('..', '..', 'src'))) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = '{{cookiecutter.project_name}}' 21 | copyright = 'Equinor' 22 | author = '{{cookiecutter.project_name}}' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = 'version' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.napoleon', 36 | 'm2r' 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | # Extensions that recomonmark will use 48 | source_suffix = { 49 | '.rst': 'restructuredtext', 50 | '.txt': 'markdown', 51 | '.md': 'markdown', 52 | } 53 | 54 | # Auto generate API documentation for modules 55 | autosummary_generate = True 56 | 57 | # Default flags used by autodoc directives 58 | autodoc_default_options = { 59 | 'members': True, 60 | 'member-order': 'bysource', 61 | 'special-members': '__init__', 62 | } 63 | 64 | # -- Options for HTML output ------------------------------------------------- 65 | 66 | # The theme to use for HTML and HTML Help pages. See the documentation for 67 | # a list of builtin themes. 68 | # 69 | # html_theme = 'sphinx_rtd_theme' 70 | html_theme = 'alabaster' 71 | 72 | # Theme options are theme-specific and customize the look and feel of a theme 73 | # further. For a list of options available for each theme, see the 74 | # documentation. 
75 | # alabaster theme options: https://alabaster.readthedocs.io/en/latest/customization.html 76 | html_theme_options = { 77 | "description": "{{cookiecutter.project_description}}", 78 | "extra_nav_links": { 79 | "Index": "genindex.html", 80 | "Module Index": "py-modindex.html", 81 | "Search Page": "search.html" 82 | }, 83 | "github_banner": False, 84 | "note_bg": "#FFF59C", 85 | "show_powered_by": False, 86 | "show_related": False, 87 | "sidebar_collapse": False, 88 | } 89 | 90 | # Custom sidebar templates (often theme specific), maps document names to template names. 91 | # alabaster options: https://alabaster.readthedocs.io/en/latest/customization.html 92 | html_sidebars = { 93 | "index": [ 94 | "about.html", 95 | "navigation.html", 96 | "searchbox.html" 97 | ], 98 | "**": [ 99 | "about.html", 100 | 'navigation.html', 101 | "searchbox.html" 102 | ], 103 | } 104 | 105 | # Add any paths that contain custom themes here, relative to this directory. 106 | # html_theme_path = [] 107 | 108 | # The name for this set of Sphinx documents. If None, it defaults to 109 | # " v documentation". 110 | # html_title = None 111 | 112 | # A shorter title for the navigation bar. Default is the same as html_title. 113 | # html_short_title = None 114 | 115 | # The name of an image file (relative to this directory) to place at the top 116 | # of the sidebar. 117 | # html_logo = None 118 | 119 | # The name of an image file (within the static path) to use as favicon of the 120 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 121 | # pixels large. 122 | # html_favicon = None 123 | 124 | # Add any paths that contain custom static files (such as style sheets) here, 125 | # relative to this directory. They are copied after the builtin static files, 126 | # so a file named "default.css" will overwrite the builtin "default.css". 127 | html_static_path = [ 128 | # "_static" 129 | ] 130 | 131 | # Add any extra paths that contain custom files (such as robots.txt or 132 | # .htaccess) here, relative to this directory. These files are copied 133 | # directly to the root of the documentation. 134 | # html_extra_path = [] 135 | 136 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 137 | # using the given strftime format. 138 | # html_last_updated_fmt = '%b %d, %Y' 139 | 140 | # If true, SmartyPants will be used to convert quotes and dashes to 141 | # typographically correct entities. 142 | html_use_smartypants = False 143 | 144 | # Additional templates that should be rendered to pages, maps page names to 145 | # template names. 146 | # html_additional_pages = {} 147 | 148 | # If false, no module index is generated. 149 | # html_domain_indices = True 150 | 151 | # If false, no index is generated. 152 | # html_use_index = True 153 | 154 | # If true, the index is split into individual pages for each letter. 155 | # html_split_index = False 156 | 157 | # If true, links to the reST sources are added to the pages. 158 | html_show_sourcelink = False 159 | 160 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 161 | html_show_sphinx = False 162 | 163 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
164 | html_show_copyright = True 165 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/notebooks/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example Notebook\n", 8 | "\n", 9 | "This is an example notebook.\n", 10 | "\n", 11 | "Modify / remove any of the below as suited for your needs" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Standard python packages\n", 28 | "import os\n", 29 | "import sys\n", 30 | "\n", 31 | "# Other package imports\n", 32 | "# import numpy as np\n", 33 | "# import pandas as pd\n", 34 | "# from matplotlib import pyplot as plt" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Setup some global settings and configuration" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))\n", 51 | "data_folder = os.path.join(project_root, 'data')\n", 52 | "data_folder_raw = os.path.join(data_folder, 'raw')\n", 53 | "src_folder = os.path.join(project_root, 'src')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "This notebook uses the shared package however first we need to ensure it is available (otherwise you get an error about the module not being found). You can either run setup.py as discussed in the readme to install the package or modify the path to include the src folder." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# Explicitly set path so don't need to run setup.py - if we have multiple copies of \n", 70 | "# the code we would otherwise need to setup a seperate environment for each to\n", 71 | "# ensure the code pointers are correct.\n", 72 | "sys.path.insert(0, src_folder)\n", 73 | "\n", 74 | "from {{cookiecutter.package_name}} import examplemodule" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Some Processing" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "'Hello World'" 93 | ] 94 | }, 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "# Use our package\n", 102 | "examplemodule.hello_world()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Appendix 1 - Environment Configuration" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "D:\\Development\\DataScience\\Projects\\DataScienceTemplate\\notebooks\\eda\n", 122 | "3.6.4 |Anaconda custom (64-bit)| (default, Mar 12 2018, 20:20:50) [MSC v.1900 64 bit (AMD64)]\n", 123 | "C:\\Applications\\Miniconda3\\envs\\anaconda\\python.exe\n", 124 | "['D:\\\\Development\\\\DataScience\\\\Projects\\\\DataScienceTemplate\\\\src', '', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\python36.zip', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\DLLs', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages', 'd:\\\\development\\\\datascience\\\\projects\\\\data-science-shared\\\\python', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\xgboost-0.7-py3.6.egg', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\win32', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\win32\\\\lib', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\Pythonwin', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\mark_\\\\.ipython']\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "print (os.getcwd())\n", 130 | "print (sys.version)\n", 131 | "print (sys.executable)\n", 132 | "print (sys.path)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Appendix 2 - Automated Tests" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "============================= test session starts =============================\n", 152 | "platform win32 -- Python 3.6.4, pytest-3.4.2, py-1.5.2, pluggy-0.6.0\n", 153 | "rootdir: D:\\Development\\DataScience\\Projects\\DataScienceTemplate, inifile:\n", 154 | "plugins: remotedata-0.2.0, openfiles-0.2.0, doctestplus-0.1.2, cov-2.5.1, arraydiff-0.2\n", 155 | "collected 5 items\n", 156 | "\n", 157 | 
"tests\\examplepackage\\examplemodule\\test_add_value_to_numpy.py ... [ 60%]\n", 158 | "tests\\examplepackage\\examplemodule\\test_hello_world.py .. [100%]\n", 159 | "\n", 160 | "========================== 5 passed in 0.37 seconds ===========================\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "# Run tests within notebook\n", 166 | "f_path = os.getcwd()\n", 167 | "os.chdir(os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)))\n", 168 | "\n", 169 | "# Run pytest from the repository root\n", 170 | "!pytest\n", 171 | "\n", 172 | "os.chdir(f_path)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.6.4" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/data_science_code_of_conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Authorship and validity 4 | This code of conduct is derived from the “Data Science Code of Professional Conduct” written by the Data Science Association. The full version is available at [http://www.datascienceassn.org/code-of-conduct.html](http://www.datascienceassn.org/code-of-conduct.html). For definitions of the terminology in this code of conduct please consult the full version. 5 | 6 | This code of conduct attempts to distill the fuller code to a more manageable level, a code which is easier to communicate whilst (hopefully) still retaining the spirit of the Data Association’s code of conduct. We have done this through highlighting what we believe to be the most important points for data science in Equinor. Points conspicuous in their absence should not be assumed to be invalid. The full code of conduct is embraced in its entirety. 7 | 8 | The aim of this code of conduct is to help ensure the quality of Data Science carried out at Equinor is at least in accordance with the guidelines set out below. This includes the work of internal Data Scientists, other internal resources and external partners and vendors. Indeed, all data science which is delivered to Equinor. 9 | 10 | Key tenets are competence, honesty, scientific integrity and repeatability. 11 | 12 | The rules detailed here are intended to augment relevant laws and regulations which are valid in the local where the data science work is being delivered (e.g. the European Union’s General Data Protection Regulation). In the event of any perceived conflict between this code of conduct and local rules and regulations it is the local rules and regulations which take precedence. 13 | 14 | ## Success criteria and validation 15 | 16 | * **Rule 0:** The data scientist shall, in collaboration with the client, develop success criteria for the purposes of establishing metrics by which the quality of the delivery can be assessed. Success criteria shall: 17 | * **a)** As much as possible, be based on objective, industry accepted quantitative metrics such as precision and recall. 
18 | * **b)** Be agreed as early as possible in the data science delivery 19 | * **c)** Remain unchanged throughout the delivery, unless it is agreed with the client that the success criteria are not appropriate 20 | * **d)** Not be repeatedly adjusted in light of unfavourable evidence. When re-establishing success criteria (as of rule 0c), great care must be taken to ensure that this will change will support the quality of the delivery 21 | * **e)** Where quantitative metrics cannot be agreed due to the nature of the problem, e.g. language models, anomaly detection with low rate of known anomalies, an honest assessment of the quality of the delivery is still required 22 | * **f)** Success criteria should focus on generalisation of models and workflows. 23 | 24 | ## Client Communication 25 | 26 | * **Rule 1:** A data scientist shall consult with the client about any real, perceived and potentially hidden risks in relying on data science results. 27 | 28 | * **Rule 2:** A data scientist shall explain data science results to the extent reasonably necessary to permit the client to make informed decisions regarding the data science. 29 | Confidential Information 30 | 31 | * **Rule 3:** Confidential information is information that the data scientist creates, develops, receives, uses or learns during employment as a data scientist for a client, either working directly in-house as an employee of an organization or as an independent professional. It includes information that is not generally known by the public about the client, including client affiliates, employees, customers or other parties with whom the client has a relationship and who have an expectation of confidentiality. The data scientist has a professional duty to protect all confidential information, regardless of its form or format, from the time of its creation or receipt until its authorised disposal. 32 | 33 | * **Rule 4:** A data scientist shall make reasonable efforts to prevent the inadvertent or unauthorized disclosure of, or unauthorized access to, information relating to the representation of a client, which means: 34 | * **a)** Not displaying, reviewing or discussing confidential information in public places, in the presence of third parties or that may be overheard; 35 | * **b)** Not e-mailing confidential information outside of the organization or professional practice to a personal e-mail account or otherwise removing confidential information from the client by removing hard copies or copying it to any form of recordable digital media device; and 36 | * **c)** Communicating confidential information only to client employees and authorized agents (such as legal professionals or external auditors) who have a legitimate business reason to know the information. 37 | 38 | * **Rule 5:** A data scientist shall comply with client policies that apply to the acceptance, proper use and handling of confidential information, as well as any written agreements between the data scientist and the client relating to confidential information. 39 | 40 | * **Rule 6:** A data scientist shall protect client confidential information after termination of work for the client. 41 | * **Rule 7:** A data scientist shall return any and all confidential information in possession or control upon termination of the data scientist - client relationship and, if requested, execute an affidavit affirming compliance with obligations relating to confidential information. 
42 | 43 | ## Data Science Evidence, Quality of Data and Quality of Evidence 44 | 45 | * **Rule 8:** A data scientist shall inform the client of all data science results and material facts known to the data scientist that will enable the client to make informed decisions, whether or not the data science evidence is adverse. 46 | 47 | * **Rule 9:** The data scientist understands that bad or uncertain data quality may compromise data science professional practice and may communicate a false reality or promote an illusion of understanding. The data scientist shall take reasonable measures to protect the client from relying and making decisions based on bad or uncertain data quality. 48 | 49 | * **Rule 10:** The data scientist understands that evidence may be weak or strong or uncertain and shall take reasonable measures to protect the client from relying and making decisions based on weak or uncertain evidence. 50 | 51 | * **Rule 11:** A data scientist shall not knowingly: 52 | * **a)** fail to use scientific methods in performing data science; 53 | * **b)** fail to convey the quality of evidence in a reasonable and understandable manner for the client; 54 | * **c)** claim weak or uncertain evidence is strong evidence; 55 | * **d)** misuse weak or uncertain evidence to communicate a false reality or promote an illusion of understanding; 56 | * **e)** fail to convey the quality of data in a reasonable and understandable manner for the client; 57 | * **f)** claim bad or uncertain data quality is good data quality; 58 | * **g)** misuse bad or uncertain data quality to communicate a false reality or promote an illusion of understanding; 59 | * **h)** engage in cherry-picking (pointing to individual cases or data that seem to confirm a particular position, while ignoring a significant portion of related cases or data that may contradict that position of data or data science evidence); 60 | * **i)** fail to attempt to replicate data science results; 61 | * **j)** fail to disclose that data science results could not be replicated; 62 | * **k)** misuse data science results to communicate a false reality or promote an illusion of understanding; 63 | * **l)** fail to disclose failed experiments or disconfirming evidence known to the data scientist to be directly adverse to the position of the client; 64 | * **m)** offer evidence that the data scientist knows to be false. If a data scientist questions the quality of data or evidence the data scientist must disclose this to the client. If a data scientist has offered material evidence and the data scientist comes to know of its falsity, the data scientist shall take reasonable remedial measures, including disclosure to the client. A data scientist may disclose and label evidence the data scientist reasonably believes is false. 65 | 66 | * **Rule 12:** A data scientist shall use reasonable diligence when assigning value and meaning to the following concepts when conducting data science: 67 | * **a)** "Statistically Significant" 68 | * **b)** "Correlation" 69 | * **c)** "Spurious Correlation" 70 | * **d)** "Causation" 71 | 72 | * **Rule 13:** A data scientist shall not present incomplete evidence as real data science evidence. A data scientist may present a theory constituting incomplete evidence but shall label and clearly communicate the use of incomplete evidence. 
73 | 74 | * **Rule 14:** A data scientist shall use the data science method which consists of the following steps: 75 | * **a)** Careful observations of data, data sets and relationships between data; 76 | * **b)** Deduction of meaning from the data and different data relationships; 77 | * **c)** Formation of hypotheses; 78 | * **d)** Experimental or observational testing of the validity of the hypotheses. To be termed scientific, a method of inquiry must be based on empirical and measurable evidence subject to specific principles of reasoning. 79 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/.gitignore: -------------------------------------------------------------------------------- 1 | ## The .gitignore file specifies things that git should ignore. 2 | ## This default template includes entries for R, Python and visual studio 3 | 4 | ## 5 | ## Add custom entries below here. 6 | ## 7 | scripts/config.json 8 | 9 | ## 10 | ## R Section - See https://github.com/github/gitignore/blob/master/R.gitignore 11 | ## 12 | 13 | # History files 14 | .Rhistory 15 | .Rapp.history 16 | 17 | # Session Data files 18 | .RData 19 | 20 | # Example code in package build process 21 | *-Ex.R 22 | 23 | # Output files from R CMD build 24 | /*.tar.gz 25 | 26 | # Output files from R CMD check 27 | /*.Rcheck/ 28 | 29 | # RStudio files 30 | .Rproj.user/ 31 | 32 | # produced vignettes 33 | vignettes/*.html 34 | vignettes/*.pdf 35 | 36 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 37 | .httr-oauth 38 | 39 | # knitr and R markdown default cache directories 40 | /*_cache/ 41 | /cache/ 42 | 43 | # Temporary files created by R markdown 44 | *.utf8.md 45 | *.knit.md 46 | 47 | ## 48 | ## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore 49 | ## 50 | 51 | # PyCharm ide files 52 | .idea 53 | 54 | # Byte-compiled / optimized / DLL files 55 | __pycache__/ 56 | *.py[cod] 57 | *$py.class 58 | 59 | # C extensions 60 | *.so 61 | 62 | # Distribution / packaging 63 | .Python 64 | env/ 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | *.egg-info/ 78 | .installed.cfg 79 | *.egg 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .coverage 95 | .coverage.* 96 | .cache 97 | nosetests.xml 98 | coverage.xml 99 | *.cover 100 | .hypothesis/ 101 | 102 | # Translations 103 | *.mo 104 | *.pot 105 | 106 | # Django stuff: 107 | *.log 108 | local_settings.py 109 | 110 | # Flask stuff: 111 | instance/ 112 | .webassets-cache 113 | 114 | # Scrapy stuff: 115 | .scrapy 116 | 117 | # Sphinx documentation 118 | docs/_build/ 119 | 120 | # PyBuilder 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # pyenv 127 | .python-version 128 | 129 | # celery beat schedule file 130 | celerybeat-schedule 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | 156 | ## Ignore Visual Studio temporary files, build results, and 157 | ## files generated by popular Visual Studio add-ons. 158 | ## 159 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 160 | 161 | # User-specific files 162 | *.suo 163 | *.user 164 | *.userosscache 165 | *.sln.docstates 166 | 167 | # User-specific files (MonoDevelop/Xamarin Studio) 168 | *.userprefs 169 | 170 | # Build results 171 | [Dd]ebug/ 172 | [Dd]ebugPublic/ 173 | [Rr]elease/ 174 | [Rr]eleases/ 175 | x64/ 176 | x86/ 177 | bld/ 178 | [Bb]in/ 179 | [Oo]bj/ 180 | [Ll]og/ 181 | 182 | # Visual Studio 2015 cache/options directory 183 | .vs/ 184 | # Uncomment if you have tasks that create the project's static files in wwwroot 185 | #wwwroot/ 186 | 187 | # MSTest test Results 188 | [Tt]est[Rr]esult*/ 189 | [Bb]uild[Ll]og.* 190 | 191 | # NUNIT 192 | *.VisualState.xml 193 | TestResult.xml 194 | 195 | # Build Results of an ATL Project 196 | [Dd]ebugPS/ 197 | [Rr]eleasePS/ 198 | dlldata.c 199 | 200 | # Benchmark Results 201 | BenchmarkDotNet.Artifacts/ 202 | 203 | # .NET Core 204 | project.lock.json 205 | project.fragment.lock.json 206 | artifacts/ 207 | **/Properties/launchSettings.json 208 | 209 | *_i.c 210 | *_p.c 211 | *_i.h 212 | *.ilk 213 | *.meta 214 | *.obj 215 | *.pch 216 | *.pdb 217 | *.pgc 218 | *.pgd 219 | *.rsp 220 | *.sbr 221 | *.tlb 222 | *.tli 223 | *.tlh 224 | *.tmp 225 | *.tmp_proj 226 | *.log 227 | *.vspscc 228 | *.vssscc 229 | .builds 230 | *.pidb 231 | *.svclog 232 | *.scc 233 | 234 | # Chutzpah Test files 235 | _Chutzpah* 236 | 237 | # Visual C++ cache files 238 | ipch/ 239 | *.aps 240 | *.ncb 241 | *.opendb 242 | *.opensdf 243 | *.sdf 244 | *.cachefile 245 | *.VC.db 246 | *.VC.VC.opendb 247 | 248 | # Visual Studio profiler 249 | *.psess 250 | *.vsp 251 | *.vspx 252 | *.sap 253 | 254 | # Visual Studio Trace Files 255 | *.e2e 256 | 257 | # TFS 2012 Local Workspace 258 | $tf/ 259 | 260 | # Guidance Automation Toolkit 261 | *.gpState 262 | 263 | # ReSharper is a .NET coding add-in 264 | _ReSharper*/ 265 | *.[Rr]e[Ss]harper 266 | *.DotSettings.user 267 | 268 | # JustCode is a .NET coding add-in 269 | .JustCode 270 | 271 | # TeamCity is a build add-in 272 | _TeamCity* 273 | 274 | # DotCover is a Code Coverage Tool 275 | *.dotCover 276 | 277 | # AxoCover is a Code Coverage Tool 278 | .axoCover/* 279 | !.axoCover/settings.json 280 | 281 | # Visual Studio code 
coverage results 282 | *.coverage 283 | *.coveragexml 284 | 285 | # NCrunch 286 | _NCrunch_* 287 | .*crunch*.local.xml 288 | nCrunchTemp_* 289 | 290 | # MightyMoose 291 | *.mm.* 292 | AutoTest.Net/ 293 | 294 | # Web workbench (sass) 295 | .sass-cache/ 296 | 297 | # Installshield output folder 298 | [Ee]xpress/ 299 | 300 | # DocProject is a documentation generator add-in 301 | DocProject/buildhelp/ 302 | DocProject/Help/*.HxT 303 | DocProject/Help/*.HxC 304 | DocProject/Help/*.hhc 305 | DocProject/Help/*.hhk 306 | DocProject/Help/*.hhp 307 | DocProject/Help/Html2 308 | DocProject/Help/html 309 | 310 | # Click-Once directory 311 | publish/ 312 | 313 | # Publish Web Output 314 | *.[Pp]ublish.xml 315 | *.azurePubxml 316 | # Note: Comment the next line if you want to checkin your web deploy settings, 317 | # but database connection strings (with potential passwords) will be unencrypted 318 | *.pubxml 319 | *.publishproj 320 | 321 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 322 | # checkin your Azure Web App publish settings, but sensitive information contained 323 | # in these scripts will be unencrypted 324 | PublishScripts/ 325 | 326 | # NuGet Packages 327 | *.nupkg 328 | # The packages folder can be ignored because of Package Restore 329 | **/[Pp]ackages/* 330 | # except build/, which is used as an MSBuild target. 331 | !**/[Pp]ackages/build/ 332 | # Uncomment if necessary however generally it will be regenerated when needed 333 | #!**/[Pp]ackages/repositories.config 334 | # NuGet v3's project.json files produces more ignorable files 335 | *.nuget.props 336 | *.nuget.targets 337 | 338 | # Microsoft Azure Build Output 339 | csx/ 340 | *.build.csdef 341 | 342 | # Microsoft Azure Emulator 343 | ecf/ 344 | rcf/ 345 | 346 | # Windows Store app package directories and files 347 | AppPackages/ 348 | BundleArtifacts/ 349 | Package.StoreAssociation.xml 350 | _pkginfo.txt 351 | *.appx 352 | 353 | # Visual Studio cache files 354 | # files ending in .cache can be ignored 355 | *.[Cc]ache 356 | # but keep track of directories ending in .cache 357 | !*.[Cc]ache/ 358 | 359 | # Others 360 | ClientBin/ 361 | ~$* 362 | *~ 363 | *.dbmdl 364 | *.dbproj.schemaview 365 | *.jfm 366 | *.pfx 367 | *.publishsettings 368 | orleans.codegen.cs 369 | 370 | # Since there are multiple workflows, uncomment next line to ignore bower_components 371 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 372 | #bower_components/ 373 | 374 | # RIA/Silverlight projects 375 | Generated_Code/ 376 | 377 | # Backup & report files from converting an old project file 378 | # to a newer Visual Studio version. Backup files are not needed, 379 | # because we have git ;-) 380 | _UpgradeReport_Files/ 381 | Backup*/ 382 | UpgradeLog*.XML 383 | UpgradeLog*.htm 384 | 385 | # SQL Server files 386 | *.mdf 387 | *.ldf 388 | *.ndf 389 | 390 | # Business Intelligence projects 391 | *.rdl.data 392 | *.bim.layout 393 | *.bim_*.settings 394 | 395 | # Microsoft Fakes 396 | FakesAssemblies/ 397 | 398 | # GhostDoc plugin setting file 399 | *.GhostDoc.xml 400 | 401 | # Node.js Tools for Visual Studio 402 | .ntvs_analysis.dat 403 | node_modules/ 404 | 405 | # Typescript v1 declaration files 406 | typings/ 407 | 408 | # Visual Studio 6 build log 409 | *.plg 410 | 411 | # Visual Studio 6 workspace options file 412 | *.opt 413 | 414 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
415 | *.vbw 416 | 417 | # Visual Studio LightSwitch build output 418 | **/*.HTMLClient/GeneratedArtifacts 419 | **/*.DesktopClient/GeneratedArtifacts 420 | **/*.DesktopClient/ModelManifest.xml 421 | **/*.Server/GeneratedArtifacts 422 | **/*.Server/ModelManifest.xml 423 | _Pvt_Extensions 424 | 425 | # Paket dependency manager 426 | .paket/paket.exe 427 | paket-files/ 428 | 429 | # FAKE - F# Make 430 | .fake/ 431 | 432 | # JetBrains Rider 433 | .idea/ 434 | *.sln.iml 435 | 436 | # CodeRush 437 | .cr/ 438 | 439 | # Python Tools for Visual Studio (PTVS) 440 | __pycache__/ 441 | *.pyc 442 | 443 | # Cake - Uncomment if you are using it 444 | # tools/** 445 | # !tools/packages.config 446 | 447 | # Tabs Studio 448 | *.tss 449 | 450 | # Telerik's JustMock configuration file 451 | *.jmconfig 452 | 453 | # BizTalk build output 454 | *.btp.cs 455 | *.btm.cs 456 | *.odx.cs 457 | *.xsd.cs 458 | 459 | # OpenCover UI analysis results 460 | OpenCover/ 461 | .cache/v/cache/lastfailed 462 | tests/.cache/v/cache/lastfailed 463 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## The .gitignore file specifies things that git should ignore. 2 | ## This default template includes entries for R, Python and visual studio 3 | 4 | ## 5 | ## Add custom entries below here. 6 | ## 7 | dst-env/ 8 | .cache/v/cache/lastfailed 9 | tests/.cache/v/cache/lastfailed 10 | .vscode/settings.json 11 | 12 | ## 13 | ## R Section - See https://github.com/github/gitignore/blob/master/R.gitignore 14 | ## 15 | 16 | # History files 17 | .Rhistory 18 | .Rapp.history 19 | 20 | # Session Data files 21 | .RData 22 | 23 | # Example code in package build process 24 | *-Ex.R 25 | 26 | # Output files from R CMD build 27 | /*.tar.gz 28 | 29 | # Output files from R CMD check 30 | /*.Rcheck/ 31 | 32 | # RStudio files 33 | .Rproj.user/ 34 | 35 | # produced vignettes 36 | vignettes/*.html 37 | vignettes/*.pdf 38 | 39 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 40 | .httr-oauth 41 | 42 | # knitr and R markdown default cache directories 43 | /*_cache/ 44 | /cache/ 45 | 46 | # Temporary files created by R markdown 47 | *.utf8.md 48 | *.knit.md 49 | 50 | ## 51 | ## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore 52 | ## 53 | 54 | # PyCharm ide files 55 | .idea 56 | 57 | # Byte-compiled / optimized / DLL files 58 | __pycache__/ 59 | *.py[cod] 60 | *$py.class 61 | 62 | # C extensions 63 | *.so 64 | 65 | # Distribution / packaging 66 | .Python 67 | env/ 68 | build/ 69 | develop-eggs/ 70 | dist/ 71 | downloads/ 72 | eggs/ 73 | .eggs/ 74 | lib/ 75 | lib64/ 76 | parts/ 77 | sdist/ 78 | var/ 79 | wheels/ 80 | *.egg-info/ 81 | .installed.cfg 82 | *.egg 83 | 84 | # PyInstaller 85 | # Usually these files are written by a python script from a template 86 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
87 | *.manifest 88 | *.spec 89 | 90 | # Installer logs 91 | pip-log.txt 92 | pip-delete-this-directory.txt 93 | 94 | # Unit test / coverage reports 95 | htmlcov/ 96 | .tox/ 97 | .coverage 98 | .coverage.* 99 | .cache 100 | nosetests.xml 101 | coverage.xml 102 | *.cover 103 | .hypothesis/ 104 | 105 | # Translations 106 | *.mo 107 | *.pot 108 | 109 | # Django stuff: 110 | *.log 111 | local_settings.py 112 | 113 | # Flask stuff: 114 | instance/ 115 | .webassets-cache 116 | 117 | # Scrapy stuff: 118 | .scrapy 119 | 120 | # Sphinx documentation 121 | docs/_build/ 122 | 123 | # PyBuilder 124 | target/ 125 | 126 | # Jupyter Notebook 127 | .ipynb_checkpoints 128 | 129 | # pyenv 130 | .python-version 131 | 132 | # celery beat schedule file 133 | celerybeat-schedule 134 | 135 | # SageMath parsed files 136 | *.sage.py 137 | 138 | # dotenv 139 | .env 140 | 141 | # virtualenv 142 | .venv 143 | venv/ 144 | ENV/ 145 | 146 | # Spyder project settings 147 | .spyderproject 148 | .spyproject 149 | 150 | # Rope project settings 151 | .ropeproject 152 | 153 | # mkdocs documentation 154 | /site 155 | 156 | # mypy 157 | .mypy_cache/ 158 | 159 | ## Ignore Visual Studio temporary files, build results, and 160 | ## files generated by popular Visual Studio add-ons. 161 | ## 162 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 163 | 164 | # User-specific files 165 | *.suo 166 | *.user 167 | *.userosscache 168 | *.sln.docstates 169 | 170 | # User-specific files (MonoDevelop/Xamarin Studio) 171 | *.userprefs 172 | 173 | # Build results 174 | [Dd]ebug/ 175 | [Dd]ebugPublic/ 176 | [Rr]elease/ 177 | [Rr]eleases/ 178 | x64/ 179 | x86/ 180 | bld/ 181 | [Bb]in/ 182 | [Oo]bj/ 183 | [Ll]og/ 184 | 185 | # Visual Studio 2015 cache/options directory 186 | .vs/ 187 | # Uncomment if you have tasks that create the project's static files in wwwroot 188 | #wwwroot/ 189 | 190 | # MSTest test Results 191 | [Tt]est[Rr]esult*/ 192 | [Bb]uild[Ll]og.* 193 | 194 | # NUNIT 195 | *.VisualState.xml 196 | TestResult.xml 197 | 198 | # Build Results of an ATL Project 199 | [Dd]ebugPS/ 200 | [Rr]eleasePS/ 201 | dlldata.c 202 | 203 | # Benchmark Results 204 | BenchmarkDotNet.Artifacts/ 205 | 206 | # .NET Core 207 | project.lock.json 208 | project.fragment.lock.json 209 | artifacts/ 210 | **/Properties/launchSettings.json 211 | 212 | *_i.c 213 | *_p.c 214 | *_i.h 215 | *.ilk 216 | *.meta 217 | *.obj 218 | *.pch 219 | *.pdb 220 | *.pgc 221 | *.pgd 222 | *.rsp 223 | *.sbr 224 | *.tlb 225 | *.tli 226 | *.tlh 227 | *.tmp 228 | *.tmp_proj 229 | *.log 230 | *.vspscc 231 | *.vssscc 232 | .builds 233 | *.pidb 234 | *.svclog 235 | *.scc 236 | 237 | # Chutzpah Test files 238 | _Chutzpah* 239 | 240 | # Visual C++ cache files 241 | ipch/ 242 | *.aps 243 | *.ncb 244 | *.opendb 245 | *.opensdf 246 | *.sdf 247 | *.cachefile 248 | *.VC.db 249 | *.VC.VC.opendb 250 | 251 | # Visual Studio profiler 252 | *.psess 253 | *.vsp 254 | *.vspx 255 | *.sap 256 | 257 | # Visual Studio Trace Files 258 | *.e2e 259 | 260 | # TFS 2012 Local Workspace 261 | $tf/ 262 | 263 | # Guidance Automation Toolkit 264 | *.gpState 265 | 266 | # ReSharper is a .NET coding add-in 267 | _ReSharper*/ 268 | *.[Rr]e[Ss]harper 269 | *.DotSettings.user 270 | 271 | # JustCode is a .NET coding add-in 272 | .JustCode 273 | 274 | # TeamCity is a build add-in 275 | _TeamCity* 276 | 277 | # DotCover is a Code Coverage Tool 278 | *.dotCover 279 | 280 | # AxoCover is a Code Coverage Tool 281 | .axoCover/* 282 | !.axoCover/settings.json 283 | 284 | # Visual Studio code 
coverage results 285 | *.coverage 286 | *.coveragexml 287 | 288 | # NCrunch 289 | _NCrunch_* 290 | .*crunch*.local.xml 291 | nCrunchTemp_* 292 | 293 | # MightyMoose 294 | *.mm.* 295 | AutoTest.Net/ 296 | 297 | # Web workbench (sass) 298 | .sass-cache/ 299 | 300 | # Installshield output folder 301 | [Ee]xpress/ 302 | 303 | # DocProject is a documentation generator add-in 304 | DocProject/buildhelp/ 305 | DocProject/Help/*.HxT 306 | DocProject/Help/*.HxC 307 | DocProject/Help/*.hhc 308 | DocProject/Help/*.hhk 309 | DocProject/Help/*.hhp 310 | DocProject/Help/Html2 311 | DocProject/Help/html 312 | 313 | # Click-Once directory 314 | publish/ 315 | 316 | # Publish Web Output 317 | *.[Pp]ublish.xml 318 | *.azurePubxml 319 | # Note: Comment the next line if you want to checkin your web deploy settings, 320 | # but database connection strings (with potential passwords) will be unencrypted 321 | *.pubxml 322 | *.publishproj 323 | 324 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 325 | # checkin your Azure Web App publish settings, but sensitive information contained 326 | # in these scripts will be unencrypted 327 | PublishScripts/ 328 | 329 | # NuGet Packages 330 | *.nupkg 331 | # The packages folder can be ignored because of Package Restore 332 | **/[Pp]ackages/* 333 | # except build/, which is used as an MSBuild target. 334 | !**/[Pp]ackages/build/ 335 | # Uncomment if necessary however generally it will be regenerated when needed 336 | #!**/[Pp]ackages/repositories.config 337 | # NuGet v3's project.json files produces more ignorable files 338 | *.nuget.props 339 | *.nuget.targets 340 | 341 | # Microsoft Azure Build Output 342 | csx/ 343 | *.build.csdef 344 | 345 | # Microsoft Azure Emulator 346 | ecf/ 347 | rcf/ 348 | 349 | # Windows Store app package directories and files 350 | AppPackages/ 351 | BundleArtifacts/ 352 | Package.StoreAssociation.xml 353 | _pkginfo.txt 354 | *.appx 355 | 356 | # Visual Studio cache files 357 | # files ending in .cache can be ignored 358 | *.[Cc]ache 359 | # but keep track of directories ending in .cache 360 | !*.[Cc]ache/ 361 | 362 | # Others 363 | ClientBin/ 364 | ~$* 365 | *~ 366 | *.dbmdl 367 | *.dbproj.schemaview 368 | *.jfm 369 | *.pfx 370 | *.publishsettings 371 | orleans.codegen.cs 372 | 373 | # Since there are multiple workflows, uncomment next line to ignore bower_components 374 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 375 | #bower_components/ 376 | 377 | # RIA/Silverlight projects 378 | Generated_Code/ 379 | 380 | # Backup & report files from converting an old project file 381 | # to a newer Visual Studio version. Backup files are not needed, 382 | # because we have git ;-) 383 | _UpgradeReport_Files/ 384 | Backup*/ 385 | UpgradeLog*.XML 386 | UpgradeLog*.htm 387 | 388 | # SQL Server files 389 | *.mdf 390 | *.ldf 391 | *.ndf 392 | 393 | # Business Intelligence projects 394 | *.rdl.data 395 | *.bim.layout 396 | *.bim_*.settings 397 | 398 | # Microsoft Fakes 399 | FakesAssemblies/ 400 | 401 | # GhostDoc plugin setting file 402 | *.GhostDoc.xml 403 | 404 | # Node.js Tools for Visual Studio 405 | .ntvs_analysis.dat 406 | node_modules/ 407 | 408 | # Typescript v1 declaration files 409 | typings/ 410 | 411 | # Visual Studio 6 build log 412 | *.plg 413 | 414 | # Visual Studio 6 workspace options file 415 | *.opt 416 | 417 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
418 | *.vbw 419 | 420 | # Visual Studio LightSwitch build output 421 | **/*.HTMLClient/GeneratedArtifacts 422 | **/*.DesktopClient/GeneratedArtifacts 423 | **/*.DesktopClient/ModelManifest.xml 424 | **/*.Server/GeneratedArtifacts 425 | **/*.Server/ModelManifest.xml 426 | _Pvt_Extensions 427 | 428 | # Paket dependency manager 429 | .paket/paket.exe 430 | paket-files/ 431 | 432 | # FAKE - F# Make 433 | .fake/ 434 | 435 | # JetBrains Rider 436 | .idea/ 437 | *.sln.iml 438 | 439 | # CodeRush 440 | .cr/ 441 | 442 | # Python Tools for Visual Studio (PTVS) 443 | __pycache__/ 444 | *.pyc 445 | 446 | # Cake - Uncomment if you are using it 447 | # tools/** 448 | # !tools/packages.config 449 | 450 | # Tabs Studio 451 | *.tss 452 | 453 | # Telerik's JustMock configuration file 454 | *.jmconfig 455 | 456 | # BizTalk build output 457 | *.btp.cs 458 | *.btm.cs 459 | *.odx.cs 460 | *.xsd.cs 461 | 462 | # OpenCover UI analysis results 463 | OpenCover/ 464 | junit/ 465 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/README.md: -------------------------------------------------------------------------------- 1 | {% if cookiecutter.devops_organisation != '' %} 2 | [![Build Status](https://dev.azure.com/{{cookiecutter.devops_organisation}}/{{cookiecutter.repo_name}}/_apis/build/status/equinor.{{cookiecutter.repo_name}}?branchName=master)](https://dev.azure.com/{{cookiecutter.devops_organisation}}/{{cookiecutter.repo_name}}/_build/latest?definitionId=1&branchName=master) 3 | {% endif %} 4 | 5 | # {{cookiecutter.project_name}} 6 | 7 | {{cookiecutter.project_description}} 8 | 9 | ## Setup 10 | 1. Install git and checkout the [git code repository] 11 | 2. Install [anaconda] python version 3.6+ 12 | 3. Change working directory into the git code repository root 13 | 4. Create the self contained conda environment. In a terminal go to the git code repository root and enter the command: 14 | 15 | `conda env create --file conda_env.yml` 16 | 17 | 5. Any python modules under src need to be available to other scripts. This can be done in a couple of ways. You can 18 | setup and install the python modules by executing the setup.py command below which will install the packages to the 19 | conda environments site-packages folder but with a symlink to the src folder so modifications are reflected immediately. 20 | 21 | `python setup.py develop` 22 | 23 | As an alternative you may prefer to set the python path directly from the console, within notebooks, test scripts 24 | etc. From Pycharm you can also right click the src folder and select the _Mark Directory As | Source Root_ option. 25 | 26 | 6. .. Place your own project specific setup steps here e.g. copying data files ... 27 | 28 | When distributing your module, you can create a Python egg with the command `python setup.py bdist_egg` and upload the egg. 29 | 30 | NOTE: When working in the project notebooks from within the Equinor network, you may need to include the lines below if your proxy is not otherwise setup. 31 | 32 | `os.environ['HTTP_PROXY']="http://www-proxy.statoil.no:80"`
33 | `os.environ['HTTPS_PROXY']="http://www-proxy.statoil.no:80"` 34 | 35 | ## Using the Python Conda environment 36 | 37 | Once the Python Conda environment has been set up, you can 38 | 39 | * Activate the environment using the following command in a terminal window: 40 | 41 | * Windows: `activate {{cookiecutter.conda_name}}` 42 | * Linux, OS X: `source activate {{cookiecutter.conda_name}}` 43 | * The __environment is activated per terminal session__, so you must activate it every time you open terminal. 44 | 45 | * Deactivate the environment using the following command in a terminal window: 46 | 47 | * Windows: `deactivate {{cookiecutter.conda_name}}` 48 | * Linux, OS X: `source deactivate {{cookiecutter.conda_name}}` 49 | 50 | * Delete the environment using the command (can't be undone): 51 | 52 | * `conda remove --name {{cookiecutter.conda_name}} --all` 53 | 54 | ## Initial File Structure 55 | 56 | ``` 57 | ├── .gitignore <- Files that should be ignored by git. Add seperate .gitignore files in sub folders if 58 | │ needed 59 | ├── conda_env.yml <- Conda environment definition for ensuring consistent setup across environments 60 | ├── LICENSE 61 | ├── README.md <- The top-level README for developers using this project. 62 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 63 | │ generated with `pip freeze > requirements.txt`. Might not be needed if using conda. 64 | ├── setup.py <- Metadata about your project for easy distribution. 65 | │ 66 | ├── data 67 | │ ├── interim_[desc] <- Interim files - give these folders whatever name makes sense. 68 | │ ├── processed <- The final, canonical data sets for modeling. 69 | │ ├── raw <- The original, immutable data dump. 70 | │ ├── temp <- Temporary files. 71 | │ └── training <- Files relating to the training process 72 | │ 73 | ├── docs <- Documentation 74 | │ ├── data_science_code_of_conduct.md <- Code of conduct. 75 | │ ├── process_documentation.md <- Standard template for documenting process and decisions. 76 | │ └── writeup <- Sphinx project for project writeup including auto generated API. 77 | │ ├── conf.py <- Sphinx configurtation file. 78 | │ ├── index.rst <- Start page. 79 | │ ├── make.bat <- For generating documentation (Windows) 80 | │ └── Makefikle <- For generating documentation (make) 81 | │ 82 | ├── examples <- Add folders as needed e.g. examples, eda, use case 83 | │ 84 | ├── extras <- Miscellaneous extras. 85 | │ └── add_explorer_context_shortcuts.reg <- Adds additional Windows Explorer context menus for starting jupyter. 86 | │ 87 | ├── notebooks <- Notebooks for analysis and testing 88 | │ ├── eda <- Notebooks for EDA 89 | │ │ └── example.ipynb <- Example python notebook 90 | │ ├── features <- Notebooks for generating and analysing features (1 per feature) 91 | │ ├── modelling <- Notebooks for modelling 92 | │ └── preprocessing <- Notebooks for Preprocessing 93 | │ 94 | ├── scripts <- Standalone scripts 95 | │ ├── deploy <- MLOps scripts for deployment (WIP) 96 | │ │ └── score.py <- Scoring script 97 | │ ├── train <- MLOps scripts for training 98 | │ │ ├── submit-train.py <- Script for submitting a training run to Azure ML Service 99 | │ │ ├── submit-train-local.py <- Script for local training using Azure ML 100 | │ │ └── train.py <- Example training script using the iris dataset 101 | │ ├── example.py <- Example sctipt 102 | │ └── MLOps.ipynb <- End to end MLOps example (To be refactored into the above) 103 | │ 104 | ├── src <- Code for use in this project. 
105 | │ └── {{cookiecutter.package_name}} <- Example python package - place shared code in such a package 106 | │ ├── __init__.py <- Python package initialisation 107 | │ ├── examplemodule.py <- Example module with functions and naming / commenting best practices 108 | │ ├── features.py <- Feature engineering functionality 109 | │ ├── io.py <- IO functionality 110 | │ └── pipeline.py <- Pipeline functionality 111 | │ 112 | └── tests <- Test cases (named after module) 113 | ├── test_notebook.py <- Example testing that Jupyter notebooks run without errors 114 | └── {{cookiecutter.package_name}} <- {{cookiecutter.package_name}} tests 115 | ├── examplemodule <- examplemodule tests (1 file per method tested) 116 | ├── features <- features tests 117 | ├── io <- io tests 118 | └── pipeline <- pipeline tests 119 | ``` 120 | 121 | ## MLOps 122 | Starter scripts for MLOps with Azure ML Service are included as a part of this template in the scripts folder and may be 123 | customised for your own purposes. Please browse the contents of the scripts folder for more details. 124 | 125 | For model training, the provided setup allows for running locally without any dependency on Azure ML by running train.py 126 | in the scripts/train folder directly. Alternatively you can submit local or remote runs using the submit scripts in the 127 | same folder. 128 | 129 | ## Testing 130 | Reproducibility and the correct functioning of code are essential to avoid wasted time. If a code block is copied more 131 | than once then it should be placed into a common script / module under src and unit tests added. The same applies for 132 | any other non-trivial code to ensure correct functioning. 133 | 134 | To run tests, install pytest using pip or conda (it should have been set up already if you used the conda_env.yml file) and 135 | then from the repository root run 136 | 137 | ``` 138 | pytest 139 | ``` 140 | 141 | ## Automated Document Generation 142 | A [sphinx](https://www.sphinx-doc.org/) project is provided under docs/writeup that will generate a writeup that 143 | also includes automatically generated API information for any packages. The output can be created in multiple 144 | formats including html and pdf. If you are using CI then this can be run automatically. To run 145 | locally execute the following commands: 146 | 147 | ``` 148 | cd docs/writeup 149 | make html 150 | ``` 151 | 152 | On Windows this will run make.bat; a Makefile is also included for those using the 'make' command. 153 | 154 | ## Development Process 155 | Contributions to this template are greatly appreciated and encouraged. 156 | 157 | To contribute an update simply: 158 | * Create a new branch / fork for your updates. 159 | * Check that your code follows the PEP8 guidelines (line lengths up to 120 are ok) and other general conventions within this document. 160 | * Ensure that as far as possible there are unit tests covering the functionality of any new code (see the example test after this list). 161 | * Check that all existing unit tests still pass. 162 | * Edit this document if needed to describe new files or other important information. 163 | * Create a pull request.
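For the unit-test item above, a minimal sketch of what such a test can look like is shown below. The template already ships its own tests under tests/{{cookiecutter.package_name}}/examplemodule/, so this sketch is purely illustrative (the function name is made up for the example); the expected 'Hello World' return value is the one shown in notebooks/example.ipynb.

```
# Illustrative sketch of a unit test for the example module (not the template's
# own test file). Assumes the package has been installed, e.g. with
# `python setup.py develop`, so that the import below resolves.
from {{cookiecutter.package_name}} import examplemodule


def test_hello_world_returns_greeting():
    # hello_world() returns the greeting demonstrated in notebooks/example.ipynb
    assert examplemodule.hello_world() == 'Hello World'
```

Run it from the repository root with `pytest`.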
164 | 165 | ## Important Links 166 | * https://wiki.equinor.com/wiki/index.php/Statoil_Data_Science_Technical_Standards - Data Science Technical Standards (Equinor Internal) 167 | * https://dataplatformwiki.azurewebsites.net/doku.php - Data Platform wiki (Equinor internal) 168 | * https://github.com/equinor/data-science-shared - Shared Data Science Code Repository (Equinor internal) 169 | 170 | ## References 171 | * https://github.com/equinor/data-science-template/ - The master template for this project 172 | * http://docs.python-guide.org/en/latest/writing/structure/ 173 | * https://github.com/Azure/Microsoft-TDSP 174 | * https://drivendata.github.io/cookiecutter-data-science/ 175 | 176 | [//]: # 177 | [anaconda]: 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > [!WARNING] 2 | > This repository is no longer maintained and as it has diverged from changes to Azure ML is now archived. 3 | 4 | [![Build Status](https://dev.azure.com/mhew/data-science-template/_apis/build/status/data-science-template?branchName=master)](https://dev.azure.com/mhew/data-science-template/_build/latest?definitionId=15&branchName=master) 5 | 6 | # Data Science Template 7 | This is a starter template for data science projects in Equinor, although it may also be useful for others. It contains many of the essential artifacts that you will need and presents a number of best practices including code setup, samples, MLOps using Azure, a standard document to guide and gather information relating to the data science process and more. 8 | 9 | As it is impossible to create a single template that will meet every projects needs, this example should be considered 10 | a starting point and changed based upon the working and evolution of your project. 11 | 12 | Before working with the contents of this template or Data Science projects in general it is recommended to familiarise yourself with the Equinor [Data Science Technical Standards](https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards) (Currently Equinor internal only) 13 | 14 | ## Getting Started With This Template 15 | This template is provided as a [Cookiecutter template](http://cookiecutter.readthedocs.org/en/latest/installation.html) so you 16 | can quickly create an instance customised for your project. An assumption is that you have a working python installation. 17 | 18 | To get running, first install the latest Cookiecutter if you haven't installed it yet (this requires 19 | Cookiecutter 1.4.0 or higher): 20 | 21 | pip install -U cookiecutter 22 | 23 | ### Create project 24 | Then generate a new project for your own use based upon the template, answering the questions to customise the generated 25 | project: 26 | 27 | cookiecutter https://github.com/equinor/data-science-template.git 28 | 29 | The values you are prompted for are: 30 | 31 | | Value | Description | 32 | | :--- | --- | 33 | | project_name | A name for your project. Used mostly within documentation | 34 | | project_description | A description to include in the README.md | 35 | | repo_name | The name of the github repository where the project will be held | 36 | | conda_name | The name of the conda environment to use | 37 | | package_name | A name for the generated python package. | 38 | | mlops_name | Default name for Azure ML. | 39 | | mlops_compute_name | Default Azure ML compute cluster name to use. 
| 40 | | author | The main author of the solution. Included in the setup.py file | 41 | | open_source_license | What type of open source license the project will be released under | 42 | | devops_organisation | An Azure DevOps organisation. Leave blank if you aren't using Azure DevOps | 43 | 44 | If you are uncertain about what to enter for any value then just accept the defaults. You can always change the generated project later. 45 | 46 | *Getting problems? You can always download this repository using the download button above and reference the local copy e.g. cookiecutter c:\Downloads\data-science-template, however ideally fix any git proxy or other issues that are causing problems.* 47 | 48 | You are now ready to get started, however you should first create a new github repository for your new project and add your 49 | project using the following commands (substitute myproject with the name of your project and REMOTE-REPOSITORY-URL 50 | with the remote repository url). 51 | 52 | cd myproject 53 | git init 54 | git add . 55 | git commit -m "Initial commit" 56 | git remote add origin REMOTE-REPOSITORY-URL 57 | git remote -v 58 | git push origin master 59 | 60 | ### Continuous Integration 61 | Continuous Integration (CI) increase quality by building, running tests and performing other validation whenever 62 | code is committed. The template contains a build pipeline for Azure DevOps, however requires a couple of manual 63 | steps to setup: 64 | 65 | * Log in to http://dev.azure.com and browse to, or create an organisation & project. The project name should be the same as your github repository name. 66 | * Under *Pipelines -> Builds select* *New Pipeline* 67 | * Select github and then your repository. Login / grant any permissions as prompted 68 | * In the review pane click *run* 69 | 70 | You are now setup for CI and automated test / building. You should verify the badge link in this README corresponds 71 | with your DevOps project, and as a further step might setup any release pipelines for automated deployment. 72 | 73 | At this stage the build pipeline doesn't include MLOps steps, although these can be added based uon your needs. 74 | 75 | ### Finally 76 | 77 | * Update the project readme file with additional project specific details including setup, configuration and usage. 78 | * The docs\process_documentation.md file should be completed phase by phase, and each phase result shall be submitted for review and approval before the project moves on to the next phase. This is to assist with the gathering of essential information required to deliver a correct and robust solution. The git respoitory shall be added to the script that populates the [knowledge repository](https://git.statoil.no/DataScience/projects) to ease future knowledge sharing. 79 | 80 | ## Generated Project Contents 81 | Depending upon the selected options when creating the project, the generated structure will look similar to the below: 82 | 83 | ``` 84 | ├── .gitignore <- Files that should be ignored by git. Add seperate .gitignore files in sub folders if 85 | │ needed 86 | ├── conda_env.yml <- Conda environment definition for ensuring consistent setup across environments 87 | ├── LICENSE 88 | ├── README.md <- The top-level README for developers using this project. 89 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 90 | │ generated with `pip freeze > requirements.txt`. Might not be needed if using conda. 
91 | ├── setup.py                          <- Metadata about your project for easy distribution. 92 | │ 93 | ├── data 94 | │   ├── interim_[desc]                <- Interim files - give these folders whatever name makes sense. 95 | │   ├── processed                     <- The final, canonical data sets for modeling. 96 | │   ├── raw                           <- The original, immutable data dump. 97 | │   ├── temp                          <- Temporary files. 98 | │   └── training                      <- Files relating to the training process 99 | │ 100 | ├── docs                              <- Documentation 101 | │   ├── data_science_code_of_conduct.md  <- Code of conduct. 102 | │   ├── process_documentation.md      <- Standard template for documenting process and decisions. 103 | │   └── writeup                       <- Sphinx project for project writeup including auto generated API. 104 | │      ├── conf.py                    <- Sphinx configuration file. 105 | │      ├── index.rst                  <- Start page. 106 | │      ├── make.bat                   <- For generating documentation (Windows) 107 | │      └── Makefile                   <- For generating documentation (make) 108 | │ 109 | ├── examples                          <- Add folders as needed e.g. examples, eda, use case 110 | │ 111 | ├── extras                            <- Miscellaneous extras. 112 | │   └── add_explorer_context_shortcuts.reg <- Adds additional Windows Explorer context menus for starting jupyter. 113 | │ 114 | ├── notebooks                         <- Notebooks for analysis and testing 115 | │   ├── eda                           <- Notebooks for EDA 116 | │   │   └── example.ipynb             <- Example python notebook 117 | │   ├── features                      <- Notebooks for generating and analysing features (1 per feature) 118 | │   ├── modelling                     <- Notebooks for modelling 119 | │   └── preprocessing                 <- Notebooks for Preprocessing 120 | │ 121 | ├── scripts                           <- Standalone scripts 122 | │   ├── deploy                        <- MLOps scripts for deployment (WIP) 123 | │   │   └── score.py                  <- Scoring script 124 | │   ├── train                         <- MLOps scripts for training 125 | │   │   ├── submit-train.py           <- Script for submitting a training run to Azure ML Service 126 | │   │   ├── submit-train-local.py     <- Script for local training using Azure ML 127 | │   │   └── train.py                  <- Example training script using the iris dataset 128 | │   ├── example.py                    <- Example script 129 | │   └── MLOps.ipynb                   <- End to end MLOps example (To be refactored into the above) 130 | │ 131 | ├── src                               <- Code for use in this project. 132 | │   └── examplepackage                <- Example python package - place shared code in such a package 133 | │       ├── __init__.py               <- Python package initialisation 134 | │       ├── examplemodule.py          <- Example module with functions and naming / commenting best practices 135 | │       ├── features.py               <- Feature engineering functionality 136 | │       ├── io.py                     <- IO functionality 137 | │       └── pipeline.py               <- Pipeline functionality 138 | │ 139 | └── tests                             <- Test cases (named after module) 140 |     ├── test_notebook.py              <- Example testing that Jupyter notebooks run without errors 141 |     ├── examplepackage                <- examplepackage tests 142 |     ├── examplemodule                 <- examplemodule tests (1 file per method tested) 143 |     ├── features                      <- features tests 144 |     ├── io                            <- io tests 145 |     └── pipeline                      <- pipeline tests 146 | ``` 147 | 148 | ## Contributing to This Template 149 | Contributions to this template are greatly appreciated and encouraged. 150 | 151 | To contribute an update, simply: 152 | * Submit an issue describing your proposed change to the repo in question. 153 | * The repo owner will respond to your issue promptly. 154 | * Fork the desired repo, develop and test your code changes. 155 | * Check that your code follows the PEP8 guidelines (line lengths up to 120 are ok) and other general conventions within this document. 156 | * Ensure that your code adheres to the existing style.
Refer to the 157 | [Google Cloud Platform Samples Style Guide]( 158 | https://github.com/GoogleCloudPlatform/Template/wiki/style.html) for the 159 | recommended coding standards for this organization. 160 | * Ensure that, as far as possible, there are unit tests covering the functionality of any new code. 161 | * Check that all existing unit tests still pass. 162 | * Edit this document and the template README.md if needed to describe new files or other important information. 163 | * Submit a pull request. 164 | 165 | 166 | ### Template development environment 167 | To develop this template further, you might want to set up a virtual environment: 168 | 169 | #### Setup using venv 170 | ``` 171 | cd data-science-template 172 | python -m venv dst-env 173 | ``` 174 | 175 | #### Activate environment 176 | Mac / Linux 177 | ``` 178 | source dst-env/bin/activate 179 | ``` 180 | 181 | Windows 182 | ``` 183 | dst-env\Scripts\activate 184 | ``` 185 | 186 | #### Install Dependencies 187 | ``` 188 | pip install -r requirements.txt 189 | ``` 190 | 191 | 192 | #### Testing 193 | To run the template tests, install pytest using pip or conda and then, from the repository root, run 194 | 195 | pytest tests 196 | 197 | #### Linting 198 | To verify that your code adheres to Python standards, run linting as shown below: 199 | 200 | flake8 --max-line-length=120 *.py hooks/ tests/ 201 | 202 | ## Important Links 203 | * https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards - Data Science Technical Standards (Equinor Internal) 204 | * https://dataplatformwiki.azurewebsites.net/doku.php - Data Platform wiki (Equinor internal) 205 | * https://github.com/Statoil/data-science-shared - Shared Data Science Code Repository (Equinor internal) 206 | 207 | ## References 208 | * https://github.com/Statoil/data-science-template/ - The master template for this project 209 | * http://docs.python-guide.org/en/latest/writing/structure/ 210 | * https://github.com/Azure/Microsoft-TDSP 211 | * https://drivendata.github.io/cookiecutter-data-science/ 212 | * https://github.com/audreyr/cookiecutter-pypackage 213 | 214 | [//]: # 215 | [anaconda]: 216 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/process_documentation.md: -------------------------------------------------------------------------------- 1 | # Data Science Process 2 | 3 | ## Introduction 4 | This DS process standard takes the form of a template. Each DS project shall complete this template to make sure that DS projects in Statoil can deliver value to the business to the maximal extent. 5 | 6 | The document is structured into 6 sections according to the 6 DS phases: 7 | * Business understanding and problem definition 8 | * Project planning 9 | * Collecting and preparing data 10 | * Modeling 11 | * Evaluation 12 | * Deployment and monitoring 13 | 14 | For each phase, a set of tasks is identified. A group of questions is listed under each task. These questions are designed to guide DS teams to carry out DS projects in a standard way. 15 | 16 | The DS team shall complete all the questions as a joint effort of the whole team. The answers for each phase shall be reviewed/approved by the QA team (for example, DS VP, PM, DS discipline advisor, peers, stakeholders) before the project moves to the next phase (Table 1 shall contain the review/approval records for each phase). The questions answered in previous iterations shall be updated if needed, and the updated document shall be reviewed and approved.
17 | 18 | The DS project leader (DS team responsible) owns (is responsible for completing) this document. 19 | 20 | Figure 1 shows the adopted data science process phases, and the document/quality control process is plotted in the middle. A more detailed document/quality control flowchart is illustrated in Figure 2. 21 | 22 | The document shall be added to the script that populates the [knowledge repository](https://git.statoil.no/DataScience/projects) to ease future knowledge sharing. 23 | 24 | * * * 25 | **Table 1.** Data Science phase review/approval table 26 | 27 | 28 | 29 | * * * 30 | 31 | **Figure 1.** Data Science process diagram 32 | 33 | 34 | 35 | _ _ _ 36 | 37 | **Figure 2.** Data Science process FlowChart 38 | 39 | 40 | 41 | _ _ _ 42 | 43 | ## 1 Business Understanding and Problem Definition 44 | 45 | ### 1.1 Connecting business 46 | #### 1.1.1 Who is the sponsor? 47 | #### 1.1.2 What is the business organization chart? 48 | #### 1.1.3 What is the business goal/objective? 49 | #### 1.1.4 What is the business strategy? 50 | #### 1.1.5 What is the business process? 51 | #### 1.1.6 How is the business efficiency measured? 52 | #### 1.1.7 Who is the main contact person? 53 | #### 1.1.8 Who is the SME? 54 | #### 1.1.9 Who is the end user? 55 | 56 | ### 1.2 Understanding business challenge 57 | #### 1.2.1 What is the business challenge/pain point/bottleneck? 58 | #### 1.2.2 What are the identified use cases? 59 | #### 1.2.3 Has the discovery workshop been executed, and what is the result? 60 | #### 1.2.4 What is the current solution / how might a theoretical human “expert” perform the task today? 61 | #### 1.2.5 If your human expert were to perform this task, how would you respond to them so that they improve next time? For classification, do this for all four quadrants of a typical confusion matrix (e.g. true/false positives and negatives). 62 | #### 1.2.6 If a human were to perform this task, what assumptions would the user want them to make? 63 | #### 1.2.7 What are the constraints? 64 | 65 | 66 | ### 1.3 Identifying requirements 67 | #### 1.3.1 What are the functional requirements? 68 | #### 1.3.2 What are the identified end user UX requirements? 69 | #### 1.3.3 How should the potential solution fit into the business process? 70 | #### 1.3.4 What is the solution delivery deadline? 71 | 72 | ### 1.4 Discovering data 73 | #### 1.4.1 What data do you ideally need to solve the problem? 74 | #### 1.4.2 What data is available? 75 | #### 1.4.3 Have you used the Statoil data catalog for data discovery ([link to data catalog](https://eun-su1.azuredatacatalog.com/#/home))? 76 | #### 1.4.4 What is the data format, e.g. structured in a database, free text or images? 77 | #### 1.4.5 Who owns the data? 78 | #### 1.4.6 How is the data accessed? 79 | #### 1.4.7 Who is the contact person to access the data? 80 | #### 1.4.8 What are the risks for accessing, understanding and analyzing the data? 81 | #### 1.4.9 What is the effort estimation for collecting and preparing data from each data source? 82 | #### 1.4.10 What are the data governance issues? 83 | #### 1.4.11 How is each use case/process/entity covered by the discovered data? 84 | 85 | ### 1.5 Identifying DS opportunity 86 | #### 1.5.1 What is the problem type from a DS perspective, e.g. regression, classification, clustering, etc.? 87 | #### 1.5.2 What is the problem-solving process from a DS perspective (solution framework)? 88 | #### 1.5.3 What previous relevant experience/components can be reused?
89 | #### 1.5.4 Has the feasibility study been done? What is the result? 90 | #### 1.5.5 What is the business value the DS solution can bring? 91 | #### 1.5.6 What is the consequence of a potential DS solution error? How can we control it? 92 | #### 1.5.7 How can the DS solution fit into the business process? 93 | #### 1.5.8 What are the main risks of failure? How can we control them? 94 | #### 1.5.9 What is the feedback from the SME on the proposed DS solution? 95 | 96 | ### 1.6 Setting success/stop criteria 97 | #### 1.6.1 What is the evaluation strategy: objective evaluation or subjective evaluation? 98 | #### 1.6.2 What are the objective success criteria (recall, precision, accuracy, etc.)? 99 | #### 1.6.3 What are the subjective success criteria? 100 | #### 1.6.4 What are the stop criteria? 101 | #### 1.6.5 Have the success/stop criteria been communicated to and agreed with stakeholders? 102 | #### 1.6.6 Is insufficient SME involvement part of the stop criteria? What has been agreed on with respect to SME involvement and SME experience level? 103 | 104 | ### 1.7 Document review and artifact archiving 105 | #### 1.7.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 106 | #### 1.7.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 107 | 108 | ## 2 Project Planning 109 | 110 | ### 2.1 Resource planning 111 | #### 2.1.1 What is the competence and resource plan? 112 | #### 2.1.2 What are the competences needed (competence matrix)? 113 | #### 2.1.3 How can team members meet the competence matrix? 114 | #### 2.1.4 Does the team include the following roles: data scientist, ML engineer, and data engineer? 115 | #### 2.1.5 Are there dedicated SMEs allocated for the project? 116 | #### 2.1.6 What is the cooperation model with the SME and end user? 117 | 118 | ### 2.2 Time planning 119 | #### 2.2.1 What is the time plan? 120 | #### 2.2.2 How is the DS time plan aligned with the main project plan (if relevant)? 121 | #### 2.2.3 How do the DS iterations fit into the time plan? 122 | #### 2.2.4 What are the definitions of the project phases/steps, and what are the milestones for them? 123 | #### 2.2.5 What are the risks to following the plan? How can the risks be controlled? 124 | #### 2.2.6 What is the feedback from stakeholders on the time plan? 125 | 126 | ### 2.3 Process planning 127 | #### 2.3.1 What is the scope of the DS project? 128 | #### 2.3.2 Is the DS project a standalone process, or is it running in parallel with a bigger SW implementation project that the DS solution is part of? 129 | #### 2.3.3 If parallel, what is the cooperation model between the main process and the DS process? 130 | #### 2.3.4 If parallel, has the research/iteration nature of the DS project been communicated to the main project management? 131 | #### 2.3.5 Do you follow the rule: start simple, get value into the business, and iterate for improvement? 132 | #### 2.3.6 How are the DS iterations planned in the project? 133 | #### 2.3.7 What is the plan to build up a pipeline as early as possible to speed up iteration? 134 | #### 2.3.8 Is the Kanban agile project process considered as the first option? If not, why? 135 | #### 2.3.9 What is the feedback loop from stakeholders? 136 | #### 2.3.10 How can the results from each phase be reviewed by stakeholders? 137 | #### 2.3.11 Has an architecture contract been completed?
138 | 139 | ### 2.4 DS tools planning 140 | #### 2.4.1 What tools/platforms/systems are planned to be used in the DS project? 141 | #### 2.4.2 Are there experience/competence gaps in using these tools? If yes, what is the plan to close the gaps? 142 | #### 2.4.3 Are all the tools standard tools according to the DCOE DS tech standard? If not, why? ([link to the standard wiki page](https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards)) 143 | #### 2.4.4 What is the plan for code/document review, artifact archiving, and knowledge sharing? 144 | 145 | ### 2.5 Document review and artifact archiving 146 | #### 2.5.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 147 | #### 2.5.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 148 | 149 | ## 3 Data collecting and preparing 150 | 151 | ### 3.1 Collecting data 152 | #### 3.1.1 What are the rules to select relevant and irrelevant data? 153 | #### 3.1.2 What is the frequency/granularity at which the data is collected? Is it enough for the target problem? 154 | #### 3.1.3 Is the raw data kept untouched after collection? 155 | #### 3.1.4 Is the data to be stored in the data platform? If not, why? 156 | #### 3.1.5 Is an open data format used to store the data, for example txt, json or csv? If not, why? 157 | #### 3.1.6 Is an automated pipeline set up for processing new data? If not, why? 158 | #### 3.1.7 Document the data collection process. 159 | 160 | ### 3.2 Exploring data 161 | #### 3.2.1 What is the structure of the data? 162 | #### 3.2.2 What are the relationships between data items? 163 | #### 3.2.3 How are the data from different sources mapped together? 164 | #### 3.2.4 What tools, statistical methods and visualization tools have been used to explore the data? 165 | #### 3.2.5 What is the data quality: completeness, consistency, validity, and accuracy? 166 | #### 3.2.6 What are the other issues with data quality? 167 | #### 3.2.7 How should duplicated data be filtered or removed? 168 | #### 3.2.8 Are there outliers in the data? 169 | #### 3.2.9 What patterns have you found in the data? 170 | 171 | ### 3.3 Understanding data from domain 172 | #### 3.3.1 How was the data generated? 173 | #### 3.3.2 How was the data sampled/transferred? 174 | #### 3.3.3 What is the meaning of each data item from a business perspective? 175 | #### 3.3.4 What is the relationship between each data item and the target output? 176 | #### 3.3.5 Are there unstable data periods? How can they be identified and removed? 177 | #### 3.3.6 Are all the identified outliers noise? 178 | #### 3.3.7 What is the valid data range for each data item? 179 | 180 | ### 3.4 Preparing data 181 | #### 3.4.1 What is the plan to prepare the data? 182 | #### 3.4.2 What is the feedback from the SME on the data preparation plan? 183 | #### 3.4.3 How is missing data filled/removed? 184 | #### 3.4.4 How is noisy data removed/replaced? 185 | #### 3.4.5 How is overlapping data combined/filtered/removed? 186 | #### 3.4.6 How is the data transformed? 187 | #### 3.4.7 How are the training, validation and test datasets split? 188 | 189 | ### 3.5 Feature engineering 190 | #### 3.5.1 Is an ML method used to reduce the complexity of the input feature space, for example PCA or an autoencoder?
191 | #### 3.5.2 What are the useful domain characteristics that are not represented in the dataset? 192 | #### 3.5.3 What features can be identified/created to represent the identified missing characteristics? 193 | 194 | ### 3.6 Document review and artifact archiving 195 | #### 3.6.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 196 | #### 3.6.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 197 | 198 | ## 4 Modeling 199 | 200 | ### 4.1 Selecting model 201 | #### 4.1.1 What DS models have been considered, and what are the ones chosen for further evaluation? 202 | #### 4.1.2 What are the advantages and disadvantages of the chosen models? 203 | #### 4.1.3 Is the simplest model chosen as a benchmark in the first iteration? 204 | #### 4.1.4 What are the criteria to compare candidate models? 205 | 206 | ### 4.2 Building model 207 | #### 4.2.1 What are the hyper-parameters for the selected models? 208 | #### 4.2.2 What are the processes to optimize the hyper-parameters? 209 | #### 4.2.3 Has the data been normalized? Give an explanation. 210 | #### 4.2.4 What is the time used to train the model? Is it acceptable for offline or online training? 211 | 212 | ### 4.3 Testing model 213 | #### 4.3.1 Is a separate test dataset used to test the models? 214 | #### 4.3.2 Are the success criteria met by the models? 215 | #### 4.3.3 How easily can the end user use/understand the model output? 216 | #### 4.3.4 Is model performance part of the criteria? 217 | #### 4.3.5 What is the subjective evaluation result? 218 | #### 4.3.6 If more than one model meets the criteria, how should one (or more) be chosen from them? 219 | 220 | ### 4.4 Document review and artifact archiving 221 | #### 4.4.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 222 | #### 4.4.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 223 | 224 | ## 5 Evaluation 225 | 226 | ### 5.1 Technical evaluation 227 | #### 5.1.1 Have the predefined success/acceptance criteria been met by the chosen model? 228 | #### 5.1.2 Have all the identified requirements been met? 229 | #### 5.1.3 Have all the identified use cases been covered? 230 | #### 5.1.4 What are the preconditions and limitations of the chosen model? 231 | #### 5.1.5 How easily can the model be integrated into the work process? 232 | #### 5.1.6 What end user competence is needed to use the model? Is this competence requirement acceptable to the end user organization? 233 | #### 5.1.7 Has the business changed so that the solution cannot be applied anymore? 234 | 235 | ### 5.2 Process evaluation 236 | #### 5.2.1 Has the planned artifact peer review been executed as planned? 237 | #### 5.2.2 Have the communication channels with stakeholders worked as expected? 238 | #### 5.2.3 Has the time plan been met? If not, why? 239 | #### 5.2.4 Have all the relevant documents been in place and approved? 240 | #### 5.2.5 What is the go/no-go decision and the reason behind it? 241 | #### 5.2.6 What is the feedback from the SME or end user on the evaluation result?
242 | 243 | ### 5.3 Document review and artifact archiving 244 | #### 5.3.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 245 | #### 5.3.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 246 | 247 | ## 6 Deployment and Monitoring 248 | 249 | ### 6.1 Deploying model 250 | #### 6.1.1 How is the chosen model implemented/deployed? 251 | #### 6.1.2 How is the result to be presented to the end user? Is there a graphical way to do it? 252 | #### 6.1.3 If deployed as SW, has the Statoil architecture contract been met? 253 | #### 6.1.4 Which TRL (technology readiness level) is the SW classified as? 254 | #### 6.1.5 How are the results to be interpreted or utilized? 255 | #### 6.1.6 Are there IP governance issues? 256 | 257 | ### 6.2 Managing the process change 258 | #### 6.2.1 What are the changes the new DS solution brings to the business process? 259 | #### 6.2.2 What efforts/processes are required for the management to adopt the new solution? 260 | #### 6.2.3 What effort is used to train the end user to use the new solution? 261 | 262 | ### 6.3 Monitoring and maintaining DS model 263 | #### 6.3.1 What is the operation/maintenance plan for the DS solution? 264 | #### 6.3.2 How often shall the model be re-trained and re-deployed? Automatically or manually? 265 | #### 6.3.3 What is the usage monitoring plan for the DS solution? 266 | #### 6.3.4 Are the DS solution's constraints/limitations being monitored? 267 | #### 6.3.5 What is the user feedback loop? 268 | 269 | ### 6.4 Knowledge sharing 270 | #### 6.4.1 What can be learned from the project process? 271 | #### 6.4.2 How can the experience be shared with others and retrieved for future reference? 272 | #### 6.4.3 What DS components (knowledge, product, process and data) can we reuse or share? 273 | #### 6.4.4 What improvement suggestions do you have for this DS standard template? 274 | 275 | ### 6.5 Document review 276 | #### 6.5.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 277 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/LICENSE: -------------------------------------------------------------------------------- 1 | {% if cookiecutter.open_source_license == 'MIT' %} 2 | The MIT License (MIT) 3 | Copyright (c) {% now 'utc', '%Y' %}, {{ cookiecutter.author }} 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | 11 | {% elif cookiecutter.open_source_license == 'LGPL3' %} 12 | GNU LESSER GENERAL PUBLIC LICENSE 13 | Version 3, 29 June 2007 14 | 15 | Copyright (C) 2007 Free Software Foundation, Inc. 16 | Everyone is permitted to copy and distribute verbatim copies 17 | of this license document, but changing it is not allowed. 18 | 19 | 20 | This version of the GNU Lesser General Public License incorporates 21 | the terms and conditions of version 3 of the GNU General Public 22 | License, supplemented by the additional permissions listed below. 23 | 24 | 0. Additional Definitions. 25 | 26 | As used herein, "this License" refers to version 3 of the GNU Lesser 27 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 28 | General Public License. 29 | 30 | "The Library" refers to a covered work governed by this License, 31 | other than an Application or a Combined Work as defined below. 32 | 33 | An "Application" is any work that makes use of an interface provided 34 | by the Library, but which is not otherwise based on the Library. 35 | Defining a subclass of a class defined by the Library is deemed a mode 36 | of using an interface provided by the Library. 37 | 38 | A "Combined Work" is a work produced by combining or linking an 39 | Application with the Library. The particular version of the Library 40 | with which the Combined Work was made is also called the "Linked 41 | Version". 42 | 43 | The "Minimal Corresponding Source" for a Combined Work means the 44 | Corresponding Source for the Combined Work, excluding any source code 45 | for portions of the Combined Work that, considered in isolation, are 46 | based on the Application, and not on the Linked Version. 47 | 48 | The "Corresponding Application Code" for a Combined Work means the 49 | object code and/or source code for the Application, including any data 50 | and utility programs needed for reproducing the Combined Work from the 51 | Application, but excluding the System Libraries of the Combined Work. 52 | 53 | 1. Exception to Section 3 of the GNU GPL. 54 | 55 | You may convey a covered work under sections 3 and 4 of this License 56 | without being bound by section 3 of the GNU GPL. 57 | 58 | 2. Conveying Modified Versions. 59 | 60 | If you modify a copy of the Library, and, in your modifications, a 61 | facility refers to a function or data to be supplied by an Application 62 | that uses the facility (other than as an argument passed when the 63 | facility is invoked), then you may convey a copy of the modified 64 | version: 65 | 66 | a) under this License, provided that you make a good faith effort to 67 | ensure that, in the event an Application does not supply the 68 | function or data, the facility still operates, and performs 69 | whatever part of its purpose remains meaningful, or 70 | 71 | b) under the GNU GPL, with none of the additional permissions of 72 | this License applicable to that copy. 73 | 74 | 3. Object Code Incorporating Material from Library Header Files. 75 | 76 | The object code form of an Application may incorporate material from 77 | a header file that is part of the Library. 
You may convey such object 78 | code under terms of your choice, provided that, if the incorporated 79 | material is not limited to numerical parameters, data structure 80 | layouts and accessors, or small macros, inline functions and templates 81 | (ten or fewer lines in length), you do both of the following: 82 | 83 | a) Give prominent notice with each copy of the object code that the 84 | Library is used in it and that the Library and its use are 85 | covered by this License. 86 | 87 | b) Accompany the object code with a copy of the GNU GPL and this license 88 | document. 89 | 90 | 4. Combined Works. 91 | 92 | You may convey a Combined Work under terms of your choice that, 93 | taken together, effectively do not restrict modification of the 94 | portions of the Library contained in the Combined Work and reverse 95 | engineering for debugging such modifications, if you also do each of 96 | the following: 97 | 98 | a) Give prominent notice with each copy of the Combined Work that 99 | the Library is used in it and that the Library and its use are 100 | covered by this License. 101 | 102 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 103 | document. 104 | 105 | c) For a Combined Work that displays copyright notices during 106 | execution, include the copyright notice for the Library among 107 | these notices, as well as a reference directing the user to the 108 | copies of the GNU GPL and this license document. 109 | 110 | d) Do one of the following: 111 | 112 | 0) Convey the Minimal Corresponding Source under the terms of this 113 | License, and the Corresponding Application Code in a form 114 | suitable for, and under terms that permit, the user to 115 | recombine or relink the Application with a modified version of 116 | the Linked Version to produce a modified Combined Work, in the 117 | manner specified by section 6 of the GNU GPL for conveying 118 | Corresponding Source. 119 | 120 | 1) Use a suitable shared library mechanism for linking with the 121 | Library. A suitable mechanism is one that (a) uses at run time 122 | a copy of the Library already present on the user's computer 123 | system, and (b) will operate properly with a modified version 124 | of the Library that is interface-compatible with the Linked 125 | Version. 126 | 127 | e) Provide Installation Information, but only if you would otherwise 128 | be required to provide such information under section 6 of the 129 | GNU GPL, and only to the extent that such information is 130 | necessary to install and execute a modified version of the 131 | Combined Work produced by recombining or relinking the 132 | Application with a modified version of the Linked Version. (If 133 | you use option 4d0, the Installation Information must accompany 134 | the Minimal Corresponding Source and Corresponding Application 135 | Code. If you use option 4d1, you must provide the Installation 136 | Information in the manner specified by section 6 of the GNU GPL 137 | for conveying Corresponding Source.) 138 | 139 | 5. Combined Libraries. 
140 | 141 | You may place library facilities that are a work based on the 142 | Library side by side in a single library together with other library 143 | facilities that are not Applications and are not covered by this 144 | License, and convey such a combined library under terms of your 145 | choice, if you do both of the following: 146 | 147 | a) Accompany the combined library with a copy of the same work based 148 | on the Library, uncombined with any other library facilities, 149 | conveyed under the terms of this License. 150 | 151 | b) Give prominent notice with the combined library that part of it 152 | is a work based on the Library, and explaining where to find the 153 | accompanying uncombined form of the same work. 154 | 155 | 6. Revised Versions of the GNU Lesser General Public License. 156 | 157 | The Free Software Foundation may publish revised and/or new versions 158 | of the GNU Lesser General Public License from time to time. Such new 159 | versions will be similar in spirit to the present version, but may 160 | differ in detail to address new problems or concerns. 161 | 162 | Each version is given a distinguishing version number. If the 163 | Library as you received it specifies that a certain numbered version 164 | of the GNU Lesser General Public License "or any later version" 165 | applies to it, you have the option of following the terms and 166 | conditions either of that published version or of any later version 167 | published by the Free Software Foundation. If the Library as you 168 | received it does not specify a version number of the GNU Lesser 169 | General Public License, you may choose any version of the GNU Lesser 170 | General Public License ever published by the Free Software Foundation. 171 | 172 | If the Library as you received it specifies that a proxy can decide 173 | whether future versions of the GNU Lesser General Public License shall 174 | apply, that proxy's public statement of acceptance of any version is 175 | permanent authorization for you to choose that version for the 176 | Library. 177 | 178 | {% elif cookiecutter.open_source_license == 'GPL3' %} 179 | GNU GENERAL PUBLIC LICENSE 180 | Version 3, 29 June 2007 181 | 182 | Copyright (C) 2007 Free Software Foundation, Inc. 183 | Everyone is permitted to copy and distribute verbatim copies 184 | of this license document, but changing it is not allowed. 185 | 186 | Preamble 187 | 188 | The GNU General Public License is a free, copyleft license for 189 | software and other kinds of works. 190 | 191 | The licenses for most software and other practical works are designed 192 | to take away your freedom to share and change the works. By contrast, 193 | the GNU General Public License is intended to guarantee your freedom to 194 | share and change all versions of a program--to make sure it remains free 195 | software for all its users. We, the Free Software Foundation, use the 196 | GNU General Public License for most of our software; it applies also to 197 | any other work released this way by its authors. You can apply it to 198 | your programs, too. 199 | 200 | When we speak of free software, we are referring to freedom, not 201 | price. Our General Public Licenses are designed to make sure that you 202 | have the freedom to distribute copies of free software (and charge for 203 | them if you wish), that you receive source code or can get it if you 204 | want it, that you can change the software or use pieces of it in new 205 | free programs, and that you know you can do these things. 
206 | 207 | To protect your rights, we need to prevent others from denying you 208 | these rights or asking you to surrender the rights. Therefore, you have 209 | certain responsibilities if you distribute copies of the software, or if 210 | you modify it: responsibilities to respect the freedom of others. 211 | 212 | For example, if you distribute copies of such a program, whether 213 | gratis or for a fee, you must pass on to the recipients the same 214 | freedoms that you received. You must make sure that they, too, receive 215 | or can get the source code. And you must show them these terms so they 216 | know their rights. 217 | 218 | Developers that use the GNU GPL protect your rights with two steps: 219 | (1) assert copyright on the software, and (2) offer you this License 220 | giving you legal permission to copy, distribute and/or modify it. 221 | 222 | For the developers' and authors' protection, the GPL clearly explains 223 | that there is no warranty for this free software. For both users' and 224 | authors' sake, the GPL requires that modified versions be marked as 225 | changed, so that their problems will not be attributed erroneously to 226 | authors of previous versions. 227 | 228 | Some devices are designed to deny users access to install or run 229 | modified versions of the software inside them, although the manufacturer 230 | can do so. This is fundamentally incompatible with the aim of 231 | protecting users' freedom to change the software. The systematic 232 | pattern of such abuse occurs in the area of products for individuals to 233 | use, which is precisely where it is most unacceptable. Therefore, we 234 | have designed this version of the GPL to prohibit the practice for those 235 | products. If such problems arise substantially in other domains, we 236 | stand ready to extend this provision to those domains in future versions 237 | of the GPL, as needed to protect the freedom of users. 238 | 239 | Finally, every program is threatened constantly by software patents. 240 | States should not allow patents to restrict development and use of 241 | software on general-purpose computers, but in those that do, we wish to 242 | avoid the special danger that patents applied to a free program could 243 | make it effectively proprietary. To prevent this, the GPL assures that 244 | patents cannot be used to render the program non-free. 245 | 246 | The precise terms and conditions for copying, distribution and 247 | modification follow. 248 | 249 | TERMS AND CONDITIONS 250 | 251 | 0. Definitions. 252 | 253 | "This License" refers to version 3 of the GNU General Public License. 254 | 255 | "Copyright" also means copyright-like laws that apply to other kinds of 256 | works, such as semiconductor masks. 257 | 258 | "The Program" refers to any copyrightable work licensed under this 259 | License. Each licensee is addressed as "you". "Licensees" and 260 | "recipients" may be individuals or organizations. 261 | 262 | To "modify" a work means to copy from or adapt all or part of the work 263 | in a fashion requiring copyright permission, other than the making of an 264 | exact copy. The resulting work is called a "modified version" of the 265 | earlier work or a work "based on" the earlier work. 266 | 267 | A "covered work" means either the unmodified Program or a work based 268 | on the Program. 
269 | 270 | To "propagate" a work means to do anything with it that, without 271 | permission, would make you directly or secondarily liable for 272 | infringement under applicable copyright law, except executing it on a 273 | computer or modifying a private copy. Propagation includes copying, 274 | distribution (with or without modification), making available to the 275 | public, and in some countries other activities as well. 276 | 277 | To "convey" a work means any kind of propagation that enables other 278 | parties to make or receive copies. Mere interaction with a user through 279 | a computer network, with no transfer of a copy, is not conveying. 280 | 281 | An interactive user interface displays "Appropriate Legal Notices" 282 | to the extent that it includes a convenient and prominently visible 283 | feature that (1) displays an appropriate copyright notice, and (2) 284 | tells the user that there is no warranty for the work (except to the 285 | extent that warranties are provided), that licensees may convey the 286 | work under this License, and how to view a copy of this License. If 287 | the interface presents a list of user commands or options, such as a 288 | menu, a prominent item in the list meets this criterion. 289 | 290 | 1. Source Code. 291 | 292 | The "source code" for a work means the preferred form of the work 293 | for making modifications to it. "Object code" means any non-source 294 | form of a work. 295 | 296 | A "Standard Interface" means an interface that either is an official 297 | standard defined by a recognized standards body, or, in the case of 298 | interfaces specified for a particular programming language, one that 299 | is widely used among developers working in that language. 300 | 301 | The "System Libraries" of an executable work include anything, other 302 | than the work as a whole, that (a) is included in the normal form of 303 | packaging a Major Component, but which is not part of that Major 304 | Component, and (b) serves only to enable use of the work with that 305 | Major Component, or to implement a Standard Interface for which an 306 | implementation is available to the public in source code form. A 307 | "Major Component", in this context, means a major essential component 308 | (kernel, window system, and so on) of the specific operating system 309 | (if any) on which the executable work runs, or a compiler used to 310 | produce the work, or an object code interpreter used to run it. 311 | 312 | The "Corresponding Source" for a work in object code form means all 313 | the source code needed to generate, install, and (for an executable 314 | work) run the object code and to modify the work, including scripts to 315 | control those activities. However, it does not include the work's 316 | System Libraries, or general-purpose tools or generally available free 317 | programs which are used unmodified in performing those activities but 318 | which are not part of the work. For example, Corresponding Source 319 | includes interface definition files associated with source files for 320 | the work, and the source code for shared libraries and dynamically 321 | linked subprograms that the work is specifically designed to require, 322 | such as by intimate data communication or control flow between those 323 | subprograms and other parts of the work. 324 | 325 | The Corresponding Source need not include anything that users 326 | can regenerate automatically from other parts of the Corresponding 327 | Source. 
328 | 329 | The Corresponding Source for a work in source code form is that 330 | same work. 331 | 332 | 2. Basic Permissions. 333 | 334 | All rights granted under this License are granted for the term of 335 | copyright on the Program, and are irrevocable provided the stated 336 | conditions are met. This License explicitly affirms your unlimited 337 | permission to run the unmodified Program. The output from running a 338 | covered work is covered by this License only if the output, given its 339 | content, constitutes a covered work. This License acknowledges your 340 | rights of fair use or other equivalent, as provided by copyright law. 341 | 342 | You may make, run and propagate covered works that you do not 343 | convey, without conditions so long as your license otherwise remains 344 | in force. You may convey covered works to others for the sole purpose 345 | of having them make modifications exclusively for you, or provide you 346 | with facilities for running those works, provided that you comply with 347 | the terms of this License in conveying all material for which you do 348 | not control copyright. Those thus making or running the covered works 349 | for you must do so exclusively on your behalf, under your direction 350 | and control, on terms that prohibit them from making any copies of 351 | your copyrighted material outside their relationship with you. 352 | 353 | Conveying under any other circumstances is permitted solely under 354 | the conditions stated below. Sublicensing is not allowed; section 10 355 | makes it unnecessary. 356 | 357 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 358 | 359 | No covered work shall be deemed part of an effective technological 360 | measure under any applicable law fulfilling obligations under article 361 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 362 | similar laws prohibiting or restricting circumvention of such 363 | measures. 364 | 365 | When you convey a covered work, you waive any legal power to forbid 366 | circumvention of technological measures to the extent such circumvention 367 | is effected by exercising rights under this License with respect to 368 | the covered work, and you disclaim any intention to limit operation or 369 | modification of the work as a means of enforcing, against the work's 370 | users, your or third parties' legal rights to forbid circumvention of 371 | technological measures. 372 | 373 | 4. Conveying Verbatim Copies. 374 | 375 | You may convey verbatim copies of the Program's source code as you 376 | receive it, in any medium, provided that you conspicuously and 377 | appropriately publish on each copy an appropriate copyright notice; 378 | keep intact all notices stating that this License and any 379 | non-permissive terms added in accord with section 7 apply to the code; 380 | keep intact all notices of the absence of any warranty; and give all 381 | recipients a copy of this License along with the Program. 382 | 383 | You may charge any price or no price for each copy that you convey, 384 | and you may offer support or warranty protection for a fee. 385 | 386 | 5. Conveying Modified Source Versions. 387 | 388 | You may convey a work based on the Program, or the modifications to 389 | produce it from the Program, in the form of source code under the 390 | terms of section 4, provided that you also meet all of these conditions: 391 | 392 | a) The work must carry prominent notices stating that you modified 393 | it, and giving a relevant date. 
394 | 395 | b) The work must carry prominent notices stating that it is 396 | released under this License and any conditions added under section 397 | 7. This requirement modifies the requirement in section 4 to 398 | "keep intact all notices". 399 | 400 | c) You must license the entire work, as a whole, under this 401 | License to anyone who comes into possession of a copy. This 402 | License will therefore apply, along with any applicable section 7 403 | additional terms, to the whole of the work, and all its parts, 404 | regardless of how they are packaged. This License gives no 405 | permission to license the work in any other way, but it does not 406 | invalidate such permission if you have separately received it. 407 | 408 | d) If the work has interactive user interfaces, each must display 409 | Appropriate Legal Notices; however, if the Program has interactive 410 | interfaces that do not display Appropriate Legal Notices, your 411 | work need not make them do so. 412 | 413 | A compilation of a covered work with other separate and independent 414 | works, which are not by their nature extensions of the covered work, 415 | and which are not combined with it such as to form a larger program, 416 | in or on a volume of a storage or distribution medium, is called an 417 | "aggregate" if the compilation and its resulting copyright are not 418 | used to limit the access or legal rights of the compilation's users 419 | beyond what the individual works permit. Inclusion of a covered work 420 | in an aggregate does not cause this License to apply to the other 421 | parts of the aggregate. 422 | 423 | 6. Conveying Non-Source Forms. 424 | 425 | You may convey a covered work in object code form under the terms 426 | of sections 4 and 5, provided that you also convey the 427 | machine-readable Corresponding Source under the terms of this License, 428 | in one of these ways: 429 | 430 | a) Convey the object code in, or embodied in, a physical product 431 | (including a physical distribution medium), accompanied by the 432 | Corresponding Source fixed on a durable physical medium 433 | customarily used for software interchange. 434 | 435 | b) Convey the object code in, or embodied in, a physical product 436 | (including a physical distribution medium), accompanied by a 437 | written offer, valid for at least three years and valid for as 438 | long as you offer spare parts or customer support for that product 439 | model, to give anyone who possesses the object code either (1) a 440 | copy of the Corresponding Source for all the software in the 441 | product that is covered by this License, on a durable physical 442 | medium customarily used for software interchange, for a price no 443 | more than your reasonable cost of physically performing this 444 | conveying of source, or (2) access to copy the 445 | Corresponding Source from a network server at no charge. 446 | 447 | c) Convey individual copies of the object code with a copy of the 448 | written offer to provide the Corresponding Source. This 449 | alternative is allowed only occasionally and noncommercially, and 450 | only if you received the object code with such an offer, in accord 451 | with subsection 6b. 452 | 453 | d) Convey the object code by offering access from a designated 454 | place (gratis or for a charge), and offer equivalent access to the 455 | Corresponding Source in the same way through the same place at no 456 | further charge. 
You need not require recipients to copy the 457 | Corresponding Source along with the object code. If the place to 458 | copy the object code is a network server, the Corresponding Source 459 | may be on a different server (operated by you or a third party) 460 | that supports equivalent copying facilities, provided you maintain 461 | clear directions next to the object code saying where to find the 462 | Corresponding Source. Regardless of what server hosts the 463 | Corresponding Source, you remain obligated to ensure that it is 464 | available for as long as needed to satisfy these requirements. 465 | 466 | e) Convey the object code using peer-to-peer transmission, provided 467 | you inform other peers where the object code and Corresponding 468 | Source of the work are being offered to the general public at no 469 | charge under subsection 6d. 470 | 471 | A separable portion of the object code, whose source code is excluded 472 | from the Corresponding Source as a System Library, need not be 473 | included in conveying the object code work. 474 | 475 | A "User Product" is either (1) a "consumer product", which means any 476 | tangible personal property which is normally used for personal, family, 477 | or household purposes, or (2) anything designed or sold for incorporation 478 | into a dwelling. In determining whether a product is a consumer product, 479 | doubtful cases shall be resolved in favor of coverage. For a particular 480 | product received by a particular user, "normally used" refers to a 481 | typical or common use of that class of product, regardless of the status 482 | of the particular user or of the way in which the particular user 483 | actually uses, or expects or is expected to use, the product. A product 484 | is a consumer product regardless of whether the product has substantial 485 | commercial, industrial or non-consumer uses, unless such uses represent 486 | the only significant mode of use of the product. 487 | 488 | "Installation Information" for a User Product means any methods, 489 | procedures, authorization keys, or other information required to install 490 | and execute modified versions of a covered work in that User Product from 491 | a modified version of its Corresponding Source. The information must 492 | suffice to ensure that the continued functioning of the modified object 493 | code is in no case prevented or interfered with solely because 494 | modification has been made. 495 | 496 | If you convey an object code work under this section in, or with, or 497 | specifically for use in, a User Product, and the conveying occurs as 498 | part of a transaction in which the right of possession and use of the 499 | User Product is transferred to the recipient in perpetuity or for a 500 | fixed term (regardless of how the transaction is characterized), the 501 | Corresponding Source conveyed under this section must be accompanied 502 | by the Installation Information. But this requirement does not apply 503 | if neither you nor any third party retains the ability to install 504 | modified object code on the User Product (for example, the work has 505 | been installed in ROM). 506 | 507 | The requirement to provide Installation Information does not include a 508 | requirement to continue to provide support service, warranty, or updates 509 | for a work that has been modified or installed by the recipient, or for 510 | the User Product in which it has been modified or installed. 
Access to a 511 | network may be denied when the modification itself materially and 512 | adversely affects the operation of the network or violates the rules and 513 | protocols for communication across the network. 514 | 515 | Corresponding Source conveyed, and Installation Information provided, 516 | in accord with this section must be in a format that is publicly 517 | documented (and with an implementation available to the public in 518 | source code form), and must require no special password or key for 519 | unpacking, reading or copying. 520 | 521 | 7. Additional Terms. 522 | 523 | "Additional permissions" are terms that supplement the terms of this 524 | License by making exceptions from one or more of its conditions. 525 | Additional permissions that are applicable to the entire Program shall 526 | be treated as though they were included in this License, to the extent 527 | that they are valid under applicable law. If additional permissions 528 | apply only to part of the Program, that part may be used separately 529 | under those permissions, but the entire Program remains governed by 530 | this License without regard to the additional permissions. 531 | 532 | When you convey a copy of a covered work, you may at your option 533 | remove any additional permissions from that copy, or from any part of 534 | it. (Additional permissions may be written to require their own 535 | removal in certain cases when you modify the work.) You may place 536 | additional permissions on material, added by you to a covered work, 537 | for which you have or can give appropriate copyright permission. 538 | 539 | Notwithstanding any other provision of this License, for material you 540 | add to a covered work, you may (if authorized by the copyright holders of 541 | that material) supplement the terms of this License with terms: 542 | 543 | a) Disclaiming warranty or limiting liability differently from the 544 | terms of sections 15 and 16 of this License; or 545 | 546 | b) Requiring preservation of specified reasonable legal notices or 547 | author attributions in that material or in the Appropriate Legal 548 | Notices displayed by works containing it; or 549 | 550 | c) Prohibiting misrepresentation of the origin of that material, or 551 | requiring that modified versions of such material be marked in 552 | reasonable ways as different from the original version; or 553 | 554 | d) Limiting the use for publicity purposes of names of licensors or 555 | authors of the material; or 556 | 557 | e) Declining to grant rights under trademark law for use of some 558 | trade names, trademarks, or service marks; or 559 | 560 | f) Requiring indemnification of licensors and authors of that 561 | material by anyone who conveys the material (or modified versions of 562 | it) with contractual assumptions of liability to the recipient, for 563 | any liability that these contractual assumptions directly impose on 564 | those licensors and authors. 565 | 566 | All other non-permissive additional terms are considered "further 567 | restrictions" within the meaning of section 10. If the Program as you 568 | received it, or any part of it, contains a notice stating that it is 569 | governed by this License along with a term that is a further 570 | restriction, you may remove that term. 
If a license document contains 571 | a further restriction but permits relicensing or conveying under this 572 | License, you may add to a covered work material governed by the terms 573 | of that license document, provided that the further restriction does 574 | not survive such relicensing or conveying. 575 | 576 | If you add terms to a covered work in accord with this section, you 577 | must place, in the relevant source files, a statement of the 578 | additional terms that apply to those files, or a notice indicating 579 | where to find the applicable terms. 580 | 581 | Additional terms, permissive or non-permissive, may be stated in the 582 | form of a separately written license, or stated as exceptions; 583 | the above requirements apply either way. 584 | 585 | 8. Termination. 586 | 587 | You may not propagate or modify a covered work except as expressly 588 | provided under this License. Any attempt otherwise to propagate or 589 | modify it is void, and will automatically terminate your rights under 590 | this License (including any patent licenses granted under the third 591 | paragraph of section 11). 592 | 593 | However, if you cease all violation of this License, then your 594 | license from a particular copyright holder is reinstated (a) 595 | provisionally, unless and until the copyright holder explicitly and 596 | finally terminates your license, and (b) permanently, if the copyright 597 | holder fails to notify you of the violation by some reasonable means 598 | prior to 60 days after the cessation. 599 | 600 | Moreover, your license from a particular copyright holder is 601 | reinstated permanently if the copyright holder notifies you of the 602 | violation by some reasonable means, this is the first time you have 603 | received notice of violation of this License (for any work) from that 604 | copyright holder, and you cure the violation prior to 30 days after 605 | your receipt of the notice. 606 | 607 | Termination of your rights under this section does not terminate the 608 | licenses of parties who have received copies or rights from you under 609 | this License. If your rights have been terminated and not permanently 610 | reinstated, you do not qualify to receive new licenses for the same 611 | material under section 10. 612 | 613 | 9. Acceptance Not Required for Having Copies. 614 | 615 | You are not required to accept this License in order to receive or 616 | run a copy of the Program. Ancillary propagation of a covered work 617 | occurring solely as a consequence of using peer-to-peer transmission 618 | to receive a copy likewise does not require acceptance. However, 619 | nothing other than this License grants you permission to propagate or 620 | modify any covered work. These actions infringe copyright if you do 621 | not accept this License. Therefore, by modifying or propagating a 622 | covered work, you indicate your acceptance of this License to do so. 623 | 624 | 10. Automatic Licensing of Downstream Recipients. 625 | 626 | Each time you convey a covered work, the recipient automatically 627 | receives a license from the original licensors, to run, modify and 628 | propagate that work, subject to this License. You are not responsible 629 | for enforcing compliance by third parties with this License. 630 | 631 | An "entity transaction" is a transaction transferring control of an 632 | organization, or substantially all assets of one, or subdividing an 633 | organization, or merging organizations. 
If propagation of a covered 634 | work results from an entity transaction, each party to that 635 | transaction who receives a copy of the work also receives whatever 636 | licenses to the work the party's predecessor in interest had or could 637 | give under the previous paragraph, plus a right to possession of the 638 | Corresponding Source of the work from the predecessor in interest, if 639 | the predecessor has it or can get it with reasonable efforts. 640 | 641 | You may not impose any further restrictions on the exercise of the 642 | rights granted or affirmed under this License. For example, you may 643 | not impose a license fee, royalty, or other charge for exercise of 644 | rights granted under this License, and you may not initiate litigation 645 | (including a cross-claim or counterclaim in a lawsuit) alleging that 646 | any patent claim is infringed by making, using, selling, offering for 647 | sale, or importing the Program or any portion of it. 648 | 649 | 11. Patents. 650 | 651 | A "contributor" is a copyright holder who authorizes use under this 652 | License of the Program or a work on which the Program is based. The 653 | work thus licensed is called the contributor's "contributor version". 654 | 655 | A contributor's "essential patent claims" are all patent claims 656 | owned or controlled by the contributor, whether already acquired or 657 | hereafter acquired, that would be infringed by some manner, permitted 658 | by this License, of making, using, or selling its contributor version, 659 | but do not include claims that would be infringed only as a 660 | consequence of further modification of the contributor version. For 661 | purposes of this definition, "control" includes the right to grant 662 | patent sublicenses in a manner consistent with the requirements of 663 | this License. 664 | 665 | Each contributor grants you a non-exclusive, worldwide, royalty-free 666 | patent license under the contributor's essential patent claims, to 667 | make, use, sell, offer for sale, import and otherwise run, modify and 668 | propagate the contents of its contributor version. 669 | 670 | In the following three paragraphs, a "patent license" is any express 671 | agreement or commitment, however denominated, not to enforce a patent 672 | (such as an express permission to practice a patent or covenant not to 673 | sue for patent infringement). To "grant" such a patent license to a 674 | party means to make such an agreement or commitment not to enforce a 675 | patent against the party. 676 | 677 | If you convey a covered work, knowingly relying on a patent license, 678 | and the Corresponding Source of the work is not available for anyone 679 | to copy, free of charge and under the terms of this License, through a 680 | publicly available network server or other readily accessible means, 681 | then you must either (1) cause the Corresponding Source to be so 682 | available, or (2) arrange to deprive yourself of the benefit of the 683 | patent license for this particular work, or (3) arrange, in a manner 684 | consistent with the requirements of this License, to extend the patent 685 | license to downstream recipients. "Knowingly relying" means you have 686 | actual knowledge that, but for the patent license, your conveying the 687 | covered work in a country, or your recipient's use of the covered work 688 | in a country, would infringe one or more identifiable patents in that 689 | country that you have reason to believe are valid. 
690 | 691 | If, pursuant to or in connection with a single transaction or 692 | arrangement, you convey, or propagate by procuring conveyance of, a 693 | covered work, and grant a patent license to some of the parties 694 | receiving the covered work authorizing them to use, propagate, modify 695 | or convey a specific copy of the covered work, then the patent license 696 | you grant is automatically extended to all recipients of the covered 697 | work and works based on it. 698 | 699 | A patent license is "discriminatory" if it does not include within 700 | the scope of its coverage, prohibits the exercise of, or is 701 | conditioned on the non-exercise of one or more of the rights that are 702 | specifically granted under this License. You may not convey a covered 703 | work if you are a party to an arrangement with a third party that is 704 | in the business of distributing software, under which you make payment 705 | to the third party based on the extent of your activity of conveying 706 | the work, and under which the third party grants, to any of the 707 | parties who would receive the covered work from you, a discriminatory 708 | patent license (a) in connection with copies of the covered work 709 | conveyed by you (or copies made from those copies), or (b) primarily 710 | for and in connection with specific products or compilations that 711 | contain the covered work, unless you entered into that arrangement, 712 | or that patent license was granted, prior to 28 March 2007. 713 | 714 | Nothing in this License shall be construed as excluding or limiting 715 | any implied license or other defenses to infringement that may 716 | otherwise be available to you under applicable patent law. 717 | 718 | 12. No Surrender of Others' Freedom. 719 | 720 | If conditions are imposed on you (whether by court order, agreement or 721 | otherwise) that contradict the conditions of this License, they do not 722 | excuse you from the conditions of this License. If you cannot convey a 723 | covered work so as to satisfy simultaneously your obligations under this 724 | License and any other pertinent obligations, then as a consequence you may 725 | not convey it at all. For example, if you agree to terms that obligate you 726 | to collect a royalty for further conveying from those to whom you convey 727 | the Program, the only way you could satisfy both those terms and this 728 | License would be to refrain entirely from conveying the Program. 729 | 730 | 13. Use with the GNU Affero General Public License. 731 | 732 | Notwithstanding any other provision of this License, you have 733 | permission to link or combine any covered work with a work licensed 734 | under version 3 of the GNU Affero General Public License into a single 735 | combined work, and to convey the resulting work. The terms of this 736 | License will continue to apply to the part which is the covered work, 737 | but the special requirements of the GNU Affero General Public License, 738 | section 13, concerning interaction through a network will apply to the 739 | combination as such. 740 | 741 | 14. Revised Versions of this License. 742 | 743 | The Free Software Foundation may publish revised and/or new versions of 744 | the GNU General Public License from time to time. Such new versions will 745 | be similar in spirit to the present version, but may differ in detail to 746 | address new problems or concerns. 747 | 748 | Each version is given a distinguishing version number. 
If the 749 | Program specifies that a certain numbered version of the GNU General 750 | Public License "or any later version" applies to it, you have the 751 | option of following the terms and conditions either of that numbered 752 | version or of any later version published by the Free Software 753 | Foundation. If the Program does not specify a version number of the 754 | GNU General Public License, you may choose any version ever published 755 | by the Free Software Foundation. 756 | 757 | If the Program specifies that a proxy can decide which future 758 | versions of the GNU General Public License can be used, that proxy's 759 | public statement of acceptance of a version permanently authorizes you 760 | to choose that version for the Program. 761 | 762 | Later license versions may give you additional or different 763 | permissions. However, no additional obligations are imposed on any 764 | author or copyright holder as a result of your choosing to follow a 765 | later version. 766 | 767 | 15. Disclaimer of Warranty. 768 | 769 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 770 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 771 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 772 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 773 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 774 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 775 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 776 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 777 | 778 | 16. Limitation of Liability. 779 | 780 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 781 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 782 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 783 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 784 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 785 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 786 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 787 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 788 | SUCH DAMAGES. 789 | 790 | 17. Interpretation of Sections 15 and 16. 791 | 792 | If the disclaimer of warranty and limitation of liability provided 793 | above cannot be given local legal effect according to their terms, 794 | reviewing courts shall apply local law that most closely approximates 795 | an absolute waiver of all civil liability in connection with the 796 | Program, unless a warranty or assumption of liability accompanies a 797 | copy of the Program in return for a fee. 798 | 799 | END OF TERMS AND CONDITIONS 800 | 801 | How to Apply These Terms to Your New Programs 802 | 803 | If you develop a new program, and you want it to be of the greatest 804 | possible use to the public, the best way to achieve this is to make it 805 | free software which everyone can redistribute and change under these terms. 806 | 807 | To do so, attach the following notices to the program. It is safest 808 | to attach them to the start of each source file to most effectively 809 | state the exclusion of warranty; and each file should have at least 810 | the "copyright" line and a pointer to where the full notice is found. 
811 | 812 | <one line to give the program's name and a brief idea of what it does.> 813 | Copyright (C) <year> <name of author> 814 | 815 | This program is free software: you can redistribute it and/or modify 816 | it under the terms of the GNU General Public License as published by 817 | the Free Software Foundation, either version 3 of the License, or 818 | (at your option) any later version. 819 | 820 | This program is distributed in the hope that it will be useful, 821 | but WITHOUT ANY WARRANTY; without even the implied warranty of 822 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 823 | GNU General Public License for more details. 824 | 825 | You should have received a copy of the GNU General Public License 826 | along with this program. If not, see <https://www.gnu.org/licenses/>. 827 | 828 | Also add information on how to contact you by electronic and paper mail. 829 | 830 | If the program does terminal interaction, make it output a short 831 | notice like this when it starts in an interactive mode: 832 | 833 | <program> Copyright (C) <year> <name of author> 834 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 835 | This is free software, and you are welcome to redistribute it 836 | under certain conditions; type `show c' for details. 837 | 838 | The hypothetical commands `show w' and `show c' should show the appropriate 839 | parts of the General Public License. Of course, your program's commands 840 | might be different; for a GUI interface, you would use an "about box". 841 | 842 | You should also get your employer (if you work as a programmer) or school, 843 | if any, to sign a "copyright disclaimer" for the program, if necessary. 844 | For more information on this, and how to apply and follow the GNU GPL, see 845 | <https://www.gnu.org/licenses/>. 846 | 847 | The GNU General Public License does not permit incorporating your program 848 | into proprietary programs. If your program is a subroutine library, you 849 | may consider it more useful to permit linking proprietary applications with 850 | the library. If this is what you want to do, use the GNU Lesser General 851 | Public License instead of this License. But first, please read 852 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 853 | 854 | {% endif %} 855 | --------------------------------------------------------------------------------