├── {{cookiecutter.repo_name}} ├── src │ └── {{cookiecutter.package_name}} │ │ ├── __init__.py │ │ ├── pipeline.py │ │ ├── features.py │ │ ├── examplemodule.py │ │ └── io.py ├── docs │ ├── writeup │ │ ├── info │ │ │ ├── code_of_conduct.rst │ │ │ ├── process_documentation.rst │ │ │ └── setup.rst │ │ ├── results │ │ │ └── eda.rst │ │ ├── api-{{cookiecutter.package_name}}.rst │ │ ├── Makefile │ │ ├── make.bat │ │ ├── index.rst │ │ └── conf.py │ ├── resources │ │ ├── DS_FlowChart.jpg │ │ ├── ReviewTable.jpg │ │ └── DS_ProcessFlow.jpg │ ├── data_science_code_of_conduct.md │ └── process_documentation.md ├── requirements.txt ├── data │ ├── temp │ │ └── .gitignore │ ├── raw │ │ └── .gitignore │ ├── processed │ │ └── .gitignore │ ├── training │ │ └── .gitignore │ └── interim_[desc] │ │ └── .gitignore ├── conda_env.yml ├── scripts │ ├── train │ │ ├── amlrun.py │ │ ├── submit-train-local.py │ │ ├── submit-train.py │ │ └── train.py │ ├── example.py │ └── deploy │ │ └── score.py ├── tests │ ├── {{cookiecutter.package_name}} │ │ └── examplemodule │ │ │ ├── test_hello_world.py │ │ │ └── test_add_value_to_numpy.py │ └── test_notebook.py ├── azure-pipelines.yml ├── setup.py ├── extras │ └── add_explorer_context_shortcuts.reg ├── notebooks │ └── example.ipynb ├── .gitignore ├── README.md └── LICENSE ├── requirements.txt ├── data └── training │ └── .gitignore ├── cookiecutter.json ├── LICENSE ├── azure-pipelines.yml ├── hooks └── post_gen_project.py ├── .travis.yml ├── tests └── test_create.py ├── .gitignore └── README.md /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cookiecutter 2 | flake8 3 | pytest 4 | pytest-cookies 5 | pytest-cov -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../../data_science_code_of_conduct.md -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/process_documentation.rst: -------------------------------------------------------------------------------- 1 | .. 
mdinclude:: ../../process_documentation.md -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | m2r 3 | nbformat 4 | numpy 5 | pandas 6 | pytest 7 | pytest-cookies 8 | pytest-cov 9 | sphinx -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/DS_FlowChart.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/DS_FlowChart.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/ReviewTable.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/ReviewTable.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/resources/DS_ProcessFlow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/data-science-template/HEAD/{{cookiecutter.repo_name}}/docs/resources/DS_ProcessFlow.jpg -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/temp/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Temporary folder for your own usage 4 | # 5 | # Git should typically ignore everything in this directory (except for this file) 6 | 7 | * 8 | !.gitignore 9 | #!SomeOtherFileToInclude 10 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/info/setup.rst: -------------------------------------------------------------------------------- 1 | Setup 2 | ===== 3 | 4 | Introduction 5 | ------------ 6 | 7 | .. note:: 8 | These documentation page are for your own use as you best see fit for your project. 9 | 10 | Here you could add setup information, or details on how you run things from an 11 | operational perspective. 12 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: {{cookiecutter.conda_name}} 2 | dependencies: 3 | - pandas=0.23.4 4 | - pytest=5.3.1 5 | - pytest-cov=2.8.1 6 | - numpy=1.17.4 7 | - nbconvert=5.6.1 8 | - nbformat=4.4.0 9 | - pip: 10 | - azureml-sdk 11 | - joblib==0.14.1 12 | - matplotlib==3.1.2 13 | - scikit-learn==0.22.1 14 | -------------------------------------------------------------------------------- /data/training/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 
5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/raw/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/processed/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/training/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/data/interim_[desc]/.gitignore: -------------------------------------------------------------------------------- 1 | # The .gitignore file specifies things that git should ignore. 2 | # 3 | # Whilst data should typically be consumed from the datalake or some other source, 4 | # this folder could contain local raw data files that should be copied in. 5 | # 6 | # Git should typically ignore everything in this directory (except for this file) 7 | 8 | * 9 | !.gitignore 10 | #!SomeOtherFileToInclude 11 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/results/eda.rst: -------------------------------------------------------------------------------- 1 | Exploratory Analysis 2 | ==================== 3 | 4 | Introduction 5 | ------------ 6 | 7 | .. note:: 8 | These documentation page are for your own use as you best see fit for your project. 9 | 10 | Here you could add information and links to present results from EDA or 11 | one of your other experiments. This might include links back to notebooks 12 | or other artifacts. 
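For instance, a link back to the bundled example notebook can be added with a
plain reStructuredText link (a sketch only -- the relative path below assumes the
default template layout and may need adjusting):

`Example notebook <../../../notebooks/example.ipynb>`_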
13 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/scripts/train/amlrun.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from azureml.core import Run
3 | except ImportError:
4 |     pass
5 | 
6 | 
7 | # access the Azure ML run
8 | # (returns None if the script is not running within AML)
9 | def get_AMLRun():
10 |     """Try to get the Azure Machine Learning run.
11 | 
12 |     Returns:
13 |         Run: The Experiment run, or None if there is no active run.
14 |     """
15 |     try:
16 |         run = Run.get_context(allow_offline=False)
17 |         return run
18 |     except Exception as e:
19 |         print("Caught = {}".format(e))
20 |         return None
21 | 
--------------------------------------------------------------------------------
/cookiecutter.json:
--------------------------------------------------------------------------------
1 | {
2 |     "project_name": "project_name",
3 |     "project_description": "A short description of the project.",
4 |     "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
5 |     "conda_name": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}",
6 |     "package_name": "{{ cookiecutter.project_name.lower().replace(' ', '').replace('-', '') }}",
7 |     "mlops_name": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}",
8 |     "mlops_compute_name": "cpu-compute",
9 |     "author": "Equinor ASA",
10 |     "open_source_license": ["Not open source", "MIT", "LGPL3", "GPL3"],
11 |     "devops_organisation": ""
12 | }
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/docs/writeup/api-{{cookiecutter.package_name}}.rst:
--------------------------------------------------------------------------------
1 | api-{{cookiecutter.package_name}} package
2 | ================================================================================
3 | 
4 | This page contains information about the {{cookiecutter.package_name}} package.
5 | 
6 | .. note::
7 |     Edit this page to add additional description above and list whatever modules
8 |     you would like included below. Documentation pages for the individual modules
9 |     will be automatically generated when you 'make' the documentation.
10 | 
11 | .. autosummary::
12 |     :toctree: _generated
13 | 
14 |     {{cookiecutter.package_name}}.examplemodule
15 |     {{cookiecutter.package_name}}.features
16 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/docs/writeup/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
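# For example (illustrative only), extra options can be passed to sphinx-build
# through the $(O) shortcut when invoking a target, e.g.:
#
#     make html O="-W --keep-going"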
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """An example python script template. 4 | 5 | Argv: 6 | infile: Input file 7 | outfile: Output file 8 | 9 | """ 10 | 11 | import sys 12 | import argparse 13 | 14 | 15 | def main(arguments): 16 | 17 | parser = argparse.ArgumentParser( 18 | description="Put your description here", 19 | formatter_class=argparse.RawDescriptionHelpFormatter) 20 | parser.add_argument('infile', help="Input file", type=argparse.FileType('r')) 21 | parser.add_argument('-o', '--outfile', help="Output file", 22 | default=sys.stdout, type=argparse.FileType('w')) 23 | 24 | args = parser.parse_args(arguments) 25 | 26 | print(args) 27 | 28 | # Add your code here 29 | 30 | 31 | if __name__ == '__main__': 32 | sys.exit(main(sys.argv[1:])) 33 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain your main project pipeline(s). 3 | 4 | Whilst the pipeline may change during the analysis phases, any more stable pipeline should be implemented here so 5 | that it can be reused and easily reproduced. 6 | """ 7 | # import pandas as pd 8 | 9 | # from examplepackage import features 10 | # from examplepackage.io import IO 11 | 12 | 13 | def run_pipeline(local_data_path: str): 14 | """ 15 | Run the main processing pipeline. 16 | 17 | Returns: 18 | A dataframe containing the output of the pipeline 19 | """ 20 | 21 | # io = IO(path) 22 | # df = io.load_cleaned_file(download_always=False) 23 | # df = add_choke_events(df) 24 | 25 | # Add calls to features.Xxx here 26 | 27 | # save (or return) dataframe here? 28 | 29 | 30 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/{{cookiecutter.package_name}}/examplemodule/test_hello_world.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | # Explicitly set path so don't need to run setup.py - if we have multiple copies of the code we would otherwise need 5 | # to setup a separate environment for each to ensure the code pointers are correct. 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src'))) # noqa 7 | 8 | # from pandas.util.testing import assert_frame_equal 9 | from {{cookiecutter.package_name}} import examplemodule 10 | 11 | 12 | def test_something(): 13 | # print(os.getcwd()) 14 | assert True, "A comment to show if the test fails" 15 | 16 | 17 | # def test_that_fails(): 18 | # assert False, "We expected this to fail" 19 | 20 | 21 | def test_hello_world(): 22 | assert examplemodule.hello_world() == "Hello World", "The Hello World strings should be the same" 23 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/features.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain project specific feature engineering functionality. 3 | 4 | You should avoid engineering features in a notebook as it is not transferable later if you want to automate the 5 | process. Add functions here to create your features, such functions should include those to generate specific features 6 | along with any more generic functions. 7 | 8 | Consider moving generic functions into the shared statoilds package. 9 | """ 10 | import pandas as pd 11 | 12 | 13 | def my_feature_xxx(df: pd.DataFrame): 14 | """ 15 | Description goes here. 16 | You might also add additional arguments such as column etc... 17 | Would be nice with some test cases also :) 18 | 19 | Args: 20 | df: Dataframe upon which to operate 21 | 22 | Returns: 23 | A dataframe with the Xxx feature appended 24 | """ 25 | 26 | # CODE HERE 27 | 28 | return df 29 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/{{cookiecutter.package_name}}/examplemodule/test_add_value_to_numpy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import pytest 5 | 6 | # Explicitly set path so don't need to run setup.py - if we have multiple copies of the code we would otherwise need 7 | # to setup a separate environment for each to ensure the code pointers are correct. 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src'))) # noqa 9 | 10 | # from pandas.util.testing import assert_frame_equal 11 | from {{cookiecutter.package_name}} import examplemodule 12 | 13 | 14 | def test_add_value_to_numpy(): 15 | array = np.array([1, 1, 1, 1, 1]) 16 | expected_result = np.array([2, 2, 2, 2, 2]) 17 | result_array = examplemodule.add_value_to_numpy(array, 1) 18 | assert np.array_equal(expected_result, result_array), "The Hello World strings should be the same" 19 | 20 | 21 | def test_add_value_to_numpy_wrong_type(): 22 | with pytest.raises(ValueError) as _: 23 | examplemodule.add_value_to_numpy([1, 1], 1) 24 | 25 | 26 | def test_add_value_to_numpy_empty(): 27 | with pytest.raises(ValueError) as _: 28 | examplemodule.add_value_to_numpy(None, 1) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 | 
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/examplemodule.py:
--------------------------------------------------------------------------------
1 | """
2 | This example module shows some simple methods and best practices for documentation.
3 | """
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def hello_world() -> str:
9 |     """
10 |     A simple method to get the hello world string.
11 | 
12 |     Returns:
13 |         The string "Hello World"
14 |     """
15 |     return "Hello World"
16 | 
17 | 
18 | def add_value_to_numpy(array: np.ndarray, amount: float = 1) -> np.ndarray:
19 |     """
20 |     A sample method to add a value to every element in a NumPy array.
21 | 
22 |     Args:
23 |         array: The source array to work on.
24 |         amount: The amount to add to each element in the array.
25 | 
26 |     Returns:
27 |         A new array with each value increased by amount.
28 | 
29 |     Examples:
30 |         Examples should be written in doctest format, and should illustrate how
31 |         to use the function.
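        For doctest to actually verify a result, include the expected output
        after the call, for example (using the module-level numpy import):

        >>> add_value_to_numpy(np.array([1, 2, 3]), 1)
        array([2, 3, 4])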
32 | >>> array = np.array([1, 1, 1 ,1, 1]) 33 | >>> result_array = add_value_to_numpy(array, 1) 34 | 35 | """ 36 | if array is None or \ 37 | not isinstance(array, np.ndarray): 38 | raise ValueError("array must be a valid ndarray") 39 | # if isinstance(a, np.ndarray): 40 | 41 | return array + amount 42 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: Build_and_Test 3 | displayName: Build and Test 4 | condition: succeeded() 5 | pool: 6 | name: Hosted Ubuntu 1604 7 | strategy: 8 | matrix: 9 | Python36: 10 | python.version: '3.6' 11 | Python37: 12 | python.version: '3.7' 13 | maxParallel: 3 14 | 15 | steps: 16 | - task: UsePythonVersion@0 17 | displayName: 'Use Python $(python.version)' 18 | inputs: 19 | versionSpec: '$(python.version)' 20 | 21 | - script: python -m pip install --upgrade pip 22 | displayName: 'Upgrade pip' 23 | 24 | - script: pip install -r requirements.txt 25 | displayName: 'Install requirements' 26 | 27 | - script: | 28 | flake8 --max-line-length=120 *.py hooks/ tests/ 29 | displayName: 'Run lint (flake8) tests' 30 | 31 | - script: | 32 | pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html 33 | displayName: pytest 34 | 35 | - task: PublishTestResults@2 36 | displayName: 'Publish Test Results **/test-results.xml' 37 | inputs: 38 | testResultsFiles: '**/test-results.xml' 39 | testRunTitle: 'Python $(python.version)' 40 | 41 | - task: PublishCodeCoverageResults@1 42 | inputs: 43 | codeCoverageTool: Cobertura 44 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 45 | reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov' 46 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: Build_and_Test 3 | displayName: Build and Test 4 | condition: succeeded() 5 | pool: 6 | name: Hosted Ubuntu 1604 7 | strategy: 8 | matrix: 9 | Python36: 10 | python.version: '3.6' 11 | Python37: 12 | python.version: '3.7' 13 | maxParallel: 3 14 | 15 | steps: 16 | - task: UsePythonVersion@0 17 | displayName: 'Use Python $(python.version)' 18 | inputs: 19 | versionSpec: '$(python.version)' 20 | 21 | - script: python -m pip install --upgrade pip 22 | displayName: 'Upgrade pip' 23 | 24 | - script: pip install -r requirements.txt 25 | displayName: 'Install requirements' 26 | 27 | - script: | 28 | flake8 --max-line-length=120 *.py tests/ scripts/ 29 | displayName: 'Run lint (flake8) tests' 30 | 31 | - script: | 32 | pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html 33 | displayName: pytest 34 | 35 | - task: PublishTestResults@2 36 | displayName: 'Publish Test Results **/test-results.xml' 37 | inputs: 38 | testResultsFiles: '**/test-results.xml' 39 | testRunTitle: 'Python $(python.version)' 40 | 41 | - task: PublishCodeCoverageResults@1 42 | inputs: 43 | codeCoverageTool: Cobertura 44 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 45 | reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov' 46 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/tests/test_notebook.py: -------------------------------------------------------------------------------- 
1 | import os 2 | import subprocess 3 | import tempfile 4 | import nbformat 5 | 6 | 7 | def run_notebook(filename): 8 | """ 9 | Execute the specified notebook via jupyter nbconvert and collect output. 10 | :returns (parsed nb object, execution errors) 11 | """ 12 | os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 13 | 14 | # get temporary file ( and then close to avoid multiple write problems 15 | with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: 16 | temp_name = fout.name 17 | 18 | # run jupyter nbconvert 19 | args = ["jupyter", "nbconvert", "--to", "notebook", "--execute", 20 | "--ExecutePreprocessor.timeout=60", "--output", fout.name, filename] 21 | subprocess.check_call(args, shell=True) 22 | 23 | # read and parse notebook 24 | with open(temp_name, "r") as fout: 25 | fout.seek(0) 26 | nb = nbformat.read(fout, nbformat.current_nbformat) 27 | 28 | errors = [output for cell in nb.cells if "outputs" in cell 29 | for output in cell["outputs"] 30 | if output.output_type == "error"] 31 | 32 | return nb, errors 33 | 34 | 35 | # Commented out for now pending an update to automatically set the conda environment. 36 | # This will work, but only if all libraries are in your default python environment. As I am using miniconda and the 37 | # example notebook uses numpy from within a separate environment this doesn't work for me. 38 | # def test_notebook(): 39 | # nb, errors = run_notebook('notebooks\example.ipynb') 40 | # assert errors == [] 41 | -------------------------------------------------------------------------------- /hooks/post_gen_project.py: -------------------------------------------------------------------------------- 1 | # import datetime 2 | import os 3 | # import shutil 4 | # from os.path import join 5 | 6 | 7 | def replace_contents(filename: str, what: str, replacement: str) -> None: 8 | """ 9 | Replace instances of a given string in a file 10 | 11 | Args: 12 | filename: The filename to replace within 13 | what: The text that should be matched 14 | replacement: The text that what should be replaced with 15 | """ 16 | with open(filename) as fh: 17 | changelog = fh.read() 18 | with open(filename, 'w') as fh: 19 | fh.write(changelog.replace(what, replacement)) 20 | 21 | 22 | if __name__ == "__main__": 23 | # today = datetime.date.today() 24 | # replace_contents('LICENSE', '', today.strftime("%Y")) 25 | 26 | if '{{ cookiecutter.open_source_license }}' == "Not open source": 27 | os.remove('LICENSE') 28 | # shutil.rmtree('LICENSE') 29 | 30 | # Print out some information on setup and next steps 31 | print(""" 32 | 33 | Data Science Project '{{ cookiecutter.repo_name }}' created using the following 34 | parameters: 35 | 36 | {% for key, value in cookiecutter.items()|sort %} 37 | {{ "{0:26}".format(key + ":") }} {{ "{0!r}".format(value).strip("u") }} 38 | {%- endfor %} 39 | 40 | You are now ready to get started, however you should create a new github 41 | repository for your new project and add your project using the following 42 | commands (substitute REMOTE-REPOSITORY-URL with the remote repository url). 
43 | 44 | cd {{ cookiecutter.repo_name }} 45 | git init 46 | git add --all 47 | git commit -m "Initial commit" 48 | git remote add origin REMOTE-REPOSITORY-URL 49 | git push -u origin master 50 | """) 51 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/src/{{cookiecutter.package_name}}/io.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module should contain project specific io functionality. 3 | 4 | Loading and saving of files should be deferred to this class for easy and consistent file handling between different 5 | sources and to have a single location where file references are held. 6 | """ 7 | import os 8 | import pandas as pd 9 | from statoilds import datalake 10 | 11 | 12 | class IO: 13 | local_data_path = '.' 14 | 15 | def __init__(self, local_data_path: str): 16 | """ 17 | Constructor that can set the data path from where we will access local data.. 18 | 19 | Args: 20 | path: Path to the data folder. 21 | """ 22 | self.local_data_path = local_data_path 23 | 24 | def load_cleaned_file(self, download_always: bool = True): 25 | """ 26 | Load the cleaned file, optionally logging into to Azure to download. 27 | 28 | If token is passed then this will only login if token isn't already valid 29 | 30 | Args: 31 | download_always: Whether to always download the file even if it exists locally 32 | 33 | Returns: 34 | A dataframe used for logging in and the login token 35 | """ 36 | local_path = os.path.join(self.local_data_path, self.cleaned_file_local) 37 | 38 | token = datalake.login_and_download_file(self.cleaned_file_remote, 39 | local_path, 40 | download_always=download_always) 41 | 42 | df = pd.read_csv(local_path, 43 | dtype={'Well_name': 'category'}, 44 | parse_dates=['start']) 45 | return df 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file contains build instructions for Travis CI. 2 | 3 | # language and versions (note if using conda we don't use Travis python but add here for consistency 4 | language: python 5 | python: 6 | - "3.6" 7 | 8 | # install dependencies 9 | install: 10 | # if using pip then either install dependencies directly or add on seperate lines in a pip_requirements.txt file 11 | #- pip install pytest pytest-cov 12 | #- pip install coveralls 13 | #- pip install -r pip_requirements.txt 14 | 15 | # if using conda then we setup an environment from an conda_env.yml file 16 | # first install miniconda 17 | - sudo apt-get update 18 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 19 | - bash miniconda.sh -b -p $HOME/miniconda 20 | - export PATH="$HOME/miniconda/bin:$PATH" 21 | - hash -r 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | - conda info -a # Useful for debugging any issues with conda 25 | # now create the environment and install any extra packages 26 | - conda env create -n test-environment -f conda_env.yml 27 | - source activate test-environment 28 | # pip install and non conda packages 29 | - pip install coveralls 30 | 31 | # setup packages 32 | - python setup.py install 33 | 34 | # command to run tests 35 | script: 36 | # - py.test --cov-report term-missing --cov=maths 37 | # - py.test --doctest-modules --cov=maths3 --cov-report term-missing 38 | - py.test --cov . 
--cov-report term-missing 39 | branches: 40 | only: 41 | - master 42 | after_success: 43 | - coveralls 44 | #notifications: 45 | # email: 46 | # recipients: 47 | # - xxx@statoil.com 48 | # on_success: always 49 | # on_failure: always -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import find_packages, setup 3 | 4 | 5 | # Utility function to read the README file. 6 | # Used for the long_description. It's nice, because now 1) we have a top level 7 | # README file and 2) it's easier to type in the README file than to put a raw 8 | # string in below ... 9 | def read(file_name): 10 | return open(os.path.join(os.path.dirname(__file__), file_name)).read() 11 | 12 | {%- set license_classifiers = { 13 | 'MIT': 'License :: OSI Approved :: MIT License', 14 | 'LGPL3': 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', 15 | 'GPL3': 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)' 16 | } %} 17 | 18 | 19 | install_requires = [ 20 | 'numpy', 21 | 'pandas', 22 | 'pytest' 23 | ] 24 | 25 | setup_requirements = [ 26 | 'pytest-runner', 27 | 'better-setuptools-git-version' 28 | ] 29 | 30 | test_requirements = [ 31 | 'pytest', 32 | 'nbformat' 33 | ] 34 | 35 | setup( 36 | author='{{cookiecutter.author}}', 37 | author_email="Name@equinor.com", 38 | classifiers=[ 39 | "Development Status :: 3 - Alpha", 40 | "Topic :: Utilities", 41 | {%- if cookiecutter.open_source_license in license_classifiers %} 42 | '{{ license_classifiers[cookiecutter.open_source_license] }}', 43 | {%- endif %} 44 | ], 45 | 46 | name="{{cookiecutter.project_name}}", 47 | # version="0.0.1", 48 | version_config={ 49 | "version_format": "{tag}.dev{sha}", 50 | "starting_version": "0.0.1" 51 | }, 52 | description="{{cookiecutter.project_description}}", 53 | long_description=open('README.md').read(), 54 | packages=find_packages('src'), 55 | package_dir={'': 'src'}, 56 | setup_requires=setup_requirements, 57 | test_suite='tests', 58 | tests_require=test_requirements, 59 | install_requires=install_requires 60 | ) 61 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/extras/add_explorer_context_shortcuts.reg: -------------------------------------------------------------------------------- 1 | Windows Registry Editor Version 5.00 2 | ; 3 | ; This adds additional explorer menus. 
Verify any path's below and add / remove conda environments as needed 4 | ; 5 | 6 | ; 7 | ; Right click on folder commands 8 | ; 9 | 10 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science] 11 | "MUIVerb"="Data Science" 12 | "SubCommands"="" 13 | "Position"=- 14 | 15 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science\Shell\Run Jupyter Here] 16 | @="Run Jupyter Here" 17 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 18 | 19 | [HKEY_CLASSES_ROOT\Directory\Shell\Data Science\Shell\Run Jupyter Here\command] 20 | @="cmd.exe /K jupyter notebook" 21 | 22 | ; 23 | ; Right click on background commands 24 | ; 25 | 26 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science] 27 | "MUIVerb"="Data Science" 28 | "SubCommands"="" 29 | "Position"=- 30 | 31 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here] 32 | @="Run Jupyter Here" 33 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 34 | 35 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here\command] 36 | @="cmd.exe /K jupyter notebook" 37 | 38 | 39 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (deeplearning env)] 40 | @="Run Jupyter Here (deeplearning env)" 41 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 42 | 43 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (deeplearning env)\command] 44 | @="cmd.exe /K activate deeplearning & jupyter notebook" 45 | 46 | 47 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (anaconda env)] 48 | @="Run Jupyter Here (anaconda env)" 49 | "Icon"="C:\\appl\\Applications\\Anaconda3\\Menu\\jupyter.ico" 50 | 51 | [HKEY_CLASSES_ROOT\Directory\Background\Shell\Data Science\Shell\Run Jupyter Here (anaconda env)\command] 52 | @="cmd.exe /K activate anaconda & jupyter notebook" 53 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/deploy/score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Model scoring (WIP) - Contributions welcome!! 
4 | """ 5 | # import argparse 6 | import joblib 7 | import json 8 | import numpy 9 | 10 | from azureml.core.model import Model 11 | 12 | from sklearn import datasets 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.model_selection import train_test_split 15 | 16 | 17 | def init(): 18 | global model 19 | model_path = Model.get_model_path('{{cookiecutter.mlops_name}}') 20 | # deserialize the model file back into a sklearn model 21 | model = joblib.load(model_path) 22 | 23 | 24 | # note you can pass in multiple rows for scoring 25 | def run(raw_data): 26 | try: 27 | data = json.loads(raw_data)['data'] 28 | data = numpy.array(data) 29 | result = model.predict(data) 30 | # you can return any datatype if it is JSON-serializable 31 | return result.tolist() 32 | except Exception as e: 33 | error = str(e) 34 | return error 35 | 36 | 37 | def main(): 38 | # parser = argparse.ArgumentParser() 39 | # environment parameters 40 | # parser.add_argument( 41 | # '--data-folder', 42 | # help="local path to training data", 43 | # required=True 44 | # ) 45 | # parser.add_argument( 46 | # "--output-dir", type=str, default=os.path.join('..', 'outputs'), 47 | # help='location to writeoutput relative to this script' 48 | # ) 49 | 50 | # parse the arguments 51 | # args = parser.parse_args() 52 | 53 | # ws = Workspace.from_config() 54 | # model = Model(ws, 'sklearn_mnist') 55 | 56 | # model.download(target_dir=os.getcwd(), exist_ok=True) 57 | 58 | # verify the downloaded model file 59 | file_path = "ml-service/{{cookiecutter.mlops_name}}.joblib" 60 | model = joblib.load(file_path) 61 | 62 | # loading the iris dataset 63 | iris = datasets.load_iris() 64 | 65 | # X -> features, y -> label 66 | X = iris.data 67 | y = iris.target 68 | 69 | # dividing X, y into train and test data 70 | _, X_test, _, y_test = train_test_split(X, y, random_state=0) 71 | 72 | # training a linear SVM classifier 73 | y_pred = model.predict(X_test) 74 | 75 | # model accuracy 76 | accuracy = accuracy_score(y_test, y_pred) 77 | print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/index.rst: -------------------------------------------------------------------------------- 1 | {{cookiecutter.project_name}} 2 | ====================================================================== 3 | 4 | .. note:: 5 | This documentation page is for your own use as you best see fit for your project. 6 | 7 | In some cases it might be enough with the README.md in the project root, however 8 | you might use this if you want to publish API documentation, or have a website 9 | where you want to make detailed project information available (you can e.g. 10 | publish direct from blob storage). 11 | 12 | To generate documentation install make and from the docs folder run: 13 | 14 | .. code-block:: 15 | 16 | make html 17 | 18 | On Windows you can use the .bat file so from the docs folder just run: 19 | 20 | .. code-block:: 21 | 22 | make html 23 | 24 | Usage and setup 25 | --------------- 26 | 27 | Information about this project including steps on how to setup, run examples 28 | to reproduce results, and other guidelines. 29 | 30 | .. note:: 31 | Here you might include information about this project including steps on how to 32 | setup and reproduce results and findings, and other guidelines. 
As default we 33 | include the Equinor code of conduct, process documentation and any .rst files 34 | under the info folder. Edit / add / remove as needed. The table of contents is 35 | generated automatically based upon the referenced document headings. 36 | 37 | .. toctree:: 38 | :glob: 39 | :maxdepth: 2 40 | 41 | info/* 42 | 43 | Results and findings 44 | -------------------- 45 | 46 | Results and findings generated during the course of this project. 47 | 48 | .. note:: 49 | Here you might include a write up of results or links to notebooks or other 50 | information that contain results or other findings. As default we 51 | include any .rst files under the results folder. Edit / add / remove as needed. 52 | The table of contents is generated automatically based upon the referenced 53 | document headings. 54 | 55 | .. toctree:: 56 | :glob: 57 | :maxdepth: 2 58 | 59 | results/* 60 | 61 | API Documentation 62 | ----------------- 63 | 64 | Information on the underlying API including function, class and method 65 | documentation. 66 | 67 | .. note:: 68 | If you don't want this, then your project probably isn't written according 69 | to best practices and likely not production ready. If you disagree, just 70 | edit and remove this section. 71 | 72 | .. toctree:: 73 | :maxdepth: 2 74 | 75 | api-{{cookiecutter.package_name}} 76 | 77 | Indices and tables 78 | ------------------ 79 | 80 | * :ref:`genindex` 81 | * :ref:`modindex` 82 | * :ref:`search` 83 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/train/submit-train-local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Train a model locally using Azure ML. 4 | 5 | This will re-use the current python environment. 6 | 7 | Argv: 8 | output-dir: A folder to store any output to 9 | kernel: Kernel type to be used in the algorithm 10 | penalty: Penalty parameter of the error term 11 | """ 12 | import argparse 13 | import sys 14 | 15 | import azureml.core 16 | from azureml.core import Experiment, ScriptRunConfig, Workspace 17 | 18 | 19 | def submit(experiment_name: str, 20 | kernal: str, 21 | penalty: float): 22 | 23 | print("This notebook was created using version 1.0.83 of the Azure ML SDK") 24 | print("You are using version", azureml.core.VERSION, "of the SDK") 25 | 26 | # Get a reference to the workspace. Be sure to download the config.json 27 | # from your workspace and place in the parent folder. 28 | ws = Workspace.from_config() 29 | print('Loaded workspace', ws.name) 30 | 31 | # Reference the experiment 32 | experiment = Experiment(workspace=ws, name=experiment_name) 33 | print('Logging to experiment', experiment_name) 34 | 35 | # Create the RunConfiguration that will be used 36 | arguments = [ 37 | '--output-dir', "outputs", 38 | '--kernel', kernal, 39 | '--penalty', penalty, 40 | ] 41 | script_run_config = ScriptRunConfig(source_directory='.', 42 | script='train.py', 43 | arguments=arguments) 44 | 45 | # As we will run locally we can use our existing python environment 46 | script_run_config.run_config.environment. 
\ 47 | python.user_managed_dependencies = True 48 | 49 | # Submit the experiment to get a run and wait for completion 50 | run = experiment.submit(script_run_config) 51 | print('Submitted please wait...') 52 | run.wait_for_completion(show_output=True) 53 | 54 | # register the trained model 55 | model = run.register_model( 56 | model_name='{{cookiecutter.mlops_name}}', 57 | model_path='outputs/model/{{cookiecutter.mlops_name}}.joblib') 58 | 59 | print('Run number:', run.number) 60 | print('Run id:', run.id) 61 | print("Run details are available at:", run.get_portal_url()) 62 | print("Model: {} v{}".format(model.name, model.version)) 63 | 64 | if 'azureml.git.dirty' in run.properties: 65 | if run.properties['azureml.git.dirty']: 66 | print("WARNNG: You have uncomitted changes. To ensure " 67 | "reproducability check in your code before you train.") 68 | else: 69 | print('WARNNG: To ensure reproducability you should be using git!') 70 | 71 | 72 | def main(arguments: list): 73 | parser = argparse.ArgumentParser( 74 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 75 | 76 | # environment parameters 77 | parser.add_argument('--experiment', type=str, 78 | default='{{cookiecutter.mlops_name}}-local', 79 | help='The name of the Azure ML Experiment') 80 | 81 | # training specific parameters 82 | parser.add_argument('--kernel', type=str, default='linear', 83 | help='Kernel type to be used in the algorithm') 84 | parser.add_argument('--penalty', type=float, default=1.0, 85 | help='Penalty parameter of the error term') 86 | 87 | # parse the arguments 88 | args = parser.parse_args(arguments) 89 | 90 | # submit the job 91 | submit(args.experiment, 92 | args.kernel, 93 | args.penalty) 94 | 95 | 96 | if __name__ == '__main__': 97 | sys.exit(main(sys.argv[1:])) 98 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/scripts/train/submit-train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Train a model remotely using Azure ML compute. 4 | 5 | This will re-use the current python environment. 6 | 7 | Argv: 8 | output-dir: A folder to store any output to 9 | kernel: Kernel type to be used in the algorithm 10 | penalty: Penalty parameter of the error term 11 | """ 12 | import argparse 13 | import sys 14 | 15 | import azureml.core 16 | from azureml.core import Experiment, Workspace 17 | from azureml.train.sklearn import SKLearn 18 | 19 | 20 | def submit(experiment_name: str, 21 | compute_name: str, 22 | kernal: str, 23 | penalty: float): 24 | 25 | print("This notebook was created using version 1.0.83 of the Azure ML SDK") 26 | print("You are using version", azureml.core.VERSION, "of the SDK") 27 | 28 | # Get a reference to the workspace. Be sure to download the config.json 29 | # from your workspace and place in the parent folder. 30 | ws = Workspace.from_config() 31 | print('Loaded workspace', ws.name) 32 | 33 | # Reference the experiment 34 | experiment = Experiment(workspace=ws, name=experiment_name) 35 | print('Logging to experiment', experiment_name) 36 | 37 | # Get a reference to an existing the compute target. 38 | compute_target = ws.compute_targets[compute_name] 39 | 40 | # Setup an Estimator for submitting the job. An Estimator further wraps 41 | # RunConfig with additional configuration for specific cases. There are 42 | # Estimators provided for many common runtimes such as PyTorch and 43 | # Tensorflow. In this case we use the SKLearn specific estimator. 
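    # (Sketch only: for a runtime without a dedicated estimator class, the
    # generic azureml.train.estimator.Estimator could be used instead, passing
    # the entry script and its dependencies explicitly, e.g.
    #
    #     from azureml.train.estimator import Estimator
    #     estimator = Estimator(source_directory='.', entry_script='train.py',
    #                           compute_target=compute_target,
    #                           pip_packages=['matplotlib', 'scikit-learn'])
    #
    # The SKLearn estimator used below remains the default here.)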
44 | script_params = { 45 | '--output-dir': "outputs", 46 | '--kernel': kernal, 47 | '--penalty': penalty, 48 | } 49 | 50 | # NOTE: scikit-learn added below until default image includes v22.1+ 51 | estimator = SKLearn(source_directory=".", 52 | entry_script='train.py', 53 | script_params=script_params, 54 | compute_target=compute_target, 55 | pip_packages=['matplotlib', 'scikit-learn']) 56 | 57 | # Submit the experiment to get a run and wait for completion 58 | run = experiment.submit(estimator) 59 | print('Submitted please wait...') 60 | run.wait_for_completion(show_output=True) 61 | 62 | # register the trained model 63 | model = run.register_model( 64 | model_name='{{cookiecutter.mlops_name}}', 65 | model_path='outputs/model/{{cookiecutter.mlops_name}}.joblib') 66 | 67 | print('Run number:', run.number) 68 | print('Run id:', run.id) 69 | print("Run details are available at:", run.get_portal_url()) 70 | print("Model: {} v{}".format(model.name, model.version)) 71 | 72 | if 'azureml.git.dirty' in run.properties: 73 | if run.properties['azureml.git.dirty']: 74 | print("WARNNG: You have uncomitted changes. To ensure " 75 | "reproducability check in your code before you train.") 76 | else: 77 | print('WARNNG: To ensure reproducability you should be using git!') 78 | 79 | 80 | def main(arguments: list): 81 | parser = argparse.ArgumentParser( 82 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 83 | 84 | # environment parameters 85 | parser.add_argument('--experiment', type=str, 86 | default='{{cookiecutter.mlops_name}}', 87 | help='The name of the Azure ML Experiment') 88 | parser.add_argument('--compute-name', type=str, 89 | default='{{cookiecutter.mlops_compute_name}}', 90 | help='The name of the Azure ML compute cluster') 91 | 92 | # training specific parameters 93 | parser.add_argument('--kernel', type=str, default='linear', 94 | help='Kernel type to be used in the algorithm') 95 | parser.add_argument('--penalty', type=float, default=1.0, 96 | help='Penalty parameter of the error term') 97 | 98 | # parse the arguments 99 | args = parser.parse_args(arguments) 100 | 101 | # submit the job 102 | submit(args.experiment, 103 | args.compute_name, 104 | args.kernel, 105 | args.penalty) 106 | 107 | 108 | if __name__ == '__main__': 109 | sys.exit(main(sys.argv[1:])) 110 | -------------------------------------------------------------------------------- /tests/test_create.py: -------------------------------------------------------------------------------- 1 | # Some original code Copyright (c) Audrey Roy Greenfeld and individual contributors - 2 | # see https://github.com/audreyr/cookiecutter-pypackage/blob/master/LICENSE 3 | 4 | from contextlib import contextmanager 5 | import shlex 6 | import os 7 | import subprocess 8 | import datetime 9 | from cookiecutter.utils import rmtree 10 | 11 | 12 | @contextmanager 13 | def inside_dir(dir_path): 14 | """ 15 | Execute code from inside the given directory 16 | :param dir_path: String, path of the directory the command is being run. 
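    Example (illustrative sketch):
        with inside_dir('/tmp'):
            print(os.getcwd())  # prints /tmp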
17 | """ 18 | old_path = os.getcwd() 19 | try: 20 | os.chdir(dir_path) 21 | yield 22 | finally: 23 | os.chdir(old_path) 24 | 25 | 26 | @contextmanager 27 | def bake_in_temp_dir(cookies, *args, **kwargs): 28 | """ 29 | Delete the temporal directory that is created when executing the tests 30 | :param cookies: pytest_cookies.Cookies, 31 | cookie to be baked and its temporal files will be removed 32 | """ 33 | result = cookies.bake(*args, **kwargs) 34 | try: 35 | yield result 36 | finally: 37 | rmtree(str(result.project)) 38 | 39 | 40 | def run_inside_dir(command, dir_path): 41 | """ 42 | Run a command from inside a given directory, returning the exit status 43 | :param command: Command that will be executed 44 | :param dir_path: String, path of the directory the command is being run. 45 | """ 46 | with inside_dir(dir_path): 47 | return subprocess.check_call(shlex.split(command)) 48 | 49 | 50 | def check_output_inside_dir(command, dir_path): 51 | """Run a command from inside a given directory, returning the command output""" 52 | with inside_dir(dir_path): 53 | return subprocess.check_output(shlex.split(command)) 54 | 55 | 56 | def test_bake_with_defaults(cookies): 57 | with bake_in_temp_dir(cookies) as result: 58 | assert result.project.isdir() 59 | assert result.exit_code == 0 60 | assert result.exception is None 61 | 62 | found_top_level_files = [f.basename for f in result.project.listdir()] 63 | assert '.gitignore' in found_top_level_files 64 | assert 'conda_env.yml' in found_top_level_files 65 | assert 'README.md' in found_top_level_files 66 | assert 'setup.py' in found_top_level_files 67 | 68 | assert os.path.isdir(os.path.join(result.project, 'src', 'project_name')) 69 | assert os.path.isdir(os.path.join(result.project, 'tests', 'project_name')) 70 | 71 | 72 | def test_bake_and_run_tests(cookies): 73 | with bake_in_temp_dir(cookies) as result: 74 | assert result.project.isdir() 75 | assert run_inside_dir('python setup.py pytest', str(result.project)) == 0 76 | print("test_bake_and_run_tests path", str(result.project)) 77 | 78 | 79 | def test_bake_selecting_license(cookies): 80 | license_strings = { 81 | 'MIT': ('MIT ', 'MIT License', True), 82 | 'LGPL3': ('GNU LESSER GENERAL PUBLIC LICENSE', 'GNU Lesser General Public License v3 (LGPLv3)', False), 83 | 'GPL3': ('GNU GENERAL PUBLIC LICENSE', 'GNU General Public License v3 (GPLv3)', False) 84 | } 85 | for project_license, (license_subtext, setup_subtext, should_contain_year) in license_strings.items(): 86 | with bake_in_temp_dir(cookies, extra_context={'open_source_license': project_license}) as result: 87 | license_file_path = result.project.join('LICENSE') 88 | print(license_file_path) 89 | assert license_subtext in license_file_path.read() 90 | if should_contain_year: 91 | now = datetime.datetime.now() 92 | assert str(now.year) in license_file_path.read() 93 | assert setup_subtext in result.project.join('setup.py').read() 94 | 95 | 96 | def test_bake_not_open_source(cookies): 97 | with bake_in_temp_dir(cookies, extra_context={'open_source_license': 'Not open source'}) as result: 98 | found_top_level_files = [f.basename for f in result.project.listdir()] 99 | assert 'setup.py' in found_top_level_files 100 | assert 'LICENSE' not in found_top_level_files 101 | 102 | 103 | def test_bake_package_name(cookies): 104 | with bake_in_temp_dir(cookies, extra_context={'package_name': 'my_package'}) as result: 105 | with inside_dir(result.project): 106 | assert os.path.isdir(os.path.join('src', 'my_package')) 107 | assert 
os.path.isdir(os.path.join('tests', 'my_package'))
108 | 
109 |             assert 'from my_package import examplemodule' in \
110 |                 open(os.path.join('tests', 'my_package', 'examplemodule', 'test_add_value_to_numpy.py')).read()
111 |             assert 'from my_package import examplemodule' in \
112 |                 open(os.path.join('tests', 'my_package', 'examplemodule', 'test_hello_world.py')).read()
113 | 
--------------------------------------------------------------------------------
/{{cookiecutter.repo_name}}/scripts/train/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """Train a model.
4 | 
5 | Argv:
6 |     output-dir: A folder to store any output to
7 |     kernel: Kernel type to be used in the algorithm
8 |     penalty: Penalty parameter of the error term
9 | """
10 | import argparse
11 | import joblib
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | import os
15 | import sys
16 | 
17 | from sklearn import datasets
18 | from sklearn.metrics import classification_report
19 | from sklearn.metrics import plot_confusion_matrix
20 | from sklearn.model_selection import train_test_split
21 | from sklearn.model_selection import cross_val_score
22 | from sklearn.model_selection import StratifiedKFold
23 | from sklearn.svm import SVC
24 | 
25 | from amlrun import get_AMLRun
26 | 
27 | 
28 | def train(output_dir='outputs', kernel='linear', penalty=1.0):
29 |     # make sure the output directory exists
30 |     os.makedirs(output_dir, exist_ok=True)
31 | 
32 |     # Safely get the Azure ML run
33 |     run = get_AMLRun()
34 | 
35 |     # loading the iris dataset
36 |     iris = datasets.load_iris()
37 | 
38 |     # X -> features, y -> label
39 |     X = iris.data
40 |     y = iris.target
41 |     class_names = iris.target_names
42 | 
43 |     # dividing X, y into train and test data.
Random seed for reproducability 44 | X_train, X_test, y_train, y_test = \ 45 | train_test_split(X, y, test_size=0.20, random_state=0) 46 | 47 | # create our model - a linear SVM classifier 48 | svm_model_linear = SVC(kernel=kernel, C=penalty) 49 | 50 | # evaluate each model in turn 51 | kfold = StratifiedKFold(n_splits=10, random_state=1) 52 | cv_results = cross_val_score(svm_model_linear, X_train, y_train, 53 | cv=kfold, scoring='accuracy') 54 | 55 | print('Cross Validation Mean: ', cv_results.mean()) 56 | print('Cross Validation Std: ', cv_results.std()) 57 | if run is not None: 58 | run.log_list('Cross Validation Accuracies', cv_results) 59 | run.log('Cross Validation Mean', cv_results.mean()) 60 | run.log('Cross Validation Std', cv_results.std()) 61 | 62 | # now training on the full dataset 63 | svm_model_linear.fit(X_train, y_train) 64 | y_pred = svm_model_linear.predict(X_test) 65 | 66 | # model accuracy for X_test 67 | accuracy = svm_model_linear.score(X_test, y_test) 68 | print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) 69 | if run is not None: 70 | run.log('Accuracy', np.float(accuracy)) 71 | 72 | # Plot non-normalized confusion matrix 73 | title = 'Test confusion matrix' 74 | disp = plot_confusion_matrix(svm_model_linear, X_test, y_test, 75 | display_labels=class_names, 76 | cmap=plt.cm.Blues) 77 | disp.ax_.set_title(title) 78 | print(title) 79 | print(disp.confusion_matrix) 80 | 81 | if run is not None: 82 | run.log_image(title, plot=plt) 83 | else: 84 | plt.savefig(os.path.join(output_dir, 'confusion_matrix.png')) 85 | 86 | # Plot normalized confusion matrix 87 | title = 'Normalized test confusion matrix' 88 | disp = plot_confusion_matrix(svm_model_linear, X_test, y_test, 89 | display_labels=class_names, 90 | cmap=plt.cm.Blues, 91 | normalize='true') 92 | disp.ax_.set_title(title) 93 | print(title) 94 | print(disp.confusion_matrix) 95 | 96 | if run is not None: 97 | run.log_image(title, plot=plt) 98 | else: 99 | plt.savefig( 100 | os.path.join(output_dir, 'confusion_matrix_normalised.png')) 101 | 102 | # Print classification report 103 | print(classification_report(y_test, y_pred)) 104 | 105 | # files saved in the "outputs" folder are automatically uploaded into 106 | # Azure ML Service run history 107 | model_folder = os.path.join(output_dir, 'model') 108 | model_path = os.path.join(model_folder, '{{cookiecutter.mlops_name}}.joblib') 109 | os.makedirs(model_folder, exist_ok=True) 110 | joblib.dump(svm_model_linear, model_path) 111 | print('Output saved to', output_dir) 112 | 113 | 114 | def main(arguments): 115 | parser = argparse.ArgumentParser( 116 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 117 | # environment parameters 118 | # parser.add_argument( 119 | # '--data-folder', 120 | # help="local path to training data", 121 | # required=True 122 | # ) 123 | 124 | parser.add_argument( 125 | "--output-dir", type=str, 126 | default=os.path.join('..', '..', 'data', 'training', 'outputs'), 127 | help='location to write output' 128 | ) 129 | 130 | # training specific parameters 131 | parser.add_argument('--kernel', type=str, default='linear', 132 | help='Kernel type to be used in the algorithm') 133 | parser.add_argument('--penalty', type=float, default=1.0, 134 | help='Penalty parameter of the error term') 135 | 136 | # parse the arguments 137 | args = parser.parse_args(arguments) 138 | 139 | # setup output directory 140 | # model_output_dir = os.path.join( 141 | # os.path.dirname(os.path.realpath(__file__)), 142 | # args.output_dir) 143 | # 
os.makedirs(args.output-dir, exist_ok=True) 144 | 145 | train(args.output_dir, args.kernel, args.penalty) 146 | 147 | 148 | if __name__ == '__main__': 149 | sys.exit(main(sys.argv[1:])) 150 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/writeup/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath(os.path.join('..', '..', 'src'))) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = '{{cookiecutter.project_name}}' 21 | copyright = 'Equinor' 22 | author = '{{cookiecutter.project_name}}' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = 'version' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.napoleon', 36 | 'm2r' 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | # Extensions that recomonmark will use 48 | source_suffix = { 49 | '.rst': 'restructuredtext', 50 | '.txt': 'markdown', 51 | '.md': 'markdown', 52 | } 53 | 54 | # Auto generate API documentation for modules 55 | autosummary_generate = True 56 | 57 | # Default flags used by autodoc directives 58 | autodoc_default_options = { 59 | 'members': True, 60 | 'member-order': 'bysource', 61 | 'special-members': '__init__', 62 | } 63 | 64 | # -- Options for HTML output ------------------------------------------------- 65 | 66 | # The theme to use for HTML and HTML Help pages. See the documentation for 67 | # a list of builtin themes. 68 | # 69 | # html_theme = 'sphinx_rtd_theme' 70 | html_theme = 'alabaster' 71 | 72 | # Theme options are theme-specific and customize the look and feel of a theme 73 | # further. For a list of options available for each theme, see the 74 | # documentation. 
75 | # alabaster theme options: https://alabaster.readthedocs.io/en/latest/customization.html 76 | html_theme_options = { 77 | "description": "{{cookiecutter.project_description}}", 78 | "extra_nav_links": { 79 | "Index": "genindex.html", 80 | "Module Index": "py-modindex.html", 81 | "Search Page": "search.html" 82 | }, 83 | "github_banner": False, 84 | "note_bg": "#FFF59C", 85 | "show_powered_by": False, 86 | "show_related": False, 87 | "sidebar_collapse": False, 88 | } 89 | 90 | # Custom sidebar templates (often theme specific), maps document names to template names. 91 | # alabaster options: https://alabaster.readthedocs.io/en/latest/customization.html 92 | html_sidebars = { 93 | "index": [ 94 | "about.html", 95 | "navigation.html", 96 | "searchbox.html" 97 | ], 98 | "**": [ 99 | "about.html", 100 | 'navigation.html', 101 | "searchbox.html" 102 | ], 103 | } 104 | 105 | # Add any paths that contain custom themes here, relative to this directory. 106 | # html_theme_path = [] 107 | 108 | # The name for this set of Sphinx documents. If None, it defaults to 109 | # " v documentation". 110 | # html_title = None 111 | 112 | # A shorter title for the navigation bar. Default is the same as html_title. 113 | # html_short_title = None 114 | 115 | # The name of an image file (relative to this directory) to place at the top 116 | # of the sidebar. 117 | # html_logo = None 118 | 119 | # The name of an image file (within the static path) to use as favicon of the 120 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 121 | # pixels large. 122 | # html_favicon = None 123 | 124 | # Add any paths that contain custom static files (such as style sheets) here, 125 | # relative to this directory. They are copied after the builtin static files, 126 | # so a file named "default.css" will overwrite the builtin "default.css". 127 | html_static_path = [ 128 | # "_static" 129 | ] 130 | 131 | # Add any extra paths that contain custom files (such as robots.txt or 132 | # .htaccess) here, relative to this directory. These files are copied 133 | # directly to the root of the documentation. 134 | # html_extra_path = [] 135 | 136 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 137 | # using the given strftime format. 138 | # html_last_updated_fmt = '%b %d, %Y' 139 | 140 | # If true, SmartyPants will be used to convert quotes and dashes to 141 | # typographically correct entities. 142 | html_use_smartypants = False 143 | 144 | # Additional templates that should be rendered to pages, maps page names to 145 | # template names. 146 | # html_additional_pages = {} 147 | 148 | # If false, no module index is generated. 149 | # html_domain_indices = True 150 | 151 | # If false, no index is generated. 152 | # html_use_index = True 153 | 154 | # If true, the index is split into individual pages for each letter. 155 | # html_split_index = False 156 | 157 | # If true, links to the reST sources are added to the pages. 158 | html_show_sourcelink = False 159 | 160 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 161 | html_show_sphinx = False 162 | 163 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
164 | html_show_copyright = True 165 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/notebooks/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example Notebook\n", 8 | "\n", 9 | "This is an example notebook.\n", 10 | "\n", 11 | "Modify / remove any of the below as suited for your needs" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Standard python packages\n", 28 | "import os\n", 29 | "import sys\n", 30 | "\n", 31 | "# Other package imports\n", 32 | "# import numpy as np\n", 33 | "# import pandas as pd\n", 34 | "# from matplotlib import pyplot as plt" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Setup some global settings and configuration" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))\n", 51 | "data_folder = os.path.join(project_root, 'data')\n", 52 | "data_folder_raw = os.path.join(data_folder, 'raw')\n", 53 | "src_folder = os.path.join(project_root, 'src')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "This notebook uses the shared package however first we need to ensure it is available (otherwise you get an error about the module not being found). You can either run setup.py as discussed in the readme to install the package or modify the path to include the src folder." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# Explicitly set path so don't need to run setup.py - if we have multiple copies of \n", 70 | "# the code we would otherwise need to setup a seperate environment for each to\n", 71 | "# ensure the code pointers are correct.\n", 72 | "sys.path.insert(0, src_folder)\n", 73 | "\n", 74 | "from {{cookiecutter.package_name}} import examplemodule" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Some Processing" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "'Hello World'" 93 | ] 94 | }, 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "# Use our package\n", 102 | "examplemodule.hello_world()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Appendix 1 - Environment Configuration" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "D:\\Development\\DataScience\\Projects\\DataScienceTemplate\\notebooks\\eda\n", 122 | "3.6.4 |Anaconda custom (64-bit)| (default, Mar 12 2018, 20:20:50) [MSC v.1900 64 bit (AMD64)]\n", 123 | "C:\\Applications\\Miniconda3\\envs\\anaconda\\python.exe\n", 124 | "['D:\\\\Development\\\\DataScience\\\\Projects\\\\DataScienceTemplate\\\\src', '', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\python36.zip', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\DLLs', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages', 'd:\\\\development\\\\datascience\\\\projects\\\\data-science-shared\\\\python', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\xgboost-0.7-py3.6.egg', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\win32', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\win32\\\\lib', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\Pythonwin', 'C:\\\\Applications\\\\Miniconda3\\\\envs\\\\anaconda\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\mark_\\\\.ipython']\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "print (os.getcwd())\n", 130 | "print (sys.version)\n", 131 | "print (sys.executable)\n", 132 | "print (sys.path)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Appendix 2 - Automated Tests" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "============================= test session starts =============================\n", 152 | "platform win32 -- Python 3.6.4, pytest-3.4.2, py-1.5.2, pluggy-0.6.0\n", 153 | "rootdir: D:\\Development\\DataScience\\Projects\\DataScienceTemplate, inifile:\n", 154 | "plugins: remotedata-0.2.0, openfiles-0.2.0, doctestplus-0.1.2, cov-2.5.1, arraydiff-0.2\n", 155 | "collected 5 items\n", 156 | "\n", 157 | 
"tests\\examplepackage\\examplemodule\\test_add_value_to_numpy.py ... [ 60%]\n", 158 | "tests\\examplepackage\\examplemodule\\test_hello_world.py .. [100%]\n", 159 | "\n", 160 | "========================== 5 passed in 0.37 seconds ===========================\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "# Run tests within notebook\n", 166 | "f_path = os.getcwd()\n", 167 | "os.chdir(os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)))\n", 168 | "\n", 169 | "# Run pytest from the repository root\n", 170 | "!pytest\n", 171 | "\n", 172 | "os.chdir(f_path)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.6.4" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/data_science_code_of_conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Authorship and validity 4 | This code of conduct is derived from the “Data Science Code of Professional Conduct” written by the Data Science Association. The full version is available at [http://www.datascienceassn.org/code-of-conduct.html](http://www.datascienceassn.org/code-of-conduct.html). For definitions of the terminology in this code of conduct please consult the full version. 5 | 6 | This code of conduct attempts to distill the fuller code to a more manageable level, a code which is easier to communicate whilst (hopefully) still retaining the spirit of the Data Association’s code of conduct. We have done this through highlighting what we believe to be the most important points for data science in Equinor. Points conspicuous in their absence should not be assumed to be invalid. The full code of conduct is embraced in its entirety. 7 | 8 | The aim of this code of conduct is to help ensure the quality of Data Science carried out at Equinor is at least in accordance with the guidelines set out below. This includes the work of internal Data Scientists, other internal resources and external partners and vendors. Indeed, all data science which is delivered to Equinor. 9 | 10 | Key tenets are competence, honesty, scientific integrity and repeatability. 11 | 12 | The rules detailed here are intended to augment relevant laws and regulations which are valid in the local where the data science work is being delivered (e.g. the European Union’s General Data Protection Regulation). In the event of any perceived conflict between this code of conduct and local rules and regulations it is the local rules and regulations which take precedence. 13 | 14 | ## Success criteria and validation 15 | 16 | * **Rule 0:** The data scientist shall, in collaboration with the client, develop success criteria for the purposes of establishing metrics by which the quality of the delivery can be assessed. Success criteria shall: 17 | * **a)** As much as possible, be based on objective, industry accepted quantitative metrics such as precision and recall. 
18 | * **b)** Be agreed as early as possible in the data science delivery 19 | * **c)** Remain unchanged throughout the delivery, unless it is agreed with the client that the success criteria are not appropriate 20 | * **d)** Not be repeatedly adjusted in light of unfavourable evidence. When re-establishing success criteria (as of rule 0c), great care must be taken to ensure that this will change will support the quality of the delivery 21 | * **e)** Where quantitative metrics cannot be agreed due to the nature of the problem, e.g. language models, anomaly detection with low rate of known anomalies, an honest assessment of the quality of the delivery is still required 22 | * **f)** Success criteria should focus on generalisation of models and workflows. 23 | 24 | ## Client Communication 25 | 26 | * **Rule 1:** A data scientist shall consult with the client about any real, perceived and potentially hidden risks in relying on data science results. 27 | 28 | * **Rule 2:** A data scientist shall explain data science results to the extent reasonably necessary to permit the client to make informed decisions regarding the data science. 29 | Confidential Information 30 | 31 | * **Rule 3:** Confidential information is information that the data scientist creates, develops, receives, uses or learns during employment as a data scientist for a client, either working directly in-house as an employee of an organization or as an independent professional. It includes information that is not generally known by the public about the client, including client affiliates, employees, customers or other parties with whom the client has a relationship and who have an expectation of confidentiality. The data scientist has a professional duty to protect all confidential information, regardless of its form or format, from the time of its creation or receipt until its authorised disposal. 32 | 33 | * **Rule 4:** A data scientist shall make reasonable efforts to prevent the inadvertent or unauthorized disclosure of, or unauthorized access to, information relating to the representation of a client, which means: 34 | * **a)** Not displaying, reviewing or discussing confidential information in public places, in the presence of third parties or that may be overheard; 35 | * **b)** Not e-mailing confidential information outside of the organization or professional practice to a personal e-mail account or otherwise removing confidential information from the client by removing hard copies or copying it to any form of recordable digital media device; and 36 | * **c)** Communicating confidential information only to client employees and authorized agents (such as legal professionals or external auditors) who have a legitimate business reason to know the information. 37 | 38 | * **Rule 5:** A data scientist shall comply with client policies that apply to the acceptance, proper use and handling of confidential information, as well as any written agreements between the data scientist and the client relating to confidential information. 39 | 40 | * **Rule 6:** A data scientist shall protect client confidential information after termination of work for the client. 41 | * **Rule 7:** A data scientist shall return any and all confidential information in possession or control upon termination of the data scientist - client relationship and, if requested, execute an affidavit affirming compliance with obligations relating to confidential information. 
42 | 43 | ## Data Science Evidence, Quality of Data and Quality of Evidence 44 | 45 | * **Rule 8:** A data scientist shall inform the client of all data science results and material facts known to the data scientist that will enable the client to make informed decisions, whether or not the data science evidence is adverse. 46 | 47 | * **Rule 9:** The data scientist understands that bad or uncertain data quality may compromise data science professional practice and may communicate a false reality or promote an illusion of understanding. The data scientist shall take reasonable measures to protect the client from relying and making decisions based on bad or uncertain data quality. 48 | 49 | * **Rule 10:** The data scientist understands that evidence may be weak or strong or uncertain and shall take reasonable measures to protect the client from relying and making decisions based on weak or uncertain evidence. 50 | 51 | * **Rule 11:** A data scientist shall not knowingly: 52 | * **a)** fail to use scientific methods in performing data science; 53 | * **b)** fail to convey the quality of evidence in a reasonable and understandable manner for the client; 54 | * **c)** claim weak or uncertain evidence is strong evidence; 55 | * **d)** misuse weak or uncertain evidence to communicate a false reality or promote an illusion of understanding; 56 | * **e)** fail to convey the quality of data in a reasonable and understandable manner for the client; 57 | * **f)** claim bad or uncertain data quality is good data quality; 58 | * **g)** misuse bad or uncertain data quality to communicate a false reality or promote an illusion of understanding; 59 | * **h)** engage in cherry-picking (pointing to individual cases or data that seem to confirm a particular position, while ignoring a significant portion of related cases or data that may contradict that position of data or data science evidence); 60 | * **i)** fail to attempt to replicate data science results; 61 | * **j)** fail to disclose that data science results could not be replicated; 62 | * **k)** misuse data science results to communicate a false reality or promote an illusion of understanding; 63 | * **l)** fail to disclose failed experiments or disconfirming evidence known to the data scientist to be directly adverse to the position of the client; 64 | * **m)** offer evidence that the data scientist knows to be false. If a data scientist questions the quality of data or evidence the data scientist must disclose this to the client. If a data scientist has offered material evidence and the data scientist comes to know of its falsity, the data scientist shall take reasonable remedial measures, including disclosure to the client. A data scientist may disclose and label evidence the data scientist reasonably believes is false. 65 | 66 | * **Rule 12:** A data scientist shall use reasonable diligence when assigning value and meaning to the following concepts when conducting data science: 67 | * **a)** "Statistically Significant" 68 | * **b)** "Correlation" 69 | * **c)** "Spurious Correlation" 70 | * **d)** "Causation" 71 | 72 | * **Rule 13:** A data scientist shall not present incomplete evidence as real data science evidence. A data scientist may present a theory constituting incomplete evidence but shall label and clearly communicate the use of incomplete evidence. 
73 | 74 | * **Rule 14:** A data scientist shall use the data science method which consists of the following steps: 75 | * **a)** Careful observations of data, data sets and relationships between data; 76 | * **b)** Deduction of meaning from the data and different data relationships; 77 | * **c)** Formation of hypotheses; 78 | * **d)** Experimental or observational testing of the validity of the hypotheses. To be termed scientific, a method of inquiry must be based on empirical and measurable evidence subject to specific principles of reasoning. 79 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/.gitignore: -------------------------------------------------------------------------------- 1 | ## The .gitignore file specifies things that git should ignore. 2 | ## This default template includes entries for R, Python and visual studio 3 | 4 | ## 5 | ## Add custom entries below here. 6 | ## 7 | scripts/config.json 8 | 9 | ## 10 | ## R Section - See https://github.com/github/gitignore/blob/master/R.gitignore 11 | ## 12 | 13 | # History files 14 | .Rhistory 15 | .Rapp.history 16 | 17 | # Session Data files 18 | .RData 19 | 20 | # Example code in package build process 21 | *-Ex.R 22 | 23 | # Output files from R CMD build 24 | /*.tar.gz 25 | 26 | # Output files from R CMD check 27 | /*.Rcheck/ 28 | 29 | # RStudio files 30 | .Rproj.user/ 31 | 32 | # produced vignettes 33 | vignettes/*.html 34 | vignettes/*.pdf 35 | 36 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 37 | .httr-oauth 38 | 39 | # knitr and R markdown default cache directories 40 | /*_cache/ 41 | /cache/ 42 | 43 | # Temporary files created by R markdown 44 | *.utf8.md 45 | *.knit.md 46 | 47 | ## 48 | ## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore 49 | ## 50 | 51 | # PyCharm ide files 52 | .idea 53 | 54 | # Byte-compiled / optimized / DLL files 55 | __pycache__/ 56 | *.py[cod] 57 | *$py.class 58 | 59 | # C extensions 60 | *.so 61 | 62 | # Distribution / packaging 63 | .Python 64 | env/ 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | *.egg-info/ 78 | .installed.cfg 79 | *.egg 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .coverage 95 | .coverage.* 96 | .cache 97 | nosetests.xml 98 | coverage.xml 99 | *.cover 100 | .hypothesis/ 101 | 102 | # Translations 103 | *.mo 104 | *.pot 105 | 106 | # Django stuff: 107 | *.log 108 | local_settings.py 109 | 110 | # Flask stuff: 111 | instance/ 112 | .webassets-cache 113 | 114 | # Scrapy stuff: 115 | .scrapy 116 | 117 | # Sphinx documentation 118 | docs/_build/ 119 | 120 | # PyBuilder 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # pyenv 127 | .python-version 128 | 129 | # celery beat schedule file 130 | celerybeat-schedule 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | 156 | ## Ignore Visual Studio temporary files, build results, and 157 | ## files generated by popular Visual Studio add-ons. 158 | ## 159 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 160 | 161 | # User-specific files 162 | *.suo 163 | *.user 164 | *.userosscache 165 | *.sln.docstates 166 | 167 | # User-specific files (MonoDevelop/Xamarin Studio) 168 | *.userprefs 169 | 170 | # Build results 171 | [Dd]ebug/ 172 | [Dd]ebugPublic/ 173 | [Rr]elease/ 174 | [Rr]eleases/ 175 | x64/ 176 | x86/ 177 | bld/ 178 | [Bb]in/ 179 | [Oo]bj/ 180 | [Ll]og/ 181 | 182 | # Visual Studio 2015 cache/options directory 183 | .vs/ 184 | # Uncomment if you have tasks that create the project's static files in wwwroot 185 | #wwwroot/ 186 | 187 | # MSTest test Results 188 | [Tt]est[Rr]esult*/ 189 | [Bb]uild[Ll]og.* 190 | 191 | # NUNIT 192 | *.VisualState.xml 193 | TestResult.xml 194 | 195 | # Build Results of an ATL Project 196 | [Dd]ebugPS/ 197 | [Rr]eleasePS/ 198 | dlldata.c 199 | 200 | # Benchmark Results 201 | BenchmarkDotNet.Artifacts/ 202 | 203 | # .NET Core 204 | project.lock.json 205 | project.fragment.lock.json 206 | artifacts/ 207 | **/Properties/launchSettings.json 208 | 209 | *_i.c 210 | *_p.c 211 | *_i.h 212 | *.ilk 213 | *.meta 214 | *.obj 215 | *.pch 216 | *.pdb 217 | *.pgc 218 | *.pgd 219 | *.rsp 220 | *.sbr 221 | *.tlb 222 | *.tli 223 | *.tlh 224 | *.tmp 225 | *.tmp_proj 226 | *.log 227 | *.vspscc 228 | *.vssscc 229 | .builds 230 | *.pidb 231 | *.svclog 232 | *.scc 233 | 234 | # Chutzpah Test files 235 | _Chutzpah* 236 | 237 | # Visual C++ cache files 238 | ipch/ 239 | *.aps 240 | *.ncb 241 | *.opendb 242 | *.opensdf 243 | *.sdf 244 | *.cachefile 245 | *.VC.db 246 | *.VC.VC.opendb 247 | 248 | # Visual Studio profiler 249 | *.psess 250 | *.vsp 251 | *.vspx 252 | *.sap 253 | 254 | # Visual Studio Trace Files 255 | *.e2e 256 | 257 | # TFS 2012 Local Workspace 258 | $tf/ 259 | 260 | # Guidance Automation Toolkit 261 | *.gpState 262 | 263 | # ReSharper is a .NET coding add-in 264 | _ReSharper*/ 265 | *.[Rr]e[Ss]harper 266 | *.DotSettings.user 267 | 268 | # JustCode is a .NET coding add-in 269 | .JustCode 270 | 271 | # TeamCity is a build add-in 272 | _TeamCity* 273 | 274 | # DotCover is a Code Coverage Tool 275 | *.dotCover 276 | 277 | # AxoCover is a Code Coverage Tool 278 | .axoCover/* 279 | !.axoCover/settings.json 280 | 281 | # Visual Studio code 
coverage results 282 | *.coverage 283 | *.coveragexml 284 | 285 | # NCrunch 286 | _NCrunch_* 287 | .*crunch*.local.xml 288 | nCrunchTemp_* 289 | 290 | # MightyMoose 291 | *.mm.* 292 | AutoTest.Net/ 293 | 294 | # Web workbench (sass) 295 | .sass-cache/ 296 | 297 | # Installshield output folder 298 | [Ee]xpress/ 299 | 300 | # DocProject is a documentation generator add-in 301 | DocProject/buildhelp/ 302 | DocProject/Help/*.HxT 303 | DocProject/Help/*.HxC 304 | DocProject/Help/*.hhc 305 | DocProject/Help/*.hhk 306 | DocProject/Help/*.hhp 307 | DocProject/Help/Html2 308 | DocProject/Help/html 309 | 310 | # Click-Once directory 311 | publish/ 312 | 313 | # Publish Web Output 314 | *.[Pp]ublish.xml 315 | *.azurePubxml 316 | # Note: Comment the next line if you want to checkin your web deploy settings, 317 | # but database connection strings (with potential passwords) will be unencrypted 318 | *.pubxml 319 | *.publishproj 320 | 321 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 322 | # checkin your Azure Web App publish settings, but sensitive information contained 323 | # in these scripts will be unencrypted 324 | PublishScripts/ 325 | 326 | # NuGet Packages 327 | *.nupkg 328 | # The packages folder can be ignored because of Package Restore 329 | **/[Pp]ackages/* 330 | # except build/, which is used as an MSBuild target. 331 | !**/[Pp]ackages/build/ 332 | # Uncomment if necessary however generally it will be regenerated when needed 333 | #!**/[Pp]ackages/repositories.config 334 | # NuGet v3's project.json files produces more ignorable files 335 | *.nuget.props 336 | *.nuget.targets 337 | 338 | # Microsoft Azure Build Output 339 | csx/ 340 | *.build.csdef 341 | 342 | # Microsoft Azure Emulator 343 | ecf/ 344 | rcf/ 345 | 346 | # Windows Store app package directories and files 347 | AppPackages/ 348 | BundleArtifacts/ 349 | Package.StoreAssociation.xml 350 | _pkginfo.txt 351 | *.appx 352 | 353 | # Visual Studio cache files 354 | # files ending in .cache can be ignored 355 | *.[Cc]ache 356 | # but keep track of directories ending in .cache 357 | !*.[Cc]ache/ 358 | 359 | # Others 360 | ClientBin/ 361 | ~$* 362 | *~ 363 | *.dbmdl 364 | *.dbproj.schemaview 365 | *.jfm 366 | *.pfx 367 | *.publishsettings 368 | orleans.codegen.cs 369 | 370 | # Since there are multiple workflows, uncomment next line to ignore bower_components 371 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 372 | #bower_components/ 373 | 374 | # RIA/Silverlight projects 375 | Generated_Code/ 376 | 377 | # Backup & report files from converting an old project file 378 | # to a newer Visual Studio version. Backup files are not needed, 379 | # because we have git ;-) 380 | _UpgradeReport_Files/ 381 | Backup*/ 382 | UpgradeLog*.XML 383 | UpgradeLog*.htm 384 | 385 | # SQL Server files 386 | *.mdf 387 | *.ldf 388 | *.ndf 389 | 390 | # Business Intelligence projects 391 | *.rdl.data 392 | *.bim.layout 393 | *.bim_*.settings 394 | 395 | # Microsoft Fakes 396 | FakesAssemblies/ 397 | 398 | # GhostDoc plugin setting file 399 | *.GhostDoc.xml 400 | 401 | # Node.js Tools for Visual Studio 402 | .ntvs_analysis.dat 403 | node_modules/ 404 | 405 | # Typescript v1 declaration files 406 | typings/ 407 | 408 | # Visual Studio 6 build log 409 | *.plg 410 | 411 | # Visual Studio 6 workspace options file 412 | *.opt 413 | 414 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
415 | *.vbw 416 | 417 | # Visual Studio LightSwitch build output 418 | **/*.HTMLClient/GeneratedArtifacts 419 | **/*.DesktopClient/GeneratedArtifacts 420 | **/*.DesktopClient/ModelManifest.xml 421 | **/*.Server/GeneratedArtifacts 422 | **/*.Server/ModelManifest.xml 423 | _Pvt_Extensions 424 | 425 | # Paket dependency manager 426 | .paket/paket.exe 427 | paket-files/ 428 | 429 | # FAKE - F# Make 430 | .fake/ 431 | 432 | # JetBrains Rider 433 | .idea/ 434 | *.sln.iml 435 | 436 | # CodeRush 437 | .cr/ 438 | 439 | # Python Tools for Visual Studio (PTVS) 440 | __pycache__/ 441 | *.pyc 442 | 443 | # Cake - Uncomment if you are using it 444 | # tools/** 445 | # !tools/packages.config 446 | 447 | # Tabs Studio 448 | *.tss 449 | 450 | # Telerik's JustMock configuration file 451 | *.jmconfig 452 | 453 | # BizTalk build output 454 | *.btp.cs 455 | *.btm.cs 456 | *.odx.cs 457 | *.xsd.cs 458 | 459 | # OpenCover UI analysis results 460 | OpenCover/ 461 | .cache/v/cache/lastfailed 462 | tests/.cache/v/cache/lastfailed 463 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## The .gitignore file specifies things that git should ignore. 2 | ## This default template includes entries for R, Python and visual studio 3 | 4 | ## 5 | ## Add custom entries below here. 6 | ## 7 | dst-env/ 8 | .cache/v/cache/lastfailed 9 | tests/.cache/v/cache/lastfailed 10 | .vscode/settings.json 11 | 12 | ## 13 | ## R Section - See https://github.com/github/gitignore/blob/master/R.gitignore 14 | ## 15 | 16 | # History files 17 | .Rhistory 18 | .Rapp.history 19 | 20 | # Session Data files 21 | .RData 22 | 23 | # Example code in package build process 24 | *-Ex.R 25 | 26 | # Output files from R CMD build 27 | /*.tar.gz 28 | 29 | # Output files from R CMD check 30 | /*.Rcheck/ 31 | 32 | # RStudio files 33 | .Rproj.user/ 34 | 35 | # produced vignettes 36 | vignettes/*.html 37 | vignettes/*.pdf 38 | 39 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 40 | .httr-oauth 41 | 42 | # knitr and R markdown default cache directories 43 | /*_cache/ 44 | /cache/ 45 | 46 | # Temporary files created by R markdown 47 | *.utf8.md 48 | *.knit.md 49 | 50 | ## 51 | ## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore 52 | ## 53 | 54 | # PyCharm ide files 55 | .idea 56 | 57 | # Byte-compiled / optimized / DLL files 58 | __pycache__/ 59 | *.py[cod] 60 | *$py.class 61 | 62 | # C extensions 63 | *.so 64 | 65 | # Distribution / packaging 66 | .Python 67 | env/ 68 | build/ 69 | develop-eggs/ 70 | dist/ 71 | downloads/ 72 | eggs/ 73 | .eggs/ 74 | lib/ 75 | lib64/ 76 | parts/ 77 | sdist/ 78 | var/ 79 | wheels/ 80 | *.egg-info/ 81 | .installed.cfg 82 | *.egg 83 | 84 | # PyInstaller 85 | # Usually these files are written by a python script from a template 86 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
87 | *.manifest 88 | *.spec 89 | 90 | # Installer logs 91 | pip-log.txt 92 | pip-delete-this-directory.txt 93 | 94 | # Unit test / coverage reports 95 | htmlcov/ 96 | .tox/ 97 | .coverage 98 | .coverage.* 99 | .cache 100 | nosetests.xml 101 | coverage.xml 102 | *.cover 103 | .hypothesis/ 104 | 105 | # Translations 106 | *.mo 107 | *.pot 108 | 109 | # Django stuff: 110 | *.log 111 | local_settings.py 112 | 113 | # Flask stuff: 114 | instance/ 115 | .webassets-cache 116 | 117 | # Scrapy stuff: 118 | .scrapy 119 | 120 | # Sphinx documentation 121 | docs/_build/ 122 | 123 | # PyBuilder 124 | target/ 125 | 126 | # Jupyter Notebook 127 | .ipynb_checkpoints 128 | 129 | # pyenv 130 | .python-version 131 | 132 | # celery beat schedule file 133 | celerybeat-schedule 134 | 135 | # SageMath parsed files 136 | *.sage.py 137 | 138 | # dotenv 139 | .env 140 | 141 | # virtualenv 142 | .venv 143 | venv/ 144 | ENV/ 145 | 146 | # Spyder project settings 147 | .spyderproject 148 | .spyproject 149 | 150 | # Rope project settings 151 | .ropeproject 152 | 153 | # mkdocs documentation 154 | /site 155 | 156 | # mypy 157 | .mypy_cache/ 158 | 159 | ## Ignore Visual Studio temporary files, build results, and 160 | ## files generated by popular Visual Studio add-ons. 161 | ## 162 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 163 | 164 | # User-specific files 165 | *.suo 166 | *.user 167 | *.userosscache 168 | *.sln.docstates 169 | 170 | # User-specific files (MonoDevelop/Xamarin Studio) 171 | *.userprefs 172 | 173 | # Build results 174 | [Dd]ebug/ 175 | [Dd]ebugPublic/ 176 | [Rr]elease/ 177 | [Rr]eleases/ 178 | x64/ 179 | x86/ 180 | bld/ 181 | [Bb]in/ 182 | [Oo]bj/ 183 | [Ll]og/ 184 | 185 | # Visual Studio 2015 cache/options directory 186 | .vs/ 187 | # Uncomment if you have tasks that create the project's static files in wwwroot 188 | #wwwroot/ 189 | 190 | # MSTest test Results 191 | [Tt]est[Rr]esult*/ 192 | [Bb]uild[Ll]og.* 193 | 194 | # NUNIT 195 | *.VisualState.xml 196 | TestResult.xml 197 | 198 | # Build Results of an ATL Project 199 | [Dd]ebugPS/ 200 | [Rr]eleasePS/ 201 | dlldata.c 202 | 203 | # Benchmark Results 204 | BenchmarkDotNet.Artifacts/ 205 | 206 | # .NET Core 207 | project.lock.json 208 | project.fragment.lock.json 209 | artifacts/ 210 | **/Properties/launchSettings.json 211 | 212 | *_i.c 213 | *_p.c 214 | *_i.h 215 | *.ilk 216 | *.meta 217 | *.obj 218 | *.pch 219 | *.pdb 220 | *.pgc 221 | *.pgd 222 | *.rsp 223 | *.sbr 224 | *.tlb 225 | *.tli 226 | *.tlh 227 | *.tmp 228 | *.tmp_proj 229 | *.log 230 | *.vspscc 231 | *.vssscc 232 | .builds 233 | *.pidb 234 | *.svclog 235 | *.scc 236 | 237 | # Chutzpah Test files 238 | _Chutzpah* 239 | 240 | # Visual C++ cache files 241 | ipch/ 242 | *.aps 243 | *.ncb 244 | *.opendb 245 | *.opensdf 246 | *.sdf 247 | *.cachefile 248 | *.VC.db 249 | *.VC.VC.opendb 250 | 251 | # Visual Studio profiler 252 | *.psess 253 | *.vsp 254 | *.vspx 255 | *.sap 256 | 257 | # Visual Studio Trace Files 258 | *.e2e 259 | 260 | # TFS 2012 Local Workspace 261 | $tf/ 262 | 263 | # Guidance Automation Toolkit 264 | *.gpState 265 | 266 | # ReSharper is a .NET coding add-in 267 | _ReSharper*/ 268 | *.[Rr]e[Ss]harper 269 | *.DotSettings.user 270 | 271 | # JustCode is a .NET coding add-in 272 | .JustCode 273 | 274 | # TeamCity is a build add-in 275 | _TeamCity* 276 | 277 | # DotCover is a Code Coverage Tool 278 | *.dotCover 279 | 280 | # AxoCover is a Code Coverage Tool 281 | .axoCover/* 282 | !.axoCover/settings.json 283 | 284 | # Visual Studio code 
coverage results 285 | *.coverage 286 | *.coveragexml 287 | 288 | # NCrunch 289 | _NCrunch_* 290 | .*crunch*.local.xml 291 | nCrunchTemp_* 292 | 293 | # MightyMoose 294 | *.mm.* 295 | AutoTest.Net/ 296 | 297 | # Web workbench (sass) 298 | .sass-cache/ 299 | 300 | # Installshield output folder 301 | [Ee]xpress/ 302 | 303 | # DocProject is a documentation generator add-in 304 | DocProject/buildhelp/ 305 | DocProject/Help/*.HxT 306 | DocProject/Help/*.HxC 307 | DocProject/Help/*.hhc 308 | DocProject/Help/*.hhk 309 | DocProject/Help/*.hhp 310 | DocProject/Help/Html2 311 | DocProject/Help/html 312 | 313 | # Click-Once directory 314 | publish/ 315 | 316 | # Publish Web Output 317 | *.[Pp]ublish.xml 318 | *.azurePubxml 319 | # Note: Comment the next line if you want to checkin your web deploy settings, 320 | # but database connection strings (with potential passwords) will be unencrypted 321 | *.pubxml 322 | *.publishproj 323 | 324 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 325 | # checkin your Azure Web App publish settings, but sensitive information contained 326 | # in these scripts will be unencrypted 327 | PublishScripts/ 328 | 329 | # NuGet Packages 330 | *.nupkg 331 | # The packages folder can be ignored because of Package Restore 332 | **/[Pp]ackages/* 333 | # except build/, which is used as an MSBuild target. 334 | !**/[Pp]ackages/build/ 335 | # Uncomment if necessary however generally it will be regenerated when needed 336 | #!**/[Pp]ackages/repositories.config 337 | # NuGet v3's project.json files produces more ignorable files 338 | *.nuget.props 339 | *.nuget.targets 340 | 341 | # Microsoft Azure Build Output 342 | csx/ 343 | *.build.csdef 344 | 345 | # Microsoft Azure Emulator 346 | ecf/ 347 | rcf/ 348 | 349 | # Windows Store app package directories and files 350 | AppPackages/ 351 | BundleArtifacts/ 352 | Package.StoreAssociation.xml 353 | _pkginfo.txt 354 | *.appx 355 | 356 | # Visual Studio cache files 357 | # files ending in .cache can be ignored 358 | *.[Cc]ache 359 | # but keep track of directories ending in .cache 360 | !*.[Cc]ache/ 361 | 362 | # Others 363 | ClientBin/ 364 | ~$* 365 | *~ 366 | *.dbmdl 367 | *.dbproj.schemaview 368 | *.jfm 369 | *.pfx 370 | *.publishsettings 371 | orleans.codegen.cs 372 | 373 | # Since there are multiple workflows, uncomment next line to ignore bower_components 374 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 375 | #bower_components/ 376 | 377 | # RIA/Silverlight projects 378 | Generated_Code/ 379 | 380 | # Backup & report files from converting an old project file 381 | # to a newer Visual Studio version. Backup files are not needed, 382 | # because we have git ;-) 383 | _UpgradeReport_Files/ 384 | Backup*/ 385 | UpgradeLog*.XML 386 | UpgradeLog*.htm 387 | 388 | # SQL Server files 389 | *.mdf 390 | *.ldf 391 | *.ndf 392 | 393 | # Business Intelligence projects 394 | *.rdl.data 395 | *.bim.layout 396 | *.bim_*.settings 397 | 398 | # Microsoft Fakes 399 | FakesAssemblies/ 400 | 401 | # GhostDoc plugin setting file 402 | *.GhostDoc.xml 403 | 404 | # Node.js Tools for Visual Studio 405 | .ntvs_analysis.dat 406 | node_modules/ 407 | 408 | # Typescript v1 declaration files 409 | typings/ 410 | 411 | # Visual Studio 6 build log 412 | *.plg 413 | 414 | # Visual Studio 6 workspace options file 415 | *.opt 416 | 417 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
418 | *.vbw 419 | 420 | # Visual Studio LightSwitch build output 421 | **/*.HTMLClient/GeneratedArtifacts 422 | **/*.DesktopClient/GeneratedArtifacts 423 | **/*.DesktopClient/ModelManifest.xml 424 | **/*.Server/GeneratedArtifacts 425 | **/*.Server/ModelManifest.xml 426 | _Pvt_Extensions 427 | 428 | # Paket dependency manager 429 | .paket/paket.exe 430 | paket-files/ 431 | 432 | # FAKE - F# Make 433 | .fake/ 434 | 435 | # JetBrains Rider 436 | .idea/ 437 | *.sln.iml 438 | 439 | # CodeRush 440 | .cr/ 441 | 442 | # Python Tools for Visual Studio (PTVS) 443 | __pycache__/ 444 | *.pyc 445 | 446 | # Cake - Uncomment if you are using it 447 | # tools/** 448 | # !tools/packages.config 449 | 450 | # Tabs Studio 451 | *.tss 452 | 453 | # Telerik's JustMock configuration file 454 | *.jmconfig 455 | 456 | # BizTalk build output 457 | *.btp.cs 458 | *.btm.cs 459 | *.odx.cs 460 | *.xsd.cs 461 | 462 | # OpenCover UI analysis results 463 | OpenCover/ 464 | junit/ 465 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/README.md: -------------------------------------------------------------------------------- 1 | {% if cookiecutter.devops_organisation != '' %} 2 | [![Build Status](https://dev.azure.com/{{cookiecutter.devops_organisation}}/{{cookiecutter.repo_name}}/_apis/build/status/equinor.{{cookiecutter.repo_name}}?branchName=master)](https://dev.azure.com/{{cookiecutter.devops_organisation}}/{{cookiecutter.repo_name}}/_build/latest?definitionId=1&branchName=master) 3 | {% endif %} 4 | 5 | # {{cookiecutter.project_name}} 6 | 7 | {{cookiecutter.project_description}} 8 | 9 | ## Setup 10 | 1. Install git and checkout the [git code repository] 11 | 2. Install [anaconda] python version 3.6+ 12 | 3. Change working directory into the git code repository root 13 | 4. Create the self contained conda environment. In a terminal go to the git code repository root and enter the command: 14 | 15 | `conda env create --file conda_env.yml` 16 | 17 | 5. Any python modules under src need to be available to other scripts. This can be done in a couple of ways. You can 18 | setup and install the python modules by executing the setup.py command below which will install the packages to the 19 | conda environments site-packages folder but with a symlink to the src folder so modifications are reflected immediately. 20 | 21 | `python setup.py develop` 22 | 23 | As an alternative you may prefer to set the python path directly from the console, within notebooks, test scripts 24 | etc. From Pycharm you can also right click the src folder and select the _Mark Directory As | Source Root_ option. 25 | 26 | 6. .. Place your own project specific setup steps here e.g. copying data files ... 27 | 28 | When distributing your module, you can create a Python egg with the command `python setup.py bdist_egg` and upload the egg. 29 | 30 | NOTE: When working in the project notebooks from within the Equinor network, you may need to include the lines below if your proxy is not otherwise setup. 31 | 32 | `os.environ['HTTP_PROXY']="http://www-proxy.statoil.no:80"`
33 | `os.environ['HTTPS_PROXY']="http://www-proxy.statoil.no:80"` 34 | 35 | ## Using the Python Conda environment 36 | 37 | Once the Python Conda environment has been set up, you can 38 | 39 | * Activate the environment using the following command in a terminal window: 40 | 41 | * Windows: `activate {{cookiecutter.conda_name}}` 42 | * Linux, OS X: `source activate {{cookiecutter.conda_name}}` 43 | * The __environment is activated per terminal session__, so you must activate it every time you open terminal. 44 | 45 | * Deactivate the environment using the following command in a terminal window: 46 | 47 | * Windows: `deactivate {{cookiecutter.conda_name}}` 48 | * Linux, OS X: `source deactivate {{cookiecutter.conda_name}}` 49 | 50 | * Delete the environment using the command (can't be undone): 51 | 52 | * `conda remove --name {{cookiecutter.conda_name}} --all` 53 | 54 | ## Initial File Structure 55 | 56 | ``` 57 | ├── .gitignore <- Files that should be ignored by git. Add seperate .gitignore files in sub folders if 58 | │ needed 59 | ├── conda_env.yml <- Conda environment definition for ensuring consistent setup across environments 60 | ├── LICENSE 61 | ├── README.md <- The top-level README for developers using this project. 62 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 63 | │ generated with `pip freeze > requirements.txt`. Might not be needed if using conda. 64 | ├── setup.py <- Metadata about your project for easy distribution. 65 | │ 66 | ├── data 67 | │ ├── interim_[desc] <- Interim files - give these folders whatever name makes sense. 68 | │ ├── processed <- The final, canonical data sets for modeling. 69 | │ ├── raw <- The original, immutable data dump. 70 | │ ├── temp <- Temporary files. 71 | │ └── training <- Files relating to the training process 72 | │ 73 | ├── docs <- Documentation 74 | │ ├── data_science_code_of_conduct.md <- Code of conduct. 75 | │ ├── process_documentation.md <- Standard template for documenting process and decisions. 76 | │ └── writeup <- Sphinx project for project writeup including auto generated API. 77 | │ ├── conf.py <- Sphinx configurtation file. 78 | │ ├── index.rst <- Start page. 79 | │ ├── make.bat <- For generating documentation (Windows) 80 | │ └── Makefikle <- For generating documentation (make) 81 | │ 82 | ├── examples <- Add folders as needed e.g. examples, eda, use case 83 | │ 84 | ├── extras <- Miscellaneous extras. 85 | │ └── add_explorer_context_shortcuts.reg <- Adds additional Windows Explorer context menus for starting jupyter. 86 | │ 87 | ├── notebooks <- Notebooks for analysis and testing 88 | │ ├── eda <- Notebooks for EDA 89 | │ │ └── example.ipynb <- Example python notebook 90 | │ ├── features <- Notebooks for generating and analysing features (1 per feature) 91 | │ ├── modelling <- Notebooks for modelling 92 | │ └── preprocessing <- Notebooks for Preprocessing 93 | │ 94 | ├── scripts <- Standalone scripts 95 | │ ├── deploy <- MLOps scripts for deployment (WIP) 96 | │ │ └── score.py <- Scoring script 97 | │ ├── train <- MLOps scripts for training 98 | │ │ ├── submit-train.py <- Script for submitting a training run to Azure ML Service 99 | │ │ ├── submit-train-local.py <- Script for local training using Azure ML 100 | │ │ └── train.py <- Example training script using the iris dataset 101 | │ ├── example.py <- Example sctipt 102 | │ └── MLOps.ipynb <- End to end MLOps example (To be refactored into the above) 103 | │ 104 | ├── src <- Code for use in this project. 
105 | │ └── {{cookiecutter.package_name}} <- Example python package - place shared code in such a package 106 | │ ├── __init__.py <- Python package initialisation 107 | │ ├── examplemodule.py <- Example module with functions and naming / commenting best practices 108 | │ ├── features.py <- Feature engineering functionality 109 | │ ├── io.py <- IO functionality 110 | │ └── pipeline.py <- Pipeline functionality 111 | │ 112 | └── tests <- Test cases (named after module) 113 | ├── test_notebook.py <- Example testing that Jupyter notebooks run without errors 114 | └── {{cookiecutter.package_name}} <- {{cookiecutter.package_name}} tests 115 | ├── examplemodule <- examplemodule tests (1 file per method tested) 116 | ├── features <- features tests 117 | ├── io <- io tests 118 | └── pipeline <- pipeline tests 119 | ``` 120 | 121 | ## MLOps 122 | Starter scripts for MLOps with Azure ML Service are included as a part of this template in the scripts folder and may be 123 | customised for your own purposes. Please browse the contents of the scripts folder for more details. 124 | 125 | For model training, the provided setup allows for running locally without any dependency on Azure ML by running train.py 126 | in the scripts/train folder directly. Alternatively you can submit local or remote runs using the submit scripts in the 127 | same folder. 128 | 129 | ## Testing 130 | Reproducibility and the correct functioning of code are essential to avoid wasted time. If a code block is copied more 131 | than once then it should be placed into a common script / module under src and unit tests added. The same applies for 132 | any other non-trivial code to ensure correct functioning. 133 | 134 | To run tests, install pytest using pip or conda (it should have been set up already if you used the conda_env.yml file) and 135 | then from the repository root run 136 | 137 | ``` 138 | pytest 139 | ``` 140 | 141 | ## Automated Document Generation 142 | A [sphinx](https://www.sphinx-doc.org/) project is provided under docs/writeup that will generate a writeup that 143 | also includes automatically generated API information for any packages. The output can be created in multiple 144 | formats including html and pdf. If you are using CI then this can be run automatically. To run 145 | locally execute the following commands: 146 | 147 | ``` 148 | cd docs/writeup 149 | make html 150 | ``` 151 | 152 | On Windows this will run make.bat; a Makefile is also included for those using the 'make' command. 153 | 154 | ## Development Process 155 | Contributions to this template are greatly appreciated and encouraged. 156 | 157 | To contribute an update simply: 158 | * Create a new branch / fork for your updates. 159 | * Check that your code follows the PEP8 guidelines (line lengths up to 120 are ok) and other general conventions within this document. 160 | * Ensure that as far as possible there are unit tests covering the functionality of any new code (see the example test after this list). 161 | * Check that all existing unit tests still pass. 162 | * Edit this document if needed to describe new files or other important information. 163 | * Create a pull request.
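For the unit-test item above, a minimal sketch of what such a test can look like is shown below. The template already ships its own tests under tests/{{cookiecutter.package_name}}/examplemodule/, so this sketch is purely illustrative (the function name is made up for the example); the expected 'Hello World' return value is the one shown in notebooks/example.ipynb.

```
# Illustrative sketch of a unit test for the example module (not the template's
# own test file). Assumes the package has been installed, e.g. with
# `python setup.py develop`, so that the import below resolves.
from {{cookiecutter.package_name}} import examplemodule


def test_hello_world_returns_greeting():
    # hello_world() returns the greeting demonstrated in notebooks/example.ipynb
    assert examplemodule.hello_world() == 'Hello World'
```

Run it from the repository root with `pytest`.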
164 | 165 | ## Important Links 166 | * https://wiki.equinor.com/wiki/index.php/Statoil_Data_Science_Technical_Standards - Data Science Technical Standards (Equinor Internal) 167 | * https://dataplatformwiki.azurewebsites.net/doku.php - Data Platform wiki (Equinor internal) 168 | * https://github.com/equinor/data-science-shared - Shared Data Science Code Repository (Equinor internal) 169 | 170 | ## References 171 | * https://github.com/equinor/data-science-template/ - The master template for this project 172 | * http://docs.python-guide.org/en/latest/writing/structure/ 173 | * https://github.com/Azure/Microsoft-TDSP 174 | * https://drivendata.github.io/cookiecutter-data-science/ 175 | 176 | [//]: # 177 | [anaconda]: 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > [!WARNING] 2 | > This repository is no longer maintained and as it has diverged from changes to Azure ML is now archived. 3 | 4 | [![Build Status](https://dev.azure.com/mhew/data-science-template/_apis/build/status/data-science-template?branchName=master)](https://dev.azure.com/mhew/data-science-template/_build/latest?definitionId=15&branchName=master) 5 | 6 | # Data Science Template 7 | This is a starter template for data science projects in Equinor, although it may also be useful for others. It contains many of the essential artifacts that you will need and presents a number of best practices including code setup, samples, MLOps using Azure, a standard document to guide and gather information relating to the data science process and more. 8 | 9 | As it is impossible to create a single template that will meet every projects needs, this example should be considered 10 | a starting point and changed based upon the working and evolution of your project. 11 | 12 | Before working with the contents of this template or Data Science projects in general it is recommended to familiarise yourself with the Equinor [Data Science Technical Standards](https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards) (Currently Equinor internal only) 13 | 14 | ## Getting Started With This Template 15 | This template is provided as a [Cookiecutter template](http://cookiecutter.readthedocs.org/en/latest/installation.html) so you 16 | can quickly create an instance customised for your project. An assumption is that you have a working python installation. 17 | 18 | To get running, first install the latest Cookiecutter if you haven't installed it yet (this requires 19 | Cookiecutter 1.4.0 or higher): 20 | 21 | pip install -U cookiecutter 22 | 23 | ### Create project 24 | Then generate a new project for your own use based upon the template, answering the questions to customise the generated 25 | project: 26 | 27 | cookiecutter https://github.com/equinor/data-science-template.git 28 | 29 | The values you are prompted for are: 30 | 31 | | Value | Description | 32 | | :--- | --- | 33 | | project_name | A name for your project. Used mostly within documentation | 34 | | project_description | A description to include in the README.md | 35 | | repo_name | The name of the github repository where the project will be held | 36 | | conda_name | The name of the conda environment to use | 37 | | package_name | A name for the generated python package. | 38 | | mlops_name | Default name for Azure ML. | 39 | | mlops_compute_name | Default Azure ML compute cluster name to use. 
| 40 | | author | The main author of the solution. Included in the setup.py file | 41 | | open_source_license | What type of open source license the project will be released under | 42 | | devops_organisation | An Azure DevOps organisation. Leave blank if you aren't using Azure DevOps | 43 | 44 | If you are uncertain about what to enter for any value then just accept the defaults. You can always change the generated project later. 45 | 46 | *Getting problems? You can always download this repository using the download button above and reference the local copy e.g. cookiecutter c:\Downloads\data-science-template, however ideally fix any git proxy or other issues that are causing problems.* 47 | 48 | You are now ready to get started, however you should first create a new github repository for your new project and add your 49 | project using the following commands (substitute myproject with the name of your project and REMOTE-REPOSITORY-URL 50 | with the remote repository url). 51 | 52 | cd myproject 53 | git init 54 | git add . 55 | git commit -m "Initial commit" 56 | git remote add origin REMOTE-REPOSITORY-URL 57 | git remote -v 58 | git push origin master 59 | 60 | ### Continuous Integration 61 | Continuous Integration (CI) increase quality by building, running tests and performing other validation whenever 62 | code is committed. The template contains a build pipeline for Azure DevOps, however requires a couple of manual 63 | steps to setup: 64 | 65 | * Log in to http://dev.azure.com and browse to, or create an organisation & project. The project name should be the same as your github repository name. 66 | * Under *Pipelines -> Builds select* *New Pipeline* 67 | * Select github and then your repository. Login / grant any permissions as prompted 68 | * In the review pane click *run* 69 | 70 | You are now setup for CI and automated test / building. You should verify the badge link in this README corresponds 71 | with your DevOps project, and as a further step might setup any release pipelines for automated deployment. 72 | 73 | At this stage the build pipeline doesn't include MLOps steps, although these can be added based uon your needs. 74 | 75 | ### Finally 76 | 77 | * Update the project readme file with additional project specific details including setup, configuration and usage. 78 | * The docs\process_documentation.md file should be completed phase by phase, and each phase result shall be submitted for review and approval before the project moves on to the next phase. This is to assist with the gathering of essential information required to deliver a correct and robust solution. The git respoitory shall be added to the script that populates the [knowledge repository](https://git.statoil.no/DataScience/projects) to ease future knowledge sharing. 79 | 80 | ## Generated Project Contents 81 | Depending upon the selected options when creating the project, the generated structure will look similar to the below: 82 | 83 | ``` 84 | ├── .gitignore <- Files that should be ignored by git. Add seperate .gitignore files in sub folders if 85 | │ needed 86 | ├── conda_env.yml <- Conda environment definition for ensuring consistent setup across environments 87 | ├── LICENSE 88 | ├── README.md <- The top-level README for developers using this project. 89 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 90 | │ generated with `pip freeze > requirements.txt`. Might not be needed if using conda. 
91 | ├── setup.py                          <- Metadata about your project for easy distribution. 92 | │ 93 | ├── data 94 | │   ├── interim_[desc]                <- Interim files - give these folders whatever name makes sense. 95 | │   ├── processed                     <- The final, canonical data sets for modeling. 96 | │   ├── raw                           <- The original, immutable data dump. 97 | │   ├── temp                          <- Temporary files. 98 | │   └── training                      <- Files relating to the training process 99 | │ 100 | ├── docs                              <- Documentation 101 | │   ├── data_science_code_of_conduct.md  <- Code of conduct. 102 | │   ├── process_documentation.md      <- Standard template for documenting process and decisions. 103 | │   └── writeup                       <- Sphinx project for project writeup including auto generated API. 104 | │      ├── conf.py                    <- Sphinx configuration file. 105 | │      ├── index.rst                  <- Start page. 106 | │      ├── make.bat                   <- For generating documentation (Windows) 107 | │      └── Makefile                   <- For generating documentation (make) 108 | │ 109 | ├── examples                          <- Add folders as needed e.g. examples, eda, use case 110 | │ 111 | ├── extras                            <- Miscellaneous extras. 112 | │   └── add_explorer_context_shortcuts.reg <- Adds additional Windows Explorer context menus for starting jupyter. 113 | │ 114 | ├── notebooks                         <- Notebooks for analysis and testing 115 | │   ├── eda                           <- Notebooks for EDA 116 | │   │   └── example.ipynb             <- Example python notebook 117 | │   ├── features                      <- Notebooks for generating and analysing features (1 per feature) 118 | │   ├── modelling                     <- Notebooks for modelling 119 | │   └── preprocessing                 <- Notebooks for Preprocessing 120 | │ 121 | ├── scripts                           <- Standalone scripts 122 | │   ├── deploy                        <- MLOps scripts for deployment (WIP) 123 | │   │   └── score.py                  <- Scoring script 124 | │   ├── train                         <- MLOps scripts for training 125 | │   │   ├── submit-train.py           <- Script for submitting a training run to Azure ML Service 126 | │   │   ├── submit-train-local.py     <- Script for local training using Azure ML 127 | │   │   └── train.py                  <- Example training script using the iris dataset 128 | │   ├── example.py                    <- Example script 129 | │   └── MLOps.ipynb                   <- End to end MLOps example (To be refactored into the above) 130 | │ 131 | ├── src                               <- Code for use in this project. 132 | │   └── examplepackage                <- Example python package - place shared code in such a package 133 | │       ├── __init__.py               <- Python package initialisation 134 | │       ├── examplemodule.py          <- Example module with functions and naming / commenting best practices 135 | │       ├── features.py               <- Feature engineering functionality 136 | │       ├── io.py                     <- IO functionality 137 | │       └── pipeline.py               <- Pipeline functionality 138 | │ 139 | └── tests                             <- Test cases (named after module) 140 |     ├── test_notebook.py              <- Example testing that Jupyter notebooks run without errors 141 |     ├── examplepackage                <- examplepackage tests 142 |     ├── examplemodule                 <- examplemodule tests (1 file per method tested) 143 |     ├── features                      <- features tests 144 |     ├── io                            <- io tests 145 |     └── pipeline                      <- pipeline tests 146 | ``` 147 | 148 | ## Contributing to This Template 149 | Contributions to this template are greatly appreciated and encouraged. 150 | 151 | To contribute an update, simply: 152 | * Submit an issue describing your proposed change to the repo in question. 153 | * The repo owner will respond to your issue promptly. 154 | * Fork the desired repo, develop and test your code changes. 155 | * Check that your code follows the PEP8 guidelines (line lengths up to 120 are ok) and other general conventions within this document. 156 | * Ensure that your code adheres to the existing style.
Refer to the 157 | [Google Cloud Platform Samples Style Guide]( 158 | https://github.com/GoogleCloudPlatform/Template/wiki/style.html) for the 159 | recommended coding standards for this organization. 160 | * Ensure that, as far as possible, there are unit tests covering the functionality of any new code. 161 | * Check that all existing unit tests still pass. 162 | * Edit this document and the template README.md if needed to describe new files or other important information. 163 | * Submit a pull request. 164 | 165 | 166 | ### Template development environment 167 | To develop this template further, you might want to set up a virtual environment: 168 | 169 | #### Setup using venv 170 | ``` 171 | cd data-science-template 172 | python -m venv dst-env 173 | ``` 174 | 175 | #### Activate environment 176 | Mac / Linux 177 | ``` 178 | source dst-env/bin/activate 179 | ``` 180 | 181 | Windows 182 | ``` 183 | dst-env\Scripts\activate 184 | ``` 185 | 186 | #### Install Dependencies 187 | ``` 188 | pip install -r requirements.txt 189 | ``` 190 | 191 | 192 | #### Testing 193 | To run the template tests, install pytest using pip or conda and then, from the repository root, run 194 | 195 | pytest tests 196 | 197 | #### Linting 198 | To verify that your code adheres to Python standards, run linting as shown below: 199 | 200 | flake8 --max-line-length=120 *.py hooks/ tests/ 201 | 202 | ## Important Links 203 | * https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards - Data Science Technical Standards (Equinor Internal) 204 | * https://dataplatformwiki.azurewebsites.net/doku.php - Data Platform wiki (Equinor internal) 205 | * https://github.com/Statoil/data-science-shared - Shared Data Science Code Repository (Equinor internal) 206 | 207 | ## References 208 | * https://github.com/Statoil/data-science-template/ - The master template for this project 209 | * http://docs.python-guide.org/en/latest/writing/structure/ 210 | * https://github.com/Azure/Microsoft-TDSP 211 | * https://drivendata.github.io/cookiecutter-data-science/ 212 | * https://github.com/audreyr/cookiecutter-pypackage 213 | 214 | [//]: # 215 | [anaconda]: 216 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/docs/process_documentation.md: -------------------------------------------------------------------------------- 1 | # Data Science Process 2 | 3 | ## Introduction 4 | This DS process standard takes the form of a template. Each DS project shall complete this template to make sure that DS projects in Statoil can deliver value to the business to the maximal extent. 5 | 6 | The document is structured into 6 sections according to the 6 DS phases: 7 | * Business understanding and problem definition 8 | * Project planning 9 | * Collecting and preparing data 10 | * Modeling 11 | * Evaluation 12 | * Deployment and monitoring 13 | 14 | For each phase, a set of tasks is identified. A group of questions is listed under each task. These questions are designed to guide DS teams to carry out DS projects in a standard way. 15 | 16 | The DS team shall complete all the questions as a joint effort of the whole team. The answers for each phase shall be reviewed/approved by the QA team (for example, DS VP, PM, DS discipline advisor, peers, stakeholders) before the project moves to the next phase (Table 1 shall contain the review/approval records for each phase). The questions answered in previous iterations shall be updated if needed, and the updated document shall be reviewed and approved.
17 | 18 | The DS project leader (DS team responsible) owns (is responsible for completing) this document. 19 | 20 | Figure 1 shows the adopted data science process phases, and the document/quality control process is plotted in the middle. A more detailed document/quality control flowchart is illustrated in Figure 2. 21 | 22 | The document shall be added to the script that populates the [knowledge repository](https://git.statoil.no/DataScience/projects) to ease future knowledge sharing. 23 | 24 | * * * 25 | **Table 1.** Data Science phase review/approval table 26 | 27 | 28 | 29 | * * * 30 | 31 | **Figure 1.** Data Science process diagram 32 | 33 | 34 | 35 | _ _ _ 36 | 37 | **Figure 2.** Data Science process FlowChart 38 | 39 | 40 | 41 | _ _ _ 42 | 43 | ## 1 Business Understanding and Problem Definition 44 | 45 | ### 1.1 Connecting business 46 | #### 1.1.1 Who is the sponsor? 47 | #### 1.1.2 What is the business organization chart? 48 | #### 1.1.3 What is the business goal/objective? 49 | #### 1.1.4 What is the business strategy? 50 | #### 1.1.5 What is the business process? 51 | #### 1.1.6 How is the business efficiency measured? 52 | #### 1.1.7 Who is the main contact person? 53 | #### 1.1.8 Who is the SME? 54 | #### 1.1.9 Who is the end user? 55 | 56 | ### 1.2 Understanding business challenge 57 | #### 1.2.1 What is the business challenge/pain point/bottleneck? 58 | #### 1.2.2 What are the identified use cases? 59 | #### 1.2.3 Has the discovery workshop been executed, and what is the result? 60 | #### 1.2.4 What is the current solution / how might a theoretical human “expert” perform the task today? 61 | #### 1.2.5 If your human expert were to perform this task, how would you respond to them so that they improve next time? For classification, do this for all four quadrants of a typical confusion matrix (e.g. true/false positives and negatives). 62 | #### 1.2.6 If a human were to perform this task, what assumptions would the user want them to make? 63 | #### 1.2.7 What are the constraints? 64 | 65 | 66 | ### 1.3 Identifying requirements 67 | #### 1.3.1 What are the functional requirements? 68 | #### 1.3.2 What are the identified end user UX requirements? 69 | #### 1.3.3 How should the potential solution fit into the business process? 70 | #### 1.3.4 What is the solution delivery deadline? 71 | 72 | ### 1.4 Discovering data 73 | #### 1.4.1 What data do you ideally need to solve the problem? 74 | #### 1.4.2 What data is available? 75 | #### 1.4.3 Have you used the Statoil data catalog for data discovery ([link to data catalog](https://eun-su1.azuredatacatalog.com/#/home))? 76 | #### 1.4.4 What is the data format, e.g. structured in a database, free text or images? 77 | #### 1.4.5 Who owns the data? 78 | #### 1.4.6 How is the data accessed? 79 | #### 1.4.7 Who is the contact person to access the data? 80 | #### 1.4.8 What are the risks for accessing, understanding and analyzing the data? 81 | #### 1.4.9 What is the effort estimation for collecting and preparing data from each data source? 82 | #### 1.4.10 What are the data governance issues? 83 | #### 1.4.11 How is each use case/process/entity covered by the discovered data? 84 | 85 | ### 1.5 Identifying DS opportunity 86 | #### 1.5.1 What is the problem type from a DS perspective, e.g. regression, classification, clustering, etc.? 87 | #### 1.5.2 What is the problem-solving process from a DS perspective (solution framework)? 88 | #### 1.5.3 What previous relevant experience/components can be reused?
89 | #### 1.5.4 Has the feasibility study been done? What is the result? 90 | #### 1.5.5 What is the business value the DS solution can bring? 91 | #### 1.5.6 What is the consequence of a potential DS solution error? How can we control it? 92 | #### 1.5.7 How can the DS solution fit into the business process? 93 | #### 1.5.8 What are the main risks of failure? How can we control them? 94 | #### 1.5.9 What is the feedback from the SME on the proposed DS solution? 95 | 96 | ### 1.6 Setting success/stop criteria 97 | #### 1.6.1 What is the evaluation strategy: objective evaluation or subjective evaluation? 98 | #### 1.6.2 What are the objective success criteria (recall, precision, accuracy, etc.)? 99 | #### 1.6.3 What are the subjective success criteria? 100 | #### 1.6.4 What are the stop criteria? 101 | #### 1.6.5 Have the success/stop criteria been communicated to and agreed with stakeholders? 102 | #### 1.6.6 Is insufficient SME involvement part of the stop criteria? What has been agreed on with respect to SME involvement and SME experience level? 103 | 104 | ### 1.7 Document review and artifact archiving 105 | #### 1.7.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 106 | #### 1.7.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 107 | 108 | ## 2 Project Planning 109 | 110 | ### 2.1 Resource planning 111 | #### 2.1.1 What is the competence and resource plan? 112 | #### 2.1.2 What are the competences needed (competence matrix)? 113 | #### 2.1.3 How can team members meet the competence matrix? 114 | #### 2.1.4 Does the team include the following roles: data scientist, ML engineer, and data engineer? 115 | #### 2.1.5 Are there dedicated SMEs allocated for the project? 116 | #### 2.1.6 What is the cooperation model with the SME and end user? 117 | 118 | ### 2.2 Time planning 119 | #### 2.2.1 What is the time plan? 120 | #### 2.2.2 How is the DS time plan aligned with the main project plan (if relevant)? 121 | #### 2.2.3 How do the DS iterations fit into the time plan? 122 | #### 2.2.4 What are the definitions of the project phases/steps, and what are the milestones for them? 123 | #### 2.2.5 What are the risks to following the plan? How can the risks be controlled? 124 | #### 2.2.6 What is the feedback from stakeholders on the time plan? 125 | 126 | ### 2.3 Process planning 127 | #### 2.3.1 What is the scope of the DS project? 128 | #### 2.3.2 Is the DS project a standalone process, or is it running in parallel with a bigger SW implementation project that the DS solution is part of? 129 | #### 2.3.3 If parallel, what is the cooperation model between the main process and the DS process? 130 | #### 2.3.4 If parallel, has the research/iteration nature of the DS project been communicated to the main project management? 131 | #### 2.3.5 Do you follow the rule: start simple, get value into the business, and iterate for improvement? 132 | #### 2.3.6 How are the DS iterations planned in the project? 133 | #### 2.3.7 What is the plan to build up a pipeline as early as possible to speed up iteration? 134 | #### 2.3.8 Is the Kanban agile project process considered as the first option? If not, why? 135 | #### 2.3.9 What is the feedback loop from stakeholders? 136 | #### 2.3.10 How can the results from each phase be reviewed by stakeholders? 137 | #### 2.3.11 Has an architecture contract been completed?
138 | 139 | ### 2.4 DS tools planning 140 | #### 2.4.1 What tools/platforms/systems are planned to be used in the DS project? 141 | #### 2.4.2 Are there experience/competence gaps in using these tools? If yes, what is the plan to close the gaps? 142 | #### 2.4.3 Are all the tools standard tools according to the DCOE DS tech standard? If not, why? ([link to the standard wiki page](https://wiki.statoil.no/wiki/index.php/Statoil_Data_Science_Technical_Standards)) 143 | #### 2.4.4 What is the plan for code/document review, artifact archiving, and knowledge sharing? 144 | 145 | ### 2.5 Document review and artifact archiving 146 | #### 2.5.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 147 | #### 2.5.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 148 | 149 | ## 3 Data collecting and preparing 150 | 151 | ### 3.1 Collecting data 152 | #### 3.1.1 What are the rules to select relevant and irrelevant data? 153 | #### 3.1.2 What is the frequency/granularity at which the data is collected? Is it enough for the target problem? 154 | #### 3.1.3 Is the raw data kept untouched after collection? 155 | #### 3.1.4 Is the data to be stored in the data platform? If not, why? 156 | #### 3.1.5 Is an open data format used to store the data, for example txt, json or csv? If not, why? 157 | #### 3.1.6 Is an automated pipeline set up for processing new data? If not, why? 158 | #### 3.1.7 Document the data collection process. 159 | 160 | ### 3.2 Exploring data 161 | #### 3.2.1 What is the structure of the data? 162 | #### 3.2.2 What are the relationships between data items? 163 | #### 3.2.3 How are the data from different sources mapped together? 164 | #### 3.2.4 What tools, statistical methods and visualization tools have been used to explore the data? 165 | #### 3.2.5 What is the data quality: completeness, consistency, validity, and accuracy? 166 | #### 3.2.6 What are the other issues with data quality? 167 | #### 3.2.7 How should duplicated data be filtered or removed? 168 | #### 3.2.8 Are there outliers in the data? 169 | #### 3.2.9 What patterns have you found in the data? 170 | 171 | ### 3.3 Understanding data from domain 172 | #### 3.3.1 How was the data generated? 173 | #### 3.3.2 How was the data sampled/transferred? 174 | #### 3.3.3 What is the meaning of each data item from a business perspective? 175 | #### 3.3.4 What is the relationship between each data item and the target output? 176 | #### 3.3.5 Are there unstable data periods? How can they be identified and removed? 177 | #### 3.3.6 Are all the identified outliers noise? 178 | #### 3.3.7 What is the valid data range for each data item? 179 | 180 | ### 3.4 Preparing data 181 | #### 3.4.1 What is the plan to prepare the data? 182 | #### 3.4.2 What is the feedback from the SME on the data preparation plan? 183 | #### 3.4.3 How is missing data filled/removed? 184 | #### 3.4.4 How is noisy data removed/replaced? 185 | #### 3.4.5 How is overlapping data combined/filtered/removed? 186 | #### 3.4.6 How is the data transformed? 187 | #### 3.4.7 How are the training, validation and test datasets split? 188 | 189 | ### 3.5 Feature engineering 190 | #### 3.5.1 Is an ML method used to reduce the complexity of the input feature space, for example PCA or an autoencoder?
191 | #### 3.5.2 What are the useful domain characteristics that are not represented in the dataset? 192 | #### 3.5.3 What features can be identified/created to represent the identified missing characteristics? 193 | 194 | ### 3.6 Document review and artifact archiving 195 | #### 3.6.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 196 | #### 3.6.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 197 | 198 | ## 4 Modeling 199 | 200 | ### 4.1 Selecting model 201 | #### 4.1.1 What DS models have been considered, and what are the ones chosen for further evaluation? 202 | #### 4.1.2 What are the advantages and disadvantages of the chosen models? 203 | #### 4.1.3 Is the simplest model chosen as a benchmark in the first iteration? 204 | #### 4.1.4 What are the criteria to compare candidate models? 205 | 206 | ### 4.2 Building model 207 | #### 4.2.1 What are the hyper-parameters for the selected models? 208 | #### 4.2.2 What are the processes to optimize the hyper-parameters? 209 | #### 4.2.3 Has the data been normalized? Give an explanation. 210 | #### 4.2.4 What is the time used to train the model? Is it acceptable for offline or online training? 211 | 212 | ### 4.3 Testing model 213 | #### 4.3.1 Is a separate test dataset used to test the models? 214 | #### 4.3.2 Are the success criteria met by the models? 215 | #### 4.3.3 How easily can the end user use/understand the model output? 216 | #### 4.3.4 Is model performance part of the criteria? 217 | #### 4.3.5 What is the subjective evaluation result? 218 | #### 4.3.6 If more than one model meets the criteria, how should one (or more) be chosen from them? 219 | 220 | ### 4.4 Document review and artifact archiving 221 | #### 4.4.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 222 | #### 4.4.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 223 | 224 | ## 5 Evaluation 225 | 226 | ### 5.1 Technical evaluation 227 | #### 5.1.1 Have the predefined success/acceptance criteria been met by the chosen model? 228 | #### 5.1.2 Have all the identified requirements been met? 229 | #### 5.1.3 Have all the identified use cases been covered? 230 | #### 5.1.4 What are the preconditions and limitations of the chosen model? 231 | #### 5.1.5 How easily can the model be integrated into the work process? 232 | #### 5.1.6 What end user competence is needed to use the model? Is this competence requirement acceptable to the end user organization? 233 | #### 5.1.7 Has the business changed so that the solution cannot be applied anymore? 234 | 235 | ### 5.2 Process evaluation 236 | #### 5.2.1 Has the planned artifact peer review been executed as planned? 237 | #### 5.2.2 Have the communication channels with stakeholders worked as expected? 238 | #### 5.2.3 Has the time plan been met? If not, why? 239 | #### 5.2.4 Have all the relevant documents been in place and approved? 240 | #### 5.2.5 What is the go/no-go decision and the reason behind it? 241 | #### 5.2.6 What is the feedback from the SME or end user on the evaluation result?
242 | 243 | ### 5.3 Document review and artifact archiving 244 | #### 5.3.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 245 | #### 5.3.2 What artifacts (document, code and data) are produced in this phase? How are they archived for reuse and future reference? 246 | 247 | ## 6 Deployment and Monitoring 248 | 249 | ### 6.1 Deploying model 250 | #### 6.1.1 How is the chosen model implemented/deployed? 251 | #### 6.1.2 How is the result to be presented to the end user? Is there a graphical way to do it? 252 | #### 6.1.3 If deployed as SW, has the Statoil architecture contract been met? 253 | #### 6.1.4 Which TRL (technology readiness level) is the SW classified as? 254 | #### 6.1.5 How are the results to be interpreted or utilized? 255 | #### 6.1.6 Are there IP governance issues? 256 | 257 | ### 6.2 Managing the process change 258 | #### 6.2.1 What are the changes the new DS solution brings to the business process? 259 | #### 6.2.2 What efforts/processes are required for the management to adopt the new solution? 260 | #### 6.2.3 What effort is used to train the end user to use the new solution? 261 | 262 | ### 6.3 Monitoring and maintaining DS model 263 | #### 6.3.1 What is the operation/maintenance plan for the DS solution? 264 | #### 6.3.2 How often shall the model be re-trained and re-deployed? Automatically or manually? 265 | #### 6.3.3 What is the usage monitoring plan for the DS solution? 266 | #### 6.3.4 Are the DS solution's constraints/limitations being monitored? 267 | #### 6.3.5 What is the user feedback loop? 268 | 269 | ### 6.4 Knowledge sharing 270 | #### 6.4.1 What can be learned from the project process? 271 | #### 6.4.2 How can the experience be shared with others and retrieved for future reference? 272 | #### 6.4.3 What DS components (knowledge, product, process and data) can we reuse or share? 273 | #### 6.4.4 What improvement suggestions do you have for this DS standard template? 274 | 275 | ### 6.5 Document review 276 | #### 6.5.1 Has the document (answers to the questions) in this phase been reviewed and approved? If not, document the reason. (This question shall be answered the same number of times as there are document review rounds for this phase.) 277 | -------------------------------------------------------------------------------- /{{cookiecutter.repo_name}}/LICENSE: -------------------------------------------------------------------------------- 1 | {% if cookiecutter.open_source_license == 'MIT' %} 2 | The MIT License (MIT) 3 | Copyright (c) {% now 'utc', '%Y' %}, {{ cookiecutter.author }} 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | 11 | {% elif cookiecutter.open_source_license == 'LGPL3' %} 12 | GNU LESSER GENERAL PUBLIC LICENSE 13 | Version 3, 29 June 2007 14 | 15 | Copyright (C) 2007 Free Software Foundation, Inc. 16 | Everyone is permitted to copy and distribute verbatim copies 17 | of this license document, but changing it is not allowed. 18 | 19 | 20 | This version of the GNU Lesser General Public License incorporates 21 | the terms and conditions of version 3 of the GNU General Public 22 | License, supplemented by the additional permissions listed below. 23 | 24 | 0. Additional Definitions. 25 | 26 | As used herein, "this License" refers to version 3 of the GNU Lesser 27 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 28 | General Public License. 29 | 30 | "The Library" refers to a covered work governed by this License, 31 | other than an Application or a Combined Work as defined below. 32 | 33 | An "Application" is any work that makes use of an interface provided 34 | by the Library, but which is not otherwise based on the Library. 35 | Defining a subclass of a class defined by the Library is deemed a mode 36 | of using an interface provided by the Library. 37 | 38 | A "Combined Work" is a work produced by combining or linking an 39 | Application with the Library. The particular version of the Library 40 | with which the Combined Work was made is also called the "Linked 41 | Version". 42 | 43 | The "Minimal Corresponding Source" for a Combined Work means the 44 | Corresponding Source for the Combined Work, excluding any source code 45 | for portions of the Combined Work that, considered in isolation, are 46 | based on the Application, and not on the Linked Version. 47 | 48 | The "Corresponding Application Code" for a Combined Work means the 49 | object code and/or source code for the Application, including any data 50 | and utility programs needed for reproducing the Combined Work from the 51 | Application, but excluding the System Libraries of the Combined Work. 52 | 53 | 1. Exception to Section 3 of the GNU GPL. 54 | 55 | You may convey a covered work under sections 3 and 4 of this License 56 | without being bound by section 3 of the GNU GPL. 57 | 58 | 2. Conveying Modified Versions. 59 | 60 | If you modify a copy of the Library, and, in your modifications, a 61 | facility refers to a function or data to be supplied by an Application 62 | that uses the facility (other than as an argument passed when the 63 | facility is invoked), then you may convey a copy of the modified 64 | version: 65 | 66 | a) under this License, provided that you make a good faith effort to 67 | ensure that, in the event an Application does not supply the 68 | function or data, the facility still operates, and performs 69 | whatever part of its purpose remains meaningful, or 70 | 71 | b) under the GNU GPL, with none of the additional permissions of 72 | this License applicable to that copy. 73 | 74 | 3. Object Code Incorporating Material from Library Header Files. 75 | 76 | The object code form of an Application may incorporate material from 77 | a header file that is part of the Library. 
You may convey such object 78 | code under terms of your choice, provided that, if the incorporated 79 | material is not limited to numerical parameters, data structure 80 | layouts and accessors, or small macros, inline functions and templates 81 | (ten or fewer lines in length), you do both of the following: 82 | 83 | a) Give prominent notice with each copy of the object code that the 84 | Library is used in it and that the Library and its use are 85 | covered by this License. 86 | 87 | b) Accompany the object code with a copy of the GNU GPL and this license 88 | document. 89 | 90 | 4. Combined Works. 91 | 92 | You may convey a Combined Work under terms of your choice that, 93 | taken together, effectively do not restrict modification of the 94 | portions of the Library contained in the Combined Work and reverse 95 | engineering for debugging such modifications, if you also do each of 96 | the following: 97 | 98 | a) Give prominent notice with each copy of the Combined Work that 99 | the Library is used in it and that the Library and its use are 100 | covered by this License. 101 | 102 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 103 | document. 104 | 105 | c) For a Combined Work that displays copyright notices during 106 | execution, include the copyright notice for the Library among 107 | these notices, as well as a reference directing the user to the 108 | copies of the GNU GPL and this license document. 109 | 110 | d) Do one of the following: 111 | 112 | 0) Convey the Minimal Corresponding Source under the terms of this 113 | License, and the Corresponding Application Code in a form 114 | suitable for, and under terms that permit, the user to 115 | recombine or relink the Application with a modified version of 116 | the Linked Version to produce a modified Combined Work, in the 117 | manner specified by section 6 of the GNU GPL for conveying 118 | Corresponding Source. 119 | 120 | 1) Use a suitable shared library mechanism for linking with the 121 | Library. A suitable mechanism is one that (a) uses at run time 122 | a copy of the Library already present on the user's computer 123 | system, and (b) will operate properly with a modified version 124 | of the Library that is interface-compatible with the Linked 125 | Version. 126 | 127 | e) Provide Installation Information, but only if you would otherwise 128 | be required to provide such information under section 6 of the 129 | GNU GPL, and only to the extent that such information is 130 | necessary to install and execute a modified version of the 131 | Combined Work produced by recombining or relinking the 132 | Application with a modified version of the Linked Version. (If 133 | you use option 4d0, the Installation Information must accompany 134 | the Minimal Corresponding Source and Corresponding Application 135 | Code. If you use option 4d1, you must provide the Installation 136 | Information in the manner specified by section 6 of the GNU GPL 137 | for conveying Corresponding Source.) 138 | 139 | 5. Combined Libraries. 
140 | 141 | You may place library facilities that are a work based on the 142 | Library side by side in a single library together with other library 143 | facilities that are not Applications and are not covered by this 144 | License, and convey such a combined library under terms of your 145 | choice, if you do both of the following: 146 | 147 | a) Accompany the combined library with a copy of the same work based 148 | on the Library, uncombined with any other library facilities, 149 | conveyed under the terms of this License. 150 | 151 | b) Give prominent notice with the combined library that part of it 152 | is a work based on the Library, and explaining where to find the 153 | accompanying uncombined form of the same work. 154 | 155 | 6. Revised Versions of the GNU Lesser General Public License. 156 | 157 | The Free Software Foundation may publish revised and/or new versions 158 | of the GNU Lesser General Public License from time to time. Such new 159 | versions will be similar in spirit to the present version, but may 160 | differ in detail to address new problems or concerns. 161 | 162 | Each version is given a distinguishing version number. If the 163 | Library as you received it specifies that a certain numbered version 164 | of the GNU Lesser General Public License "or any later version" 165 | applies to it, you have the option of following the terms and 166 | conditions either of that published version or of any later version 167 | published by the Free Software Foundation. If the Library as you 168 | received it does not specify a version number of the GNU Lesser 169 | General Public License, you may choose any version of the GNU Lesser 170 | General Public License ever published by the Free Software Foundation. 171 | 172 | If the Library as you received it specifies that a proxy can decide 173 | whether future versions of the GNU Lesser General Public License shall 174 | apply, that proxy's public statement of acceptance of any version is 175 | permanent authorization for you to choose that version for the 176 | Library. 177 | 178 | {% elif cookiecutter.open_source_license == 'GPL3' %} 179 | GNU GENERAL PUBLIC LICENSE 180 | Version 3, 29 June 2007 181 | 182 | Copyright (C) 2007 Free Software Foundation, Inc. 183 | Everyone is permitted to copy and distribute verbatim copies 184 | of this license document, but changing it is not allowed. 185 | 186 | Preamble 187 | 188 | The GNU General Public License is a free, copyleft license for 189 | software and other kinds of works. 190 | 191 | The licenses for most software and other practical works are designed 192 | to take away your freedom to share and change the works. By contrast, 193 | the GNU General Public License is intended to guarantee your freedom to 194 | share and change all versions of a program--to make sure it remains free 195 | software for all its users. We, the Free Software Foundation, use the 196 | GNU General Public License for most of our software; it applies also to 197 | any other work released this way by its authors. You can apply it to 198 | your programs, too. 199 | 200 | When we speak of free software, we are referring to freedom, not 201 | price. Our General Public Licenses are designed to make sure that you 202 | have the freedom to distribute copies of free software (and charge for 203 | them if you wish), that you receive source code or can get it if you 204 | want it, that you can change the software or use pieces of it in new 205 | free programs, and that you know you can do these things. 
206 | 207 | To protect your rights, we need to prevent others from denying you 208 | these rights or asking you to surrender the rights. Therefore, you have 209 | certain responsibilities if you distribute copies of the software, or if 210 | you modify it: responsibilities to respect the freedom of others. 211 | 212 | For example, if you distribute copies of such a program, whether 213 | gratis or for a fee, you must pass on to the recipients the same 214 | freedoms that you received. You must make sure that they, too, receive 215 | or can get the source code. And you must show them these terms so they 216 | know their rights. 217 | 218 | Developers that use the GNU GPL protect your rights with two steps: 219 | (1) assert copyright on the software, and (2) offer you this License 220 | giving you legal permission to copy, distribute and/or modify it. 221 | 222 | For the developers' and authors' protection, the GPL clearly explains 223 | that there is no warranty for this free software. For both users' and 224 | authors' sake, the GPL requires that modified versions be marked as 225 | changed, so that their problems will not be attributed erroneously to 226 | authors of previous versions. 227 | 228 | Some devices are designed to deny users access to install or run 229 | modified versions of the software inside them, although the manufacturer 230 | can do so. This is fundamentally incompatible with the aim of 231 | protecting users' freedom to change the software. The systematic 232 | pattern of such abuse occurs in the area of products for individuals to 233 | use, which is precisely where it is most unacceptable. Therefore, we 234 | have designed this version of the GPL to prohibit the practice for those 235 | products. If such problems arise substantially in other domains, we 236 | stand ready to extend this provision to those domains in future versions 237 | of the GPL, as needed to protect the freedom of users. 238 | 239 | Finally, every program is threatened constantly by software patents. 240 | States should not allow patents to restrict development and use of 241 | software on general-purpose computers, but in those that do, we wish to 242 | avoid the special danger that patents applied to a free program could 243 | make it effectively proprietary. To prevent this, the GPL assures that 244 | patents cannot be used to render the program non-free. 245 | 246 | The precise terms and conditions for copying, distribution and 247 | modification follow. 248 | 249 | TERMS AND CONDITIONS 250 | 251 | 0. Definitions. 252 | 253 | "This License" refers to version 3 of the GNU General Public License. 254 | 255 | "Copyright" also means copyright-like laws that apply to other kinds of 256 | works, such as semiconductor masks. 257 | 258 | "The Program" refers to any copyrightable work licensed under this 259 | License. Each licensee is addressed as "you". "Licensees" and 260 | "recipients" may be individuals or organizations. 261 | 262 | To "modify" a work means to copy from or adapt all or part of the work 263 | in a fashion requiring copyright permission, other than the making of an 264 | exact copy. The resulting work is called a "modified version" of the 265 | earlier work or a work "based on" the earlier work. 266 | 267 | A "covered work" means either the unmodified Program or a work based 268 | on the Program. 
269 | 270 | To "propagate" a work means to do anything with it that, without 271 | permission, would make you directly or secondarily liable for 272 | infringement under applicable copyright law, except executing it on a 273 | computer or modifying a private copy. Propagation includes copying, 274 | distribution (with or without modification), making available to the 275 | public, and in some countries other activities as well. 276 | 277 | To "convey" a work means any kind of propagation that enables other 278 | parties to make or receive copies. Mere interaction with a user through 279 | a computer network, with no transfer of a copy, is not conveying. 280 | 281 | An interactive user interface displays "Appropriate Legal Notices" 282 | to the extent that it includes a convenient and prominently visible 283 | feature that (1) displays an appropriate copyright notice, and (2) 284 | tells the user that there is no warranty for the work (except to the 285 | extent that warranties are provided), that licensees may convey the 286 | work under this License, and how to view a copy of this License. If 287 | the interface presents a list of user commands or options, such as a 288 | menu, a prominent item in the list meets this criterion. 289 | 290 | 1. Source Code. 291 | 292 | The "source code" for a work means the preferred form of the work 293 | for making modifications to it. "Object code" means any non-source 294 | form of a work. 295 | 296 | A "Standard Interface" means an interface that either is an official 297 | standard defined by a recognized standards body, or, in the case of 298 | interfaces specified for a particular programming language, one that 299 | is widely used among developers working in that language. 300 | 301 | The "System Libraries" of an executable work include anything, other 302 | than the work as a whole, that (a) is included in the normal form of 303 | packaging a Major Component, but which is not part of that Major 304 | Component, and (b) serves only to enable use of the work with that 305 | Major Component, or to implement a Standard Interface for which an 306 | implementation is available to the public in source code form. A 307 | "Major Component", in this context, means a major essential component 308 | (kernel, window system, and so on) of the specific operating system 309 | (if any) on which the executable work runs, or a compiler used to 310 | produce the work, or an object code interpreter used to run it. 311 | 312 | The "Corresponding Source" for a work in object code form means all 313 | the source code needed to generate, install, and (for an executable 314 | work) run the object code and to modify the work, including scripts to 315 | control those activities. However, it does not include the work's 316 | System Libraries, or general-purpose tools or generally available free 317 | programs which are used unmodified in performing those activities but 318 | which are not part of the work. For example, Corresponding Source 319 | includes interface definition files associated with source files for 320 | the work, and the source code for shared libraries and dynamically 321 | linked subprograms that the work is specifically designed to require, 322 | such as by intimate data communication or control flow between those 323 | subprograms and other parts of the work. 324 | 325 | The Corresponding Source need not include anything that users 326 | can regenerate automatically from other parts of the Corresponding 327 | Source. 
328 | 329 | The Corresponding Source for a work in source code form is that 330 | same work. 331 | 332 | 2. Basic Permissions. 333 | 334 | All rights granted under this License are granted for the term of 335 | copyright on the Program, and are irrevocable provided the stated 336 | conditions are met. This License explicitly affirms your unlimited 337 | permission to run the unmodified Program. The output from running a 338 | covered work is covered by this License only if the output, given its 339 | content, constitutes a covered work. This License acknowledges your 340 | rights of fair use or other equivalent, as provided by copyright law. 341 | 342 | You may make, run and propagate covered works that you do not 343 | convey, without conditions so long as your license otherwise remains 344 | in force. You may convey covered works to others for the sole purpose 345 | of having them make modifications exclusively for you, or provide you 346 | with facilities for running those works, provided that you comply with 347 | the terms of this License in conveying all material for which you do 348 | not control copyright. Those thus making or running the covered works 349 | for you must do so exclusively on your behalf, under your direction 350 | and control, on terms that prohibit them from making any copies of 351 | your copyrighted material outside their relationship with you. 352 | 353 | Conveying under any other circumstances is permitted solely under 354 | the conditions stated below. Sublicensing is not allowed; section 10 355 | makes it unnecessary. 356 | 357 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 358 | 359 | No covered work shall be deemed part of an effective technological 360 | measure under any applicable law fulfilling obligations under article 361 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 362 | similar laws prohibiting or restricting circumvention of such 363 | measures. 364 | 365 | When you convey a covered work, you waive any legal power to forbid 366 | circumvention of technological measures to the extent such circumvention 367 | is effected by exercising rights under this License with respect to 368 | the covered work, and you disclaim any intention to limit operation or 369 | modification of the work as a means of enforcing, against the work's 370 | users, your or third parties' legal rights to forbid circumvention of 371 | technological measures. 372 | 373 | 4. Conveying Verbatim Copies. 374 | 375 | You may convey verbatim copies of the Program's source code as you 376 | receive it, in any medium, provided that you conspicuously and 377 | appropriately publish on each copy an appropriate copyright notice; 378 | keep intact all notices stating that this License and any 379 | non-permissive terms added in accord with section 7 apply to the code; 380 | keep intact all notices of the absence of any warranty; and give all 381 | recipients a copy of this License along with the Program. 382 | 383 | You may charge any price or no price for each copy that you convey, 384 | and you may offer support or warranty protection for a fee. 385 | 386 | 5. Conveying Modified Source Versions. 387 | 388 | You may convey a work based on the Program, or the modifications to 389 | produce it from the Program, in the form of source code under the 390 | terms of section 4, provided that you also meet all of these conditions: 391 | 392 | a) The work must carry prominent notices stating that you modified 393 | it, and giving a relevant date. 
394 | 395 | b) The work must carry prominent notices stating that it is 396 | released under this License and any conditions added under section 397 | 7. This requirement modifies the requirement in section 4 to 398 | "keep intact all notices". 399 | 400 | c) You must license the entire work, as a whole, under this 401 | License to anyone who comes into possession of a copy. This 402 | License will therefore apply, along with any applicable section 7 403 | additional terms, to the whole of the work, and all its parts, 404 | regardless of how they are packaged. This License gives no 405 | permission to license the work in any other way, but it does not 406 | invalidate such permission if you have separately received it. 407 | 408 | d) If the work has interactive user interfaces, each must display 409 | Appropriate Legal Notices; however, if the Program has interactive 410 | interfaces that do not display Appropriate Legal Notices, your 411 | work need not make them do so. 412 | 413 | A compilation of a covered work with other separate and independent 414 | works, which are not by their nature extensions of the covered work, 415 | and which are not combined with it such as to form a larger program, 416 | in or on a volume of a storage or distribution medium, is called an 417 | "aggregate" if the compilation and its resulting copyright are not 418 | used to limit the access or legal rights of the compilation's users 419 | beyond what the individual works permit. Inclusion of a covered work 420 | in an aggregate does not cause this License to apply to the other 421 | parts of the aggregate. 422 | 423 | 6. Conveying Non-Source Forms. 424 | 425 | You may convey a covered work in object code form under the terms 426 | of sections 4 and 5, provided that you also convey the 427 | machine-readable Corresponding Source under the terms of this License, 428 | in one of these ways: 429 | 430 | a) Convey the object code in, or embodied in, a physical product 431 | (including a physical distribution medium), accompanied by the 432 | Corresponding Source fixed on a durable physical medium 433 | customarily used for software interchange. 434 | 435 | b) Convey the object code in, or embodied in, a physical product 436 | (including a physical distribution medium), accompanied by a 437 | written offer, valid for at least three years and valid for as 438 | long as you offer spare parts or customer support for that product 439 | model, to give anyone who possesses the object code either (1) a 440 | copy of the Corresponding Source for all the software in the 441 | product that is covered by this License, on a durable physical 442 | medium customarily used for software interchange, for a price no 443 | more than your reasonable cost of physically performing this 444 | conveying of source, or (2) access to copy the 445 | Corresponding Source from a network server at no charge. 446 | 447 | c) Convey individual copies of the object code with a copy of the 448 | written offer to provide the Corresponding Source. This 449 | alternative is allowed only occasionally and noncommercially, and 450 | only if you received the object code with such an offer, in accord 451 | with subsection 6b. 452 | 453 | d) Convey the object code by offering access from a designated 454 | place (gratis or for a charge), and offer equivalent access to the 455 | Corresponding Source in the same way through the same place at no 456 | further charge. 
You need not require recipients to copy the 457 | Corresponding Source along with the object code. If the place to 458 | copy the object code is a network server, the Corresponding Source 459 | may be on a different server (operated by you or a third party) 460 | that supports equivalent copying facilities, provided you maintain 461 | clear directions next to the object code saying where to find the 462 | Corresponding Source. Regardless of what server hosts the 463 | Corresponding Source, you remain obligated to ensure that it is 464 | available for as long as needed to satisfy these requirements. 465 | 466 | e) Convey the object code using peer-to-peer transmission, provided 467 | you inform other peers where the object code and Corresponding 468 | Source of the work are being offered to the general public at no 469 | charge under subsection 6d. 470 | 471 | A separable portion of the object code, whose source code is excluded 472 | from the Corresponding Source as a System Library, need not be 473 | included in conveying the object code work. 474 | 475 | A "User Product" is either (1) a "consumer product", which means any 476 | tangible personal property which is normally used for personal, family, 477 | or household purposes, or (2) anything designed or sold for incorporation 478 | into a dwelling. In determining whether a product is a consumer product, 479 | doubtful cases shall be resolved in favor of coverage. For a particular 480 | product received by a particular user, "normally used" refers to a 481 | typical or common use of that class of product, regardless of the status 482 | of the particular user or of the way in which the particular user 483 | actually uses, or expects or is expected to use, the product. A product 484 | is a consumer product regardless of whether the product has substantial 485 | commercial, industrial or non-consumer uses, unless such uses represent 486 | the only significant mode of use of the product. 487 | 488 | "Installation Information" for a User Product means any methods, 489 | procedures, authorization keys, or other information required to install 490 | and execute modified versions of a covered work in that User Product from 491 | a modified version of its Corresponding Source. The information must 492 | suffice to ensure that the continued functioning of the modified object 493 | code is in no case prevented or interfered with solely because 494 | modification has been made. 495 | 496 | If you convey an object code work under this section in, or with, or 497 | specifically for use in, a User Product, and the conveying occurs as 498 | part of a transaction in which the right of possession and use of the 499 | User Product is transferred to the recipient in perpetuity or for a 500 | fixed term (regardless of how the transaction is characterized), the 501 | Corresponding Source conveyed under this section must be accompanied 502 | by the Installation Information. But this requirement does not apply 503 | if neither you nor any third party retains the ability to install 504 | modified object code on the User Product (for example, the work has 505 | been installed in ROM). 506 | 507 | The requirement to provide Installation Information does not include a 508 | requirement to continue to provide support service, warranty, or updates 509 | for a work that has been modified or installed by the recipient, or for 510 | the User Product in which it has been modified or installed. 
Access to a 511 | network may be denied when the modification itself materially and 512 | adversely affects the operation of the network or violates the rules and 513 | protocols for communication across the network. 514 | 515 | Corresponding Source conveyed, and Installation Information provided, 516 | in accord with this section must be in a format that is publicly 517 | documented (and with an implementation available to the public in 518 | source code form), and must require no special password or key for 519 | unpacking, reading or copying. 520 | 521 | 7. Additional Terms. 522 | 523 | "Additional permissions" are terms that supplement the terms of this 524 | License by making exceptions from one or more of its conditions. 525 | Additional permissions that are applicable to the entire Program shall 526 | be treated as though they were included in this License, to the extent 527 | that they are valid under applicable law. If additional permissions 528 | apply only to part of the Program, that part may be used separately 529 | under those permissions, but the entire Program remains governed by 530 | this License without regard to the additional permissions. 531 | 532 | When you convey a copy of a covered work, you may at your option 533 | remove any additional permissions from that copy, or from any part of 534 | it. (Additional permissions may be written to require their own 535 | removal in certain cases when you modify the work.) You may place 536 | additional permissions on material, added by you to a covered work, 537 | for which you have or can give appropriate copyright permission. 538 | 539 | Notwithstanding any other provision of this License, for material you 540 | add to a covered work, you may (if authorized by the copyright holders of 541 | that material) supplement the terms of this License with terms: 542 | 543 | a) Disclaiming warranty or limiting liability differently from the 544 | terms of sections 15 and 16 of this License; or 545 | 546 | b) Requiring preservation of specified reasonable legal notices or 547 | author attributions in that material or in the Appropriate Legal 548 | Notices displayed by works containing it; or 549 | 550 | c) Prohibiting misrepresentation of the origin of that material, or 551 | requiring that modified versions of such material be marked in 552 | reasonable ways as different from the original version; or 553 | 554 | d) Limiting the use for publicity purposes of names of licensors or 555 | authors of the material; or 556 | 557 | e) Declining to grant rights under trademark law for use of some 558 | trade names, trademarks, or service marks; or 559 | 560 | f) Requiring indemnification of licensors and authors of that 561 | material by anyone who conveys the material (or modified versions of 562 | it) with contractual assumptions of liability to the recipient, for 563 | any liability that these contractual assumptions directly impose on 564 | those licensors and authors. 565 | 566 | All other non-permissive additional terms are considered "further 567 | restrictions" within the meaning of section 10. If the Program as you 568 | received it, or any part of it, contains a notice stating that it is 569 | governed by this License along with a term that is a further 570 | restriction, you may remove that term. 
If a license document contains 571 | a further restriction but permits relicensing or conveying under this 572 | License, you may add to a covered work material governed by the terms 573 | of that license document, provided that the further restriction does 574 | not survive such relicensing or conveying. 575 | 576 | If you add terms to a covered work in accord with this section, you 577 | must place, in the relevant source files, a statement of the 578 | additional terms that apply to those files, or a notice indicating 579 | where to find the applicable terms. 580 | 581 | Additional terms, permissive or non-permissive, may be stated in the 582 | form of a separately written license, or stated as exceptions; 583 | the above requirements apply either way. 584 | 585 | 8. Termination. 586 | 587 | You may not propagate or modify a covered work except as expressly 588 | provided under this License. Any attempt otherwise to propagate or 589 | modify it is void, and will automatically terminate your rights under 590 | this License (including any patent licenses granted under the third 591 | paragraph of section 11). 592 | 593 | However, if you cease all violation of this License, then your 594 | license from a particular copyright holder is reinstated (a) 595 | provisionally, unless and until the copyright holder explicitly and 596 | finally terminates your license, and (b) permanently, if the copyright 597 | holder fails to notify you of the violation by some reasonable means 598 | prior to 60 days after the cessation. 599 | 600 | Moreover, your license from a particular copyright holder is 601 | reinstated permanently if the copyright holder notifies you of the 602 | violation by some reasonable means, this is the first time you have 603 | received notice of violation of this License (for any work) from that 604 | copyright holder, and you cure the violation prior to 30 days after 605 | your receipt of the notice. 606 | 607 | Termination of your rights under this section does not terminate the 608 | licenses of parties who have received copies or rights from you under 609 | this License. If your rights have been terminated and not permanently 610 | reinstated, you do not qualify to receive new licenses for the same 611 | material under section 10. 612 | 613 | 9. Acceptance Not Required for Having Copies. 614 | 615 | You are not required to accept this License in order to receive or 616 | run a copy of the Program. Ancillary propagation of a covered work 617 | occurring solely as a consequence of using peer-to-peer transmission 618 | to receive a copy likewise does not require acceptance. However, 619 | nothing other than this License grants you permission to propagate or 620 | modify any covered work. These actions infringe copyright if you do 621 | not accept this License. Therefore, by modifying or propagating a 622 | covered work, you indicate your acceptance of this License to do so. 623 | 624 | 10. Automatic Licensing of Downstream Recipients. 625 | 626 | Each time you convey a covered work, the recipient automatically 627 | receives a license from the original licensors, to run, modify and 628 | propagate that work, subject to this License. You are not responsible 629 | for enforcing compliance by third parties with this License. 630 | 631 | An "entity transaction" is a transaction transferring control of an 632 | organization, or substantially all assets of one, or subdividing an 633 | organization, or merging organizations. 
If propagation of a covered 634 | work results from an entity transaction, each party to that 635 | transaction who receives a copy of the work also receives whatever 636 | licenses to the work the party's predecessor in interest had or could 637 | give under the previous paragraph, plus a right to possession of the 638 | Corresponding Source of the work from the predecessor in interest, if 639 | the predecessor has it or can get it with reasonable efforts. 640 | 641 | You may not impose any further restrictions on the exercise of the 642 | rights granted or affirmed under this License. For example, you may 643 | not impose a license fee, royalty, or other charge for exercise of 644 | rights granted under this License, and you may not initiate litigation 645 | (including a cross-claim or counterclaim in a lawsuit) alleging that 646 | any patent claim is infringed by making, using, selling, offering for 647 | sale, or importing the Program or any portion of it. 648 | 649 | 11. Patents. 650 | 651 | A "contributor" is a copyright holder who authorizes use under this 652 | License of the Program or a work on which the Program is based. The 653 | work thus licensed is called the contributor's "contributor version". 654 | 655 | A contributor's "essential patent claims" are all patent claims 656 | owned or controlled by the contributor, whether already acquired or 657 | hereafter acquired, that would be infringed by some manner, permitted 658 | by this License, of making, using, or selling its contributor version, 659 | but do not include claims that would be infringed only as a 660 | consequence of further modification of the contributor version. For 661 | purposes of this definition, "control" includes the right to grant 662 | patent sublicenses in a manner consistent with the requirements of 663 | this License. 664 | 665 | Each contributor grants you a non-exclusive, worldwide, royalty-free 666 | patent license under the contributor's essential patent claims, to 667 | make, use, sell, offer for sale, import and otherwise run, modify and 668 | propagate the contents of its contributor version. 669 | 670 | In the following three paragraphs, a "patent license" is any express 671 | agreement or commitment, however denominated, not to enforce a patent 672 | (such as an express permission to practice a patent or covenant not to 673 | sue for patent infringement). To "grant" such a patent license to a 674 | party means to make such an agreement or commitment not to enforce a 675 | patent against the party. 676 | 677 | If you convey a covered work, knowingly relying on a patent license, 678 | and the Corresponding Source of the work is not available for anyone 679 | to copy, free of charge and under the terms of this License, through a 680 | publicly available network server or other readily accessible means, 681 | then you must either (1) cause the Corresponding Source to be so 682 | available, or (2) arrange to deprive yourself of the benefit of the 683 | patent license for this particular work, or (3) arrange, in a manner 684 | consistent with the requirements of this License, to extend the patent 685 | license to downstream recipients. "Knowingly relying" means you have 686 | actual knowledge that, but for the patent license, your conveying the 687 | covered work in a country, or your recipient's use of the covered work 688 | in a country, would infringe one or more identifiable patents in that 689 | country that you have reason to believe are valid. 
690 | 691 | If, pursuant to or in connection with a single transaction or 692 | arrangement, you convey, or propagate by procuring conveyance of, a 693 | covered work, and grant a patent license to some of the parties 694 | receiving the covered work authorizing them to use, propagate, modify 695 | or convey a specific copy of the covered work, then the patent license 696 | you grant is automatically extended to all recipients of the covered 697 | work and works based on it. 698 | 699 | A patent license is "discriminatory" if it does not include within 700 | the scope of its coverage, prohibits the exercise of, or is 701 | conditioned on the non-exercise of one or more of the rights that are 702 | specifically granted under this License. You may not convey a covered 703 | work if you are a party to an arrangement with a third party that is 704 | in the business of distributing software, under which you make payment 705 | to the third party based on the extent of your activity of conveying 706 | the work, and under which the third party grants, to any of the 707 | parties who would receive the covered work from you, a discriminatory 708 | patent license (a) in connection with copies of the covered work 709 | conveyed by you (or copies made from those copies), or (b) primarily 710 | for and in connection with specific products or compilations that 711 | contain the covered work, unless you entered into that arrangement, 712 | or that patent license was granted, prior to 28 March 2007. 713 | 714 | Nothing in this License shall be construed as excluding or limiting 715 | any implied license or other defenses to infringement that may 716 | otherwise be available to you under applicable patent law. 717 | 718 | 12. No Surrender of Others' Freedom. 719 | 720 | If conditions are imposed on you (whether by court order, agreement or 721 | otherwise) that contradict the conditions of this License, they do not 722 | excuse you from the conditions of this License. If you cannot convey a 723 | covered work so as to satisfy simultaneously your obligations under this 724 | License and any other pertinent obligations, then as a consequence you may 725 | not convey it at all. For example, if you agree to terms that obligate you 726 | to collect a royalty for further conveying from those to whom you convey 727 | the Program, the only way you could satisfy both those terms and this 728 | License would be to refrain entirely from conveying the Program. 729 | 730 | 13. Use with the GNU Affero General Public License. 731 | 732 | Notwithstanding any other provision of this License, you have 733 | permission to link or combine any covered work with a work licensed 734 | under version 3 of the GNU Affero General Public License into a single 735 | combined work, and to convey the resulting work. The terms of this 736 | License will continue to apply to the part which is the covered work, 737 | but the special requirements of the GNU Affero General Public License, 738 | section 13, concerning interaction through a network will apply to the 739 | combination as such. 740 | 741 | 14. Revised Versions of this License. 742 | 743 | The Free Software Foundation may publish revised and/or new versions of 744 | the GNU General Public License from time to time. Such new versions will 745 | be similar in spirit to the present version, but may differ in detail to 746 | address new problems or concerns. 747 | 748 | Each version is given a distinguishing version number. 
If the 749 | Program specifies that a certain numbered version of the GNU General 750 | Public License "or any later version" applies to it, you have the 751 | option of following the terms and conditions either of that numbered 752 | version or of any later version published by the Free Software 753 | Foundation. If the Program does not specify a version number of the 754 | GNU General Public License, you may choose any version ever published 755 | by the Free Software Foundation. 756 | 757 | If the Program specifies that a proxy can decide which future 758 | versions of the GNU General Public License can be used, that proxy's 759 | public statement of acceptance of a version permanently authorizes you 760 | to choose that version for the Program. 761 | 762 | Later license versions may give you additional or different 763 | permissions. However, no additional obligations are imposed on any 764 | author or copyright holder as a result of your choosing to follow a 765 | later version. 766 | 767 | 15. Disclaimer of Warranty. 768 | 769 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 770 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 771 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 772 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 773 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 774 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 775 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 776 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 777 | 778 | 16. Limitation of Liability. 779 | 780 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 781 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 782 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 783 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 784 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 785 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 786 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 787 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 788 | SUCH DAMAGES. 789 | 790 | 17. Interpretation of Sections 15 and 16. 791 | 792 | If the disclaimer of warranty and limitation of liability provided 793 | above cannot be given local legal effect according to their terms, 794 | reviewing courts shall apply local law that most closely approximates 795 | an absolute waiver of all civil liability in connection with the 796 | Program, unless a warranty or assumption of liability accompanies a 797 | copy of the Program in return for a fee. 798 | 799 | END OF TERMS AND CONDITIONS 800 | 801 | How to Apply These Terms to Your New Programs 802 | 803 | If you develop a new program, and you want it to be of the greatest 804 | possible use to the public, the best way to achieve this is to make it 805 | free software which everyone can redistribute and change under these terms. 806 | 807 | To do so, attach the following notices to the program. It is safest 808 | to attach them to the start of each source file to most effectively 809 | state the exclusion of warranty; and each file should have at least 810 | the "copyright" line and a pointer to where the full notice is found. 
811 | 812 | <one line to give the program's name and a brief idea of what it does.> 813 | Copyright (C) <year> <name of author> 814 | 815 | This program is free software: you can redistribute it and/or modify 816 | it under the terms of the GNU General Public License as published by 817 | the Free Software Foundation, either version 3 of the License, or 818 | (at your option) any later version. 819 | 820 | This program is distributed in the hope that it will be useful, 821 | but WITHOUT ANY WARRANTY; without even the implied warranty of 822 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 823 | GNU General Public License for more details. 824 | 825 | You should have received a copy of the GNU General Public License 826 | along with this program. If not, see <https://www.gnu.org/licenses/>. 827 | 828 | Also add information on how to contact you by electronic and paper mail. 829 | 830 | If the program does terminal interaction, make it output a short 831 | notice like this when it starts in an interactive mode: 832 | 833 | <program> Copyright (C) <year> <name of author> 834 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 835 | This is free software, and you are welcome to redistribute it 836 | under certain conditions; type `show c' for details. 837 | 838 | The hypothetical commands `show w' and `show c' should show the appropriate 839 | parts of the General Public License. Of course, your program's commands 840 | might be different; for a GUI interface, you would use an "about box". 841 | 842 | You should also get your employer (if you work as a programmer) or school, 843 | if any, to sign a "copyright disclaimer" for the program, if necessary. 844 | For more information on this, and how to apply and follow the GNU GPL, see 845 | <https://www.gnu.org/licenses/>. 846 | 847 | The GNU General Public License does not permit incorporating your program 848 | into proprietary programs. If your program is a subroutine library, you 849 | may consider it more useful to permit linking proprietary applications with 850 | the library. If this is what you want to do, use the GNU Lesser General 851 | Public License instead of this License. But first, please read 852 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 853 | 854 | {% endif %} 855 | --------------------------------------------------------------------------------