├── data ├── .gitkeep └── external │ └── .gitkeep ├── config ├── README.md ├── logging │ ├── local.conf │ └── testing.conf └── example-training-config.yml ├── figures └── .gitkeep ├── src ├── __init__.py ├── postprocess.py ├── sql │ └── .gitkeep ├── evaluate_model.py ├── helpers │ ├── __init__.py │ └── helpers.py ├── score_model.py ├── generate_features.py ├── train_model.py └── load_data.py ├── test ├── __init__.py ├── true │ ├── .gitkeep │ ├── example-boston-validate.csv │ ├── example-boston-test.csv │ ├── example-boston-validate-targets.csv │ ├── example-boston-test-targets.csv │ ├── example-boston-train.csv │ ├── example-boston-train-targets.csv │ ├── example-boston-validate-features.csv │ └── example-boston-test-features.csv ├── test_config.yml ├── README.md └── test.py ├── deliverables └── .gitkeep ├── references └── .gitkeep ├── docs ├── build │ ├── html │ │ ├── .nojekyll │ │ ├── objects.inv │ │ ├── _static │ │ │ ├── up.png │ │ │ ├── down.png │ │ │ ├── file.png │ │ │ ├── minus.png │ │ │ ├── plus.png │ │ │ ├── comment.png │ │ │ ├── up-pressed.png │ │ │ ├── ajax-loader.gif │ │ │ ├── comment-close.png │ │ │ ├── down-pressed.png │ │ │ ├── comment-bright.png │ │ │ ├── fonts │ │ │ │ ├── Lato │ │ │ │ │ ├── lato-bold.eot │ │ │ │ │ ├── lato-bold.ttf │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-italic.eot │ │ │ │ │ ├── lato-italic.ttf │ │ │ │ │ ├── lato-italic.woff │ │ │ │ │ ├── lato-regular.eot │ │ │ │ │ ├── lato-regular.ttf │ │ │ │ │ ├── lato-italic.woff2 │ │ │ │ │ ├── lato-regular.woff │ │ │ │ │ ├── lato-regular.woff2 │ │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ │ └── lato-bolditalic.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ └── RobotoSlab │ │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ │ ├── 
roboto-slab-v7-bold.woff │ │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── documentation_options.js │ │ │ ├── css │ │ │ │ └── badge_only.css │ │ │ ├── js │ │ │ │ └── theme.js │ │ │ ├── pygments.css │ │ │ └── doctools.js │ │ ├── _sources │ │ │ ├── model │ │ │ │ ├── src │ │ │ │ │ ├── modules.rst.txt │ │ │ │ │ ├── src.helpers.rst.txt │ │ │ │ │ └── src.rst.txt │ │ │ │ ├── test │ │ │ │ │ ├── modules.rst.txt │ │ │ │ │ └── test.rst.txt │ │ │ │ ├── modules.rst.txt │ │ │ │ ├── run.rst.txt │ │ │ │ └── conf.rst.txt │ │ │ └── index.rst.txt │ │ ├── .buildinfo │ │ ├── searchindex.js │ │ ├── model │ │ │ ├── conf.html │ │ │ ├── modules.html │ │ │ ├── run.html │ │ │ ├── test │ │ │ │ ├── modules.html │ │ │ │ └── test.html │ │ │ └── src │ │ │ │ ├── modules.html │ │ │ │ └── src.helpers.html │ │ ├── _modules │ │ │ ├── index.html │ │ │ └── src │ │ │ │ └── helpers │ │ │ │ └── helpers.html │ │ ├── search.html │ │ ├── index.html │ │ └── py-modindex.html │ └── doctrees │ │ ├── index.doctree │ │ ├── environment.pickle │ │ └── model │ │ ├── conf.doctree │ │ ├── run.doctree │ │ ├── modules.doctree │ │ ├── src │ │ ├── src.doctree │ │ ├── modules.doctree │ │ └── src.helpers.doctree │ │ └── test │ │ ├── test.doctree │ │ └── modules.doctree ├── source │ ├── model │ │ ├── src │ │ │ ├── modules.rst │ │ │ ├── src.helpers.rst │ │ │ └── src.rst │ │ ├── test │ │ │ ├── modules.rst │ │ │ └── test.rst │ │ ├── modules.rst │ │ ├── run.rst │ │ └── conf.rst │ ├── index.rst │ └── conf.py ├── Makefile └── README.md ├── notebooks ├── deliver │ └── .gitkeep └── develop │ └── .gitkeep ├── models └── README.md ├── requirements.txt ├── .gitignore ├── environment.yml ├── run.py └── README.md /data/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /config/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figures/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/postprocess.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sql/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/true/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/external/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deliverables/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /references/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/evaluate_model.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/build/html/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/deliver/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/develop/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | from .helpers import * -------------------------------------------------------------------------------- /docs/source/model/src/modules.rst: -------------------------------------------------------------------------------- 1 | src 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | src 8 | -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | This should contain trained model objects. It is not synced with git. -------------------------------------------------------------------------------- /docs/source/model/test/modules.rst: -------------------------------------------------------------------------------- 1 | test 2 | ==== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | test 8 | -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/build/html/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/up.png -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/html/_sources/model/src/modules.rst.txt: -------------------------------------------------------------------------------- 1 | src 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | src 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/model/test/modules.rst.txt: -------------------------------------------------------------------------------- 1 | test 2 | ==== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | test 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/down.png -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/model/conf.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/conf.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/model/run.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/run.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/build/doctrees/model/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/modules.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/model/src/src.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/src/src.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/build/html/_static/comment-close.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/build/doctrees/model/test/test.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/test/test.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/source/model/modules.rst: -------------------------------------------------------------------------------- 1 | reproducible-model 2 | ================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | conf 8 | run 9 | -------------------------------------------------------------------------------- /docs/build/doctrees/model/src/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/src/modules.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/model/test/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/test/modules.doctree -------------------------------------------------------------------------------- /docs/source/model/run.rst: -------------------------------------------------------------------------------- 1 | run module 2 | ========== 3 | 4 | .. automodule:: run 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/source/model/conf.rst: -------------------------------------------------------------------------------- 1 | conf module 2 | =========== 3 | 4 | .. 
automodule:: conf 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/doctrees/model/src/src.helpers.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/doctrees/model/src/src.helpers.doctree -------------------------------------------------------------------------------- /docs/build/html/_sources/model/modules.rst.txt: -------------------------------------------------------------------------------- 1 | reproducible-model 2 | ================== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | conf 8 | run 9 | -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_sources/model/run.rst.txt: -------------------------------------------------------------------------------- 1 | run module 2 | ========== 3 | 4 | .. 
automodule:: run 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_sources/model/conf.rst.txt: -------------------------------------------------------------------------------- 1 | conf module 2 | =========== 3 | 4 | .. automodule:: conf 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmawer/reproducible-model/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | awscli==1.16.89 2 | boto3==1.9.111 3 | numpy==1.16.2 4 | pandas==0.24.2 5 | pyaml==18.11.0 6 | python-dateutil==2.8.0 7 | scikit-learn==0.20.3 8 | scipy==1.2.1 9 | SQLAlchemy==1.3.1 10 | xgboost==0.82 11 | xmltodict==0.12.0 12 | -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # 
Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 7cb5edd85edb4333efc6c24d390cb365 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.1', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false, 10 | }; -------------------------------------------------------------------------------- /docs/source/model/test/test.rst: -------------------------------------------------------------------------------- 1 | test package 2 | ============ 3 | 4 | Submodules 5 | ---------- 6 | 7 | test.test module 8 | ---------------- 9 | 10 | .. automodule:: test.test 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: test 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /config/logging/local.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=stream_handler 6 | 7 | [formatters] 8 | keys=formatter 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=stream_handler 13 | 14 | [handler_stream_handler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=formatter 18 | args=(sys.stderr,) 19 | 20 | [formatter_formatter] 21 | format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s -------------------------------------------------------------------------------- /config/logging/testing.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=stream_handler 6 | 7 | [formatters] 8 | keys=formatter 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=stream_handler 13 | 14 | [handler_stream_handler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=formatter 18 | args=(sys.stderr,) 19 | 20 | [formatter_formatter] 21 | format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s -------------------------------------------------------------------------------- /docs/build/html/_sources/model/test/test.rst.txt: -------------------------------------------------------------------------------- 1 | test package 2 | ============ 3 | 4 | Submodules 5 | ---------- 6 | 7 | test.test module 8 | ---------------- 9 | 10 | .. automodule:: test.test 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: test 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | !data/archive/.gitkeep 3 | !data/.gitkeep 4 | models/* 5 | !models/README.md 6 | !models/archive/.gitkeep 7 | *.pyc 8 | .ipynb_checkpoints 9 | data/* 10 | !data/external/ 11 | models/* 12 | !models/README.md 13 | test/test/* 14 | notebooks/archive/ 15 | 16 | 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | .idea/ 21 | .log 22 | .png 23 | *.sh 24 | .idea 25 | *.pyc 26 | .ipynb_checkpoints 27 | .DS_Store 28 | 29 | 30 | !*/.gitkeep -------------------------------------------------------------------------------- /docs/source/model/src/src.helpers.rst: -------------------------------------------------------------------------------- 1 | src.helpers package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.helpers.helpers module 8 | -------------------------- 9 | 10 | .. automodule:: src.helpers.helpers 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: src.helpers 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/model/src/src.helpers.rst.txt: -------------------------------------------------------------------------------- 1 | src.helpers package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.helpers.helpers module 8 | -------------------------- 9 | 10 | .. automodule:: src.helpers.helpers 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: src.helpers 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. reproducible-model documentation master file, created by 2 | sphinx-quickstart on Tue Mar 26 23:16:02 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to reproducible-model's documentation! 7 | ============================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | model/src/src 14 | model/test/test 15 | model/run 16 | 17 | 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. reproducible-model documentation master file, created by 2 | sphinx-quickstart on Tue Mar 26 23:16:02 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to reproducible-model's documentation! 7 | ============================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | model/src/src 14 | model/test/test 15 | model/run 16 | 17 | 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
import datetime


def fillin_kwargs(keywords, kwargs):
    """Guarantee that every keyword has an entry in ``kwargs``.

    Each keyword missing from ``kwargs`` is added with an empty dict as its
    value, so callers can write ``**kwargs[keyword]`` without checking for
    the key first. Entries that already exist are left untouched.

    Args:
        keywords: A single keyword, or a list/tuple of keywords.
        kwargs (dict): Mapping to fill in. It is modified in place.

    Returns:
        dict: The same ``kwargs`` object that was passed in.
    """
    # isinstance (instead of an exact type comparison) also accepts list
    # subclasses, and treats a tuple as several keywords rather than as one
    # tuple-valued key.
    if not isinstance(keywords, (list, tuple)):
        keywords = [keywords]
    for keyword in keywords:
        kwargs.setdefault(keyword, {})
    return kwargs


class Timer:
    """Context manager that logs how long the enclosed block took.

    Args:
        function: Label of the step being timed; only used in the log message.
        logger: Object with an ``info`` method that accepts %-style
            arguments (for example a ``logging.Logger``).

    After the ``with`` block exits, ``start`` and ``end`` hold the
    ``datetime`` stamps taken on entry and exit and ``interval`` holds their
    difference as a ``timedelta``.
    """

    def __init__(self, function, logger):
        self.logger = logger
        self.function = function
        # Defined up front so the attributes exist even before the timer
        # has been entered or exited.
        self.start = None
        self.end = None
        self.interval = None

    def __enter__(self):
        self.start = datetime.datetime.now()

        return self

    def __exit__(self, *args):
        # Returns None, so an exception raised inside the block still
        # propagates; the duration is logged either way.
        self.end = datetime.datetime.now()
        self.interval = self.end - self.start
        self.logger.info("%s took %0.2f seconds", self.function, self.interval.total_seconds())
toctree:: 8 | 9 | src.helpers 10 | 11 | Submodules 12 | ---------- 13 | 14 | src.evaluate\_model module 15 | -------------------------- 16 | 17 | .. automodule:: src.evaluate_model 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | src.generate\_features module 23 | ----------------------------- 24 | 25 | .. automodule:: src.generate_features 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | src.load\_data module 31 | --------------------- 32 | 33 | .. automodule:: src.load_data 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | src.postprocess module 39 | ---------------------- 40 | 41 | .. automodule:: src.postprocess 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | src.score\_model module 47 | ----------------------- 48 | 49 | .. automodule:: src.score_model 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | src.train\_model module 55 | ----------------------- 56 | 57 | .. automodule:: src.train_model 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | 63 | Module contents 64 | --------------- 65 | 66 | .. 
automodule:: src 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | -------------------------------------------------------------------------------- /test/true/example-boston-validate.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,0 3 | 1,0 4 | 2,1 5 | 3,0 6 | 4,1 7 | 5,0 8 | 6,0 9 | 7,0 10 | 8,0 11 | 9,1 12 | 10,1 13 | 11,0 14 | 12,1 15 | 13,0 16 | 14,1 17 | 15,0 18 | 16,1 19 | 17,0 20 | 18,0 21 | 19,1 22 | 20,1 23 | 21,1 24 | 22,1 25 | 23,0 26 | 24,1 27 | 25,0 28 | 26,1 29 | 27,1 30 | 28,1 31 | 29,1 32 | 30,1 33 | 31,0 34 | 32,1 35 | 33,0 36 | 34,1 37 | 35,0 38 | 36,0 39 | 37,0 40 | 38,1 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,0 45 | 43,0 46 | 44,1 47 | 45,1 48 | 46,1 49 | 47,1 50 | 48,1 51 | 49,0 52 | 50,1 53 | 51,0 54 | 52,1 55 | 53,1 56 | 54,0 57 | 55,0 58 | 56,0 59 | 57,0 60 | 58,1 61 | 59,1 62 | 60,0 63 | 61,1 64 | 62,1 65 | 63,0 66 | 64,1 67 | 65,1 68 | 66,0 69 | 67,1 70 | 68,1 71 | 69,0 72 | 70,1 73 | 71,1 74 | 72,1 75 | 73,0 76 | 74,1 77 | 75,1 78 | 76,0 79 | 77,1 80 | 78,0 81 | 79,0 82 | 80,0 83 | 81,1 84 | 82,1 85 | 83,0 86 | 84,0 87 | 85,0 88 | 86,1 89 | 87,1 90 | 88,0 91 | 89,0 92 | 90,0 93 | 91,0 94 | 92,0 95 | 93,0 96 | 94,0 97 | 95,1 98 | 96,1 99 | 97,0 100 | 98,0 101 | 99,0 102 | 100,1 103 | 101,0 104 | 102,1 105 | 103,1 106 | 104,0 107 | 105,1 108 | 106,1 109 | 107,1 110 | 108,0 111 | 109,1 112 | 110,1 113 | 111,0 114 | 112,1 115 | 113,1 116 | 114,1 117 | 115,1 118 | 116,1 119 | 117,1 120 | 118,1 121 | 119,0 122 | 120,0 123 | 121,1 124 | 122,1 125 | 123,1 126 | 124,1 127 | 125,0 128 | -------------------------------------------------------------------------------- /test/true/example-boston-test.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,1 3 | 1,1 4 | 2,0 5 | 3,0 6 | 4,1 7 | 5,0 8 | 6,0 9 | 7,1 10 | 8,1 11 | 9,0 12 | 10,0 13 | 11,0 14 | 12,0 15 | 13,0 16 | 14,0 17 | 15,0 18 | 16,0 19 | 17,0 20 | 18,0 21 | 19,0 22 | 20,1 23 | 21,0 
24 | 22,0 25 | 23,0 26 | 24,1 27 | 25,1 28 | 26,1 29 | 27,0 30 | 28,0 31 | 29,0 32 | 30,0 33 | 31,0 34 | 32,1 35 | 33,0 36 | 34,0 37 | 35,0 38 | 36,1 39 | 37,1 40 | 38,0 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,0 45 | 43,1 46 | 44,1 47 | 45,0 48 | 46,1 49 | 47,0 50 | 48,0 51 | 49,1 52 | 50,0 53 | 51,1 54 | 52,0 55 | 53,1 56 | 54,0 57 | 55,1 58 | 56,1 59 | 57,1 60 | 58,1 61 | 59,0 62 | 60,1 63 | 61,0 64 | 62,1 65 | 63,0 66 | 64,1 67 | 65,0 68 | 66,1 69 | 67,1 70 | 68,0 71 | 69,1 72 | 70,1 73 | 71,0 74 | 72,1 75 | 73,0 76 | 74,0 77 | 75,1 78 | 76,0 79 | 77,1 80 | 78,0 81 | 79,1 82 | 80,1 83 | 81,0 84 | 82,1 85 | 83,1 86 | 84,1 87 | 85,1 88 | 86,0 89 | 87,0 90 | 88,1 91 | 89,1 92 | 90,1 93 | 91,1 94 | 92,1 95 | 93,1 96 | 94,0 97 | 95,1 98 | 96,0 99 | 97,1 100 | 98,0 101 | 99,1 102 | 100,1 103 | 101,1 104 | 102,0 105 | 103,1 106 | 104,1 107 | 105,1 108 | 106,1 109 | 107,0 110 | 108,1 111 | 109,1 112 | 110,1 113 | 111,0 114 | 112,0 115 | 113,1 116 | 114,1 117 | 115,0 118 | 116,0 119 | 117,1 120 | 118,1 121 | 119,1 122 | 120,1 123 | 121,0 124 | 122,1 125 | 123,1 126 | 124,1 127 | 125,1 128 | 126,0 129 | -------------------------------------------------------------------------------- /test/true/example-boston-validate-targets.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,0 3 | 1,0 4 | 2,1 5 | 3,0 6 | 4,1 7 | 5,0 8 | 6,0 9 | 7,0 10 | 8,0 11 | 9,1 12 | 10,1 13 | 11,0 14 | 12,1 15 | 13,0 16 | 14,1 17 | 15,0 18 | 16,1 19 | 17,0 20 | 18,0 21 | 19,1 22 | 20,1 23 | 21,1 24 | 22,1 25 | 23,0 26 | 24,1 27 | 25,0 28 | 26,1 29 | 27,1 30 | 28,1 31 | 29,1 32 | 30,1 33 | 31,0 34 | 32,1 35 | 33,0 36 | 34,1 37 | 35,0 38 | 36,0 39 | 37,0 40 | 38,1 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,0 45 | 43,0 46 | 44,1 47 | 45,1 48 | 46,1 49 | 47,1 50 | 48,1 51 | 49,0 52 | 50,1 53 | 51,0 54 | 52,1 55 | 53,1 56 | 54,0 57 | 55,0 58 | 56,0 59 | 57,0 60 | 58,1 61 | 59,1 62 | 60,0 63 | 61,1 64 | 62,1 65 | 63,0 66 | 64,1 67 | 65,1 68 | 66,0 69 | 67,1 70 | 
68,1 71 | 69,0 72 | 70,1 73 | 71,1 74 | 72,1 75 | 73,0 76 | 74,1 77 | 75,1 78 | 76,0 79 | 77,1 80 | 78,0 81 | 79,0 82 | 80,0 83 | 81,1 84 | 82,1 85 | 83,0 86 | 84,0 87 | 85,0 88 | 86,1 89 | 87,1 90 | 88,0 91 | 89,0 92 | 90,0 93 | 91,0 94 | 92,0 95 | 93,0 96 | 94,0 97 | 95,1 98 | 96,1 99 | 97,0 100 | 98,0 101 | 99,0 102 | 100,1 103 | 101,0 104 | 102,1 105 | 103,1 106 | 104,0 107 | 105,1 108 | 106,1 109 | 107,1 110 | 108,0 111 | 109,1 112 | 110,1 113 | 111,0 114 | 112,1 115 | 113,1 116 | 114,1 117 | 115,1 118 | 116,1 119 | 117,1 120 | 118,1 121 | 119,0 122 | 120,0 123 | 121,1 124 | 122,1 125 | 123,1 126 | 124,1 127 | 125,0 128 | -------------------------------------------------------------------------------- /docs/build/html/_sources/model/src/src.rst.txt: -------------------------------------------------------------------------------- 1 | src package 2 | =========== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | src.helpers 10 | 11 | Submodules 12 | ---------- 13 | 14 | src.evaluate\_model module 15 | -------------------------- 16 | 17 | .. automodule:: src.evaluate_model 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | src.generate\_features module 23 | ----------------------------- 24 | 25 | .. automodule:: src.generate_features 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | src.load\_data module 31 | --------------------- 32 | 33 | .. automodule:: src.load_data 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | src.postprocess module 39 | ---------------------- 40 | 41 | .. automodule:: src.postprocess 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | src.score\_model module 47 | ----------------------- 48 | 49 | .. automodule:: src.score_model 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | src.train\_model module 55 | ----------------------- 56 | 57 | .. 
automodule:: src.train_model 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | 63 | Module contents 64 | --------------- 65 | 66 | .. automodule:: src 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | -------------------------------------------------------------------------------- /test/true/example-boston-test-targets.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,1 3 | 1,1 4 | 2,0 5 | 3,0 6 | 4,1 7 | 5,0 8 | 6,0 9 | 7,1 10 | 8,1 11 | 9,0 12 | 10,0 13 | 11,0 14 | 12,0 15 | 13,0 16 | 14,0 17 | 15,0 18 | 16,0 19 | 17,0 20 | 18,0 21 | 19,0 22 | 20,1 23 | 21,0 24 | 22,0 25 | 23,0 26 | 24,1 27 | 25,1 28 | 26,1 29 | 27,0 30 | 28,0 31 | 29,0 32 | 30,0 33 | 31,0 34 | 32,1 35 | 33,0 36 | 34,0 37 | 35,0 38 | 36,1 39 | 37,1 40 | 38,0 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,0 45 | 43,1 46 | 44,1 47 | 45,0 48 | 46,1 49 | 47,0 50 | 48,0 51 | 49,1 52 | 50,0 53 | 51,1 54 | 52,0 55 | 53,1 56 | 54,0 57 | 55,1 58 | 56,1 59 | 57,1 60 | 58,1 61 | 59,0 62 | 60,1 63 | 61,0 64 | 62,1 65 | 63,0 66 | 64,1 67 | 65,0 68 | 66,1 69 | 67,1 70 | 68,0 71 | 69,1 72 | 70,1 73 | 71,0 74 | 72,1 75 | 73,0 76 | 74,0 77 | 75,1 78 | 76,0 79 | 77,1 80 | 78,0 81 | 79,1 82 | 80,1 83 | 81,0 84 | 82,1 85 | 83,1 86 | 84,1 87 | 85,1 88 | 86,0 89 | 87,0 90 | 88,1 91 | 89,1 92 | 90,1 93 | 91,1 94 | 92,1 95 | 93,1 96 | 94,0 97 | 95,1 98 | 96,0 99 | 97,1 100 | 98,0 101 | 99,1 102 | 100,1 103 | 101,1 104 | 102,0 105 | 103,1 106 | 104,1 107 | 105,1 108 | 106,1 109 | 107,0 110 | 108,1 111 | 109,1 112 | 110,1 113 | 111,0 114 | 112,0 115 | 113,1 116 | 114,1 117 | 115,0 118 | 116,0 119 | 117,1 120 | 118,1 121 | 119,1 122 | 120,1 123 | 121,0 124 | 122,1 125 | 123,1 126 | 124,1 127 | 125,1 128 | 126,0 129 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | ## Model tests 4 | 5 | From the 
repo root directory, run `python run.py test` to run tests to check that model code produces expected output. 6 | 7 | This will execute the command in the `test_config.yml` file and check that the files produced and saved to the `test/test` directory match those that are sources of truth in the `test/true` directory. 8 | 9 | As components of the model are developed, a sample of expected results given a sample input should be placed in `true/` and the command used to generate them and save the results to `test/` should be added to `test_config.yml`. 10 | 11 | ### Configuration 12 | 13 | The configuration file should look like the following: 14 | 15 | ```yaml 16 | test_name: 17 | command: Command that should be run to generate the results/artifacts to be tested 18 | true_dir: Path from the repo root to where the source of truth files are held (e.g. test/true/) 19 | test_dir: Path from the repo root to where the files produced by the command above will be stored 20 | files_to_compare: 21 | - Names of files that will exist in both the true and test directories after the above command has been run (e.g. 
test_output.csv) 22 | - The files with the same name in both directories will be compared and test will pass if they are the same 23 | - If files produced are JSON or other non-ordered data entities, this comparison will not work (need to add ability to compare dictionaries in future) 24 | ``` 25 | 26 | ## Unit tests 27 | 28 | Need to add 29 | -------------------------------------------------------------------------------- /config/example-training-config.yml: -------------------------------------------------------------------------------- 1 | model: 2 | name: example-model 3 | author: Chloe Mawer 4 | version: AA1 5 | description: Predicts a random result given some arbitrary data inputs as an example of this config file 6 | tags: 7 | - classifier 8 | - housing 9 | dependencies: requirements.txt 10 | load_data: 11 | how: csv 12 | csv: 13 | path: data/sample/boston_house_prices.csv 14 | usecols: [CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT] 15 | generate_features: 16 | make_categorical: 17 | columns: RAD 18 | RAD: 19 | categories: [1, 2, 3, 5, 4, 8, 6, 7, 24] 20 | one_hot_encode: True 21 | bin_values: 22 | columns: CRIM 23 | quartiles: 2 24 | save_dataset: test/test/boston_house_prices_processed.csv 25 | train_model: 26 | method: xgboost 27 | choose_features: 28 | features_to_use: [ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO] 29 | get_target: 30 | target: CRIM 31 | split_data: 32 | train_size: 0.5 33 | test_size: 0.25 34 | validate_size: 0.25 35 | random_state: 24 36 | save_split_prefix: test/test/example-boston 37 | params: 38 | max_depth: 100 39 | learning_rate: 50 40 | random_state: 1019 41 | fit: 42 | eval_metric: auc 43 | verbose: True 44 | save_tmo: models/example-boston-crime-prediction.pkl 45 | score_model: 46 | path_to_tmo: models/example-boston-crime-prediction.pkl 47 | predict: 48 | ntree_limit: 0 49 | save_scores: test/true/example-boston-test-scores.csv 50 | evaluate_model: 51 | metrics: [auc, accuracy, logloss] 52 | 
import logging
import argparse
import yaml
import os
import subprocess
import re
import datetime

import pickle

import sklearn
import xgboost
import pandas as pd
import numpy as np

from src.load_data import load_data
from src.helpers import Timer, fillin_kwargs
from src.generate_features import choose_features, get_target
from sklearn.linear_model import LogisticRegression, LinearRegression

logger = logging.getLogger(__name__)

# Keyword groups that score_model() expects to find in its **kwargs.
score_model_kwargs = ["predict"]


def score_model(df, path_to_tmo, save_scores=None, **kwargs):
    """Load a pickled model and use it to score the given data.

    Args:
        df: Input data; ``df.values`` is what gets passed to the model
            (presumably a pandas DataFrame - confirm against ``load_data``).
        path_to_tmo (str): Path to the pickled trained model object.
        save_scores (str, optional): If given, the predictions are written
            to this path as CSV without the index.
        **kwargs: May contain a ``predict`` dict whose items are forwarded
            as keyword arguments to ``model.predict``. Other keys are
            accepted and ignored.

    Returns:
        Whatever ``model.predict`` returns for ``df.values``.
    """
    # NOTE(review): unpickling can execute arbitrary code. Only point
    # path_to_tmo at model files produced by a trusted training run.
    with open(path_to_tmo, "rb") as f:
        model = pickle.load(f)

    # Make sure kwargs["predict"] exists so it can be unpacked below.
    kwargs = fillin_kwargs(score_model_kwargs, kwargs)
    with Timer("scoring", logger):
        y_predicted = model.predict(df.values, **kwargs["predict"])

    if save_scores is not None:
        pd.DataFrame(y_predicted).to_csv(save_scores, index=False)

    return y_predicted


def run_scoring(args):
    """Score a model based on parsed command line arguments.

    Args:
        args: Namespace with the attributes ``config`` (path to the YAML
            configuration), ``csv`` (optional path to the input CSV, takes
            precedence over the ``load_data`` section of the config) and
            ``save`` (optional path to write the scores to).

    Raises:
        ValueError: If neither ``args.csv`` nor a ``load_data`` section in
            the config provides the input data.
    """
    with open(args.config, "r") as f:
        # safe_load only builds plain Python objects; yaml.load without an
        # explicit Loader is unsafe and is rejected by PyYAML >= 6.
        config = yaml.safe_load(f)

    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError("Path to CSV for input data must be provided through --csv or "
                         "'load_data' configuration must exist in config file")

    y_predicted = score_model(df, **config["score_model"])

    if args.save is not None:
        pd.DataFrame(y_predicted).to_csv(args.save, index=False)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Score model")
    parser.add_argument('--config', help='path to yaml file with configurations')
    parser.add_argument('--csv', default=None, help="Path to CSV for input to model scoring")
    parser.add_argument('--save', default=None, help='Path to where the scores should be saved to (optional)')

    args = parser.parse_args()

    run_scoring(args)
import argparse
import logging
import logging.config

from test.test import run_tests
from src.score_model import run_scoring
from src.generate_features import run_features
from src.train_model import run_training


if __name__ == '__main__':
    # Command line entry point: each subcommand registers the function that
    # implements it via set_defaults(func=...), and that function is called
    # with the parsed arguments at the end.

    logging.config.fileConfig("config/logging/local.conf")
    logger = logging.getLogger("run")
    parser = argparse.ArgumentParser(description="Run components of the model source code")
    subparsers = parser.add_subparsers()

    # FEATURE subparser
    sb_features = subparsers.add_parser("generate_features", description="Generate features")
    sb_features.add_argument('--config', help='path to yaml file with configurations')
    sb_features.add_argument('--csv', default=None, help="Path to CSV for input to feature generation")
    sb_features.add_argument('--save', default=None, help='Path to where the dataset should be saved to (optional)')
    sb_features.set_defaults(func=run_features)

    # TRAIN subparser
    sb_train = subparsers.add_parser("train_model", description="Train model")
    sb_train.add_argument('--config', help='path to yaml file with configurations')
    sb_train.add_argument('--csv', default=None, help="Path to CSV for input to model training")
    sb_train.add_argument('--save', default=None, help='Path to where the dataset should be saved to (optional)')
    sb_train.set_defaults(func=run_training)

    # SCORE subparser
    sb_score = subparsers.add_parser("score_model", description="Score model")
    sb_score.add_argument('--config', help='path to yaml file with configurations')
    sb_score.add_argument('--csv', default=None, help="Path to CSV for input to model scoring")
    sb_score.add_argument('--save', default=None, help='Path to where the scores should be saved to (optional)')
    sb_score.set_defaults(func=run_scoring)

    # TEST subparser
    sb_test = subparsers.add_parser("test", description="Test whether the expected outputs are produced")
    sb_test.add_argument("--path", default="test/test_config.yml", help="Path to the test configuration file")
    sb_test.set_defaults(func=run_tests)

    args = parser.parse_args()
    # Subcommands are optional by default in Python 3, so without this check
    # a bare `python run.py` fails with an AttributeError on args.func.
    if not hasattr(args, "func"):
        parser.error("a subcommand is required: generate_features, train_model, score_model or test")
    args.func(args)
Install the necessary packages 37 | ```bash 38 | conda install sphinx 39 | conda install sphinx_rtd_theme 40 | ``` 41 | 1. Run `sphinx-quickstart` 42 | 43 | 2. Edit `conf.py` 44 | 45 | Add the following at the top of the script: 46 | 47 | ```python 48 | import sphinx_rtd_theme 49 | sys.path.insert(0, os.path.abspath('../..')) 50 | sys.path.insert(0, os.path.abspath('../')) 51 | sys.path.insert(0, os.path.abspath('../src')) 52 | ``` 53 | 54 | Change `html_theme` (found around line 85) and add `html_theme_path` as follows: 55 | 56 | ```python 57 | html_theme = 'sphinx_rtd_theme' 58 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 59 | ``` 60 | 61 | 3. Run `sphinx-apidoc` to autogenerate pages with `autodoc` as follows from the command line: 62 | 63 | ```bash 64 | sphinx-apidoc -f -o source/model/ ../ ../data/ ../figures ../src ../test 65 | 66 | sphinx-apidoc -f -o source/model/test/ ../test/ 67 | 68 | sphinx-apidoc -f -o source/model/src/ ../src/ ../src/archive/ 69 | ``` 70 | 71 | 4. Add to `source/index.rst`: 72 | 73 | ```markdown 74 | Contents 75 | -------- 76 | .. toctree:: 77 | :maxdepth: 2 78 | 79 | model/src/src 80 | model/test/test 81 | model/run 82 | ``` 83 | 84 | 5. 
Make html files by running from the command line: 85 | 86 | ```bash 87 | make html 88 | ``` 89 | -------------------------------------------------------------------------------- /test/true/example-boston-train.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,0 3 | 1,1 4 | 2,1 5 | 3,1 6 | 4,0 7 | 5,1 8 | 6,0 9 | 7,1 10 | 8,0 11 | 9,0 12 | 10,0 13 | 11,1 14 | 12,1 15 | 13,0 16 | 14,0 17 | 15,0 18 | 16,0 19 | 17,0 20 | 18,1 21 | 19,0 22 | 20,0 23 | 21,0 24 | 22,1 25 | 23,1 26 | 24,1 27 | 25,0 28 | 26,0 29 | 27,0 30 | 28,1 31 | 29,1 32 | 30,1 33 | 31,1 34 | 32,0 35 | 33,0 36 | 34,1 37 | 35,1 38 | 36,1 39 | 37,1 40 | 38,1 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,1 45 | 43,0 46 | 44,0 47 | 45,1 48 | 46,0 49 | 47,0 50 | 48,0 51 | 49,1 52 | 50,1 53 | 51,0 54 | 52,1 55 | 53,1 56 | 54,1 57 | 55,0 58 | 56,0 59 | 57,0 60 | 58,0 61 | 59,1 62 | 60,0 63 | 61,0 64 | 62,0 65 | 63,0 66 | 64,0 67 | 65,1 68 | 66,0 69 | 67,1 70 | 68,0 71 | 69,1 72 | 70,0 73 | 71,0 74 | 72,1 75 | 73,0 76 | 74,0 77 | 75,0 78 | 76,0 79 | 77,1 80 | 78,1 81 | 79,0 82 | 80,0 83 | 81,1 84 | 82,1 85 | 83,0 86 | 84,1 87 | 85,0 88 | 86,0 89 | 87,1 90 | 88,1 91 | 89,0 92 | 90,0 93 | 91,1 94 | 92,0 95 | 93,1 96 | 94,0 97 | 95,1 98 | 96,0 99 | 97,0 100 | 98,0 101 | 99,0 102 | 100,0 103 | 101,1 104 | 102,1 105 | 103,0 106 | 104,1 107 | 105,0 108 | 106,0 109 | 107,1 110 | 108,1 111 | 109,1 112 | 110,1 113 | 111,1 114 | 112,1 115 | 113,0 116 | 114,1 117 | 115,0 118 | 116,0 119 | 117,0 120 | 118,1 121 | 119,1 122 | 120,0 123 | 121,1 124 | 122,0 125 | 123,0 126 | 124,1 127 | 125,1 128 | 126,0 129 | 127,1 130 | 128,0 131 | 129,0 132 | 130,1 133 | 131,0 134 | 132,1 135 | 133,0 136 | 134,1 137 | 135,0 138 | 136,0 139 | 137,1 140 | 138,1 141 | 139,1 142 | 140,0 143 | 141,0 144 | 142,0 145 | 143,1 146 | 144,0 147 | 145,0 148 | 146,0 149 | 147,0 150 | 148,1 151 | 149,0 152 | 150,0 153 | 151,1 154 | 152,1 155 | 153,0 156 | 154,1 157 | 155,0 158 | 156,0 159 | 157,0 160 | 158,1 161 
| 159,0 162 | 160,1 163 | 161,1 164 | 162,1 165 | 163,1 166 | 164,0 167 | 165,0 168 | 166,1 169 | 167,0 170 | 168,1 171 | 169,0 172 | 170,0 173 | 171,0 174 | 172,0 175 | 173,1 176 | 174,1 177 | 175,0 178 | 176,1 179 | 177,0 180 | 178,0 181 | 179,0 182 | 180,0 183 | 181,0 184 | 182,0 185 | 183,0 186 | 184,1 187 | 185,0 188 | 186,0 189 | 187,1 190 | 188,1 191 | 189,0 192 | 190,1 193 | 191,1 194 | 192,0 195 | 193,1 196 | 194,1 197 | 195,1 198 | 196,1 199 | 197,1 200 | 198,1 201 | 199,0 202 | 200,0 203 | 201,1 204 | 202,1 205 | 203,1 206 | 204,1 207 | 205,0 208 | 206,0 209 | 207,0 210 | 208,1 211 | 209,0 212 | 210,0 213 | 211,1 214 | 212,1 215 | 213,0 216 | 214,0 217 | 215,1 218 | 216,1 219 | 217,0 220 | 218,1 221 | 219,0 222 | 220,0 223 | 221,0 224 | 222,0 225 | 223,0 226 | 224,0 227 | 225,1 228 | 226,1 229 | 227,1 230 | 228,1 231 | 229,1 232 | 230,1 233 | 231,1 234 | 232,0 235 | 233,0 236 | 234,1 237 | 235,1 238 | 236,1 239 | 237,1 240 | 238,1 241 | 239,1 242 | 240,0 243 | 241,1 244 | 242,0 245 | 243,0 246 | 244,0 247 | 245,1 248 | 246,1 249 | 247,1 250 | 248,1 251 | 249,0 252 | 250,0 253 | 251,1 254 | 252,1 255 | -------------------------------------------------------------------------------- /test/true/example-boston-train-targets.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,0 3 | 1,1 4 | 2,1 5 | 3,1 6 | 4,0 7 | 5,1 8 | 6,0 9 | 7,1 10 | 8,0 11 | 9,0 12 | 10,0 13 | 11,1 14 | 12,1 15 | 13,0 16 | 14,0 17 | 15,0 18 | 16,0 19 | 17,0 20 | 18,1 21 | 19,0 22 | 20,0 23 | 21,0 24 | 22,1 25 | 23,1 26 | 24,1 27 | 25,0 28 | 26,0 29 | 27,0 30 | 28,1 31 | 29,1 32 | 30,1 33 | 31,1 34 | 32,0 35 | 33,0 36 | 34,1 37 | 35,1 38 | 36,1 39 | 37,1 40 | 38,1 41 | 39,0 42 | 40,0 43 | 41,0 44 | 42,1 45 | 43,0 46 | 44,0 47 | 45,1 48 | 46,0 49 | 47,0 50 | 48,0 51 | 49,1 52 | 50,1 53 | 51,0 54 | 52,1 55 | 53,1 56 | 54,1 57 | 55,0 58 | 56,0 59 | 57,0 60 | 58,0 61 | 59,1 62 | 60,0 63 | 61,0 64 | 62,0 65 | 63,0 66 | 64,0 67 | 65,1 68 | 66,0 69 
| 67,1 70 | 68,0 71 | 69,1 72 | 70,0 73 | 71,0 74 | 72,1 75 | 73,0 76 | 74,0 77 | 75,0 78 | 76,0 79 | 77,1 80 | 78,1 81 | 79,0 82 | 80,0 83 | 81,1 84 | 82,1 85 | 83,0 86 | 84,1 87 | 85,0 88 | 86,0 89 | 87,1 90 | 88,1 91 | 89,0 92 | 90,0 93 | 91,1 94 | 92,0 95 | 93,1 96 | 94,0 97 | 95,1 98 | 96,0 99 | 97,0 100 | 98,0 101 | 99,0 102 | 100,0 103 | 101,1 104 | 102,1 105 | 103,0 106 | 104,1 107 | 105,0 108 | 106,0 109 | 107,1 110 | 108,1 111 | 109,1 112 | 110,1 113 | 111,1 114 | 112,1 115 | 113,0 116 | 114,1 117 | 115,0 118 | 116,0 119 | 117,0 120 | 118,1 121 | 119,1 122 | 120,0 123 | 121,1 124 | 122,0 125 | 123,0 126 | 124,1 127 | 125,1 128 | 126,0 129 | 127,1 130 | 128,0 131 | 129,0 132 | 130,1 133 | 131,0 134 | 132,1 135 | 133,0 136 | 134,1 137 | 135,0 138 | 136,0 139 | 137,1 140 | 138,1 141 | 139,1 142 | 140,0 143 | 141,0 144 | 142,0 145 | 143,1 146 | 144,0 147 | 145,0 148 | 146,0 149 | 147,0 150 | 148,1 151 | 149,0 152 | 150,0 153 | 151,1 154 | 152,1 155 | 153,0 156 | 154,1 157 | 155,0 158 | 156,0 159 | 157,0 160 | 158,1 161 | 159,0 162 | 160,1 163 | 161,1 164 | 162,1 165 | 163,1 166 | 164,0 167 | 165,0 168 | 166,1 169 | 167,0 170 | 168,1 171 | 169,0 172 | 170,0 173 | 171,0 174 | 172,0 175 | 173,1 176 | 174,1 177 | 175,0 178 | 176,1 179 | 177,0 180 | 178,0 181 | 179,0 182 | 180,0 183 | 181,0 184 | 182,0 185 | 183,0 186 | 184,1 187 | 185,0 188 | 186,0 189 | 187,1 190 | 188,1 191 | 189,0 192 | 190,1 193 | 191,1 194 | 192,0 195 | 193,1 196 | 194,1 197 | 195,1 198 | 196,1 199 | 197,1 200 | 198,1 201 | 199,0 202 | 200,0 203 | 201,1 204 | 202,1 205 | 203,1 206 | 204,1 207 | 205,0 208 | 206,0 209 | 207,0 210 | 208,1 211 | 209,0 212 | 210,0 213 | 211,1 214 | 212,1 215 | 213,0 216 | 214,0 217 | 215,1 218 | 216,1 219 | 217,0 220 | 218,1 221 | 219,0 222 | 220,0 223 | 221,0 224 | 222,0 225 | 223,0 226 | 224,0 227 | 225,1 228 | 226,1 229 | 227,1 230 | 228,1 231 | 229,1 232 | 230,1 233 | 231,1 234 | 232,0 235 | 233,0 236 | 234,1 237 | 235,1 238 | 236,1 239 | 237,1 240 | 238,1 
241 | 239,1 242 | 240,0 243 | 241,1 244 | 242,0 245 | 243,0 246 | 244,0 247 | 245,1 248 | 246,1 249 | 247,1 250 | 248,1 251 | 249,0 252 | 250,0 253 | 251,1 254 | 252,1 255 | -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions 
.rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /docs/build/html/searchindex.js: -------------------------------------------------------------------------------- 1 | 
Search.setIndex({docnames:["index","model/conf","model/modules","model/run","model/src/modules","model/src/src","model/src/src.helpers","model/test/modules","model/test/test"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.intersphinx":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["index.rst","model/conf.rst","model/modules.rst","model/run.rst","model/src/modules.rst","model/src/src.rst","model/src/src.helpers.rst","model/test/modules.rst","model/test/test.rst"],objects:{"":{conf:[1,0,0,"-"],run:[3,0,0,"-"],src:[5,0,0,"-"],test:[8,0,0,"-"]},"src.generate_features":{bin_values:[5,1,1,""],choose_features:[5,1,1,""],generate_features:[5,1,1,""],get_target:[5,1,1,""],make_categorical:[5,1,1,""],one_hot_encode:[5,1,1,""],run_features:[5,1,1,""]},"src.helpers":{helpers:[6,0,0,"-"]},"src.helpers.helpers":{Timer:[6,2,1,""],fillin_kwargs:[6,1,1,""]},"src.load_data":{copy_file_from_s3:[5,1,1,""],copy_files_from_s3:[5,1,1,""],create_connection:[5,1,1,""],format_sql:[5,1,1,""],ifin:[5,1,1,""],load_data:[5,1,1,""],load_sql:[5,1,1,""],query_data:[5,1,1,""],read_csv:[5,1,1,""]},"src.score_model":{run_scoring:[5,1,1,""],score_model:[5,1,1,""]},"src.train_model":{run_training:[5,1,1,""],split_data:[5,1,1,""],train_model:[5,1,1,""]},"test.test":{compare_dict:[8,1,1,""],open_dictlike_file:[8,1,1,""],run_tests:[8,1,1,""]},src:{evaluate_model:[5,0,0,"-"],generate_features:[5,0,0,"-"],helpers:[6,0,0,"-"],load_data:[5,0,0,"-"],postprocess:[5,0,0,"-"],score_model:[5,0,0,"-"],train_model:[5,0,0,"-"]},test:{test:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python 
class"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class"},terms:{"class":6,"function":6,"return":5,"true":5,alt:5,amazonrds_pw:5,amazonrds_us:5,arg:[5,8],base:6,bin:5,bin_valu:5,choose_featur:5,column:5,compare_dict:8,conf:2,config:5,config_path:8,conn:5,content:[0,4,7],copy_file_from_s3:5,copy_files_from_s3:5,create_connect:5,csv:5,data:5,databas:5,datafram:5,dbconfig:5,destpath:5,dicta:8,dictb:8,dictionari:5,drop_origin:5,evaluate_model:[0,4],fals:5,features_to_us:5,fillin_kwarg:6,fname:8,format_sql:5,generate_featur:[0,4],get_target:5,helper:[4,5],host:5,how:5,ifin:5,index:0,input:5,keyword:[5,6],kwarg:[5,6],load:5,load_com:5,load_data:[0,4],load_sql:5,logger:[6,8],make_categor:5,method:5,modul:[0,2,4,7],mysql:5,new_column:5,none:[5,8],object:6,one:5,one_hot:5,one_hot_encod:5,open_dictlike_fil:8,option:5,packag:[0,4,7],page:0,panda:5,param:5,paramet:5,password:5,password_env:5,path:5,path_to_sql:5,path_to_tmo:5,port:5,postprocess:[0,4],pymysql:5,python:5,quartil:5,queri:5,query_data:5,random_st:5,read_csv:5,remain:5,replace_sqlvar:5,replace_var:5,run:[0,2],run_featur:5,run_scor:5,run_test:8,run_train:5,s3path:5,save_dataset:5,save_path:5,save_scor:5,save_split_prefix:5,save_tmo:5,score_model:[0,4],search:0,sourc:[5,6,8],split_data:5,sql:5,sqltype:5,src:0,submodul:[0,4,7],subpackag:[0,4],target:5,test:0,test_siz:5,timer:6,train_model:[0,4],train_siz:5,user_env:5,usernam:5,validate_s:5,yml:5},titles:["Welcome to reproducible-model\u2019s documentation!","conf module","reproducible-model","run module","src","src package","src.helpers package","test","test package"],titleterms:{conf:1,content:[5,6,8],document:0,evaluate_model:5,generate_featur:5,helper:6,indic:0,load_data:5,model:[0,2],modul:[1,3,5,6,8],packag:[5,6,8],postprocess:5,reproduc:[0,2],run:3,score_model:5,src:[4,5,6],submodul:[5,6,8],subpackag:5,tabl:0,test:[7,8],train_model:5,welcom:0}}) -------------------------------------------------------------------------------- 
/docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.2 | MIT license */ 2 | /* Built 20181005 13:10 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | conf module — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • conf module
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | View page source 136 | 137 | 138 |
  • 139 | 140 |
141 | 142 | 143 |
144 |
145 |
146 |
147 | 148 |
149 |

conf module

150 |
151 | 152 | 153 |
154 | 155 |
156 |
157 | 158 | 159 |
160 | 161 |
162 |

163 | © Copyright 2019 164 | 165 |

166 |
167 | Built with Sphinx using a theme provided by Read the Docs. 168 | 169 |
170 | 171 |
172 |
173 | 174 |
175 | 176 |
# 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 200 | 201 | 202 |
# -------------------------------------------------------------------------------- /src/generate_features.py: --------------------------------------------------------------------------------
"""Feature-generation steps: binning, categorical conversion, one-hot encoding and feature selection."""
import logging
import argparse
import yaml
import os
import subprocess
import re
import boto3
import sqlalchemy
import pandas as pd

from src.load_data import load_data, read_csv

logger = logging.getLogger(__name__)


def choose_features(df, features_to_use=None, save_path=None, target=None, **kwargs):
    """Reduce a dataframe to the requested feature columns.

    A column is kept when it is listed in `features_to_use`, when it is a dummy column
    (named ``<feature>_dummy_<i>``) of a listed feature, or when it is the `target` column.

    Args:
        df: Pandas dataframe to select columns from.
        features_to_use: Collection of column names to keep. If None, `df` is returned unchanged.
        save_path: Optional path; when given, the selected dataframe is written there as CSV.
        target: Optional name of a column that is always kept.
        **kwargs: Passed through to `DataFrame.to_csv()` when `save_path` is given.

    Returns: Pandas dataframe holding only the selected columns.

    """
    if features_to_use is None:
        X = df
    else:
        features = []
        dropped_columns = []
        for column in df.columns:
            # Dummy columns are named "<original>_dummy_<i>", so the text before "_dummy_"
            # identifies the feature they were generated from. str() keeps this working for
            # non-string column labels.
            source_feature = str(column).split("_dummy_")[0]
            if column in features_to_use or source_feature in features_to_use or column == target:
                features.append(column)
            else:
                dropped_columns.append(column)

        if dropped_columns:
            logger.info("The following columns were not used as features: %s",
                        ",".join(str(column) for column in dropped_columns))

        X = df[features]

    if save_path is not None:
        X.to_csv(save_path, **kwargs)

    return X


def get_target(df, target, save_path=None, **kwargs):
    """Extract the target column from a dataframe.

    Args:
        df: Pandas dataframe containing the target column.
        target: Name of the target column.
        save_path: Optional path; when given, the target column is written there as CSV.
        **kwargs: Passed through to `to_csv()` when `save_path` is given.

    Returns: The target column's underlying array (`.values`).

    """
    y = df[target]

    if save_path is not None:
        y.to_csv(save_path, **kwargs)

    return y.values


def _bin_labels(spec):
    """Return integer labels 0..k-1 for a bin spec that is a bin count or a sequence of k+1 edges."""
    n_bins = spec if pd.api.types.is_integer(spec) else len(spec) - 1
    return list(range(n_bins))


def bin_values(df, columns, bins=None, quartiles=None, new_column=False, **kwargs):
    """Discretise columns with `pd.cut` (`bins`) or `pd.qcut` (`quartiles`), labelling bins 0..k-1.

    Args:
        df: Pandas dataframe; it is modified in place and also returned.
        columns: Column name or list of column names to bin.
        bins: Bin specification for `pd.cut`. A list with one entry per column is applied
            column by column; anything else is used as-is for every column.
        quartiles: Quantile specification for `pd.qcut`. A list is indexed per column;
            anything else is used as-is for every column.
        new_column: If True, write the result to ``<column>_binned`` instead of overwriting `column`.
        **kwargs: Accepted for configuration pass-through; not used.

    Returns: The dataframe with the binned columns.

    Raises:
        ValueError: If both or neither of `bins` and `quartiles` are given.

    """
    columns = columns if isinstance(columns, list) else [columns]

    if bins is not None and quartiles is not None:
        raise ValueError("Only bins or quartiles can be done at one time.")
    if bins is None and quartiles is None:
        raise ValueError("Specify bins or quartiles")

    for j, column in enumerate(columns):
        # The original left the "%s" placeholder unformatted, so every new column was
        # literally named "%s_binned" and later columns overwrote earlier ones.
        column_name = "%s_binned" % (column,) if new_column else column
        if bins is not None:
            bins_input = bins[j] if isinstance(bins, list) and len(bins) == len(columns) else bins
            df[column_name] = pd.cut(df[column], bins=bins_input, labels=_bin_labels(bins_input))
        else:
            quartiles_input = quartiles[j] if isinstance(quartiles, list) else quartiles
            df[column_name] = pd.qcut(df[column], q=quartiles_input, labels=_bin_labels(quartiles_input))

    return df


def make_categorical(df, columns, one_hot=False, **kwargs):
    """Convert columns to pandas categoricals and optionally one-hot encode them.

    Args:
        df: Pandas dataframe to transform.
        columns: Column name or list of column names to convert.
        one_hot: If True, one-hot encode every column in `columns`.
        **kwargs: Optional per-column settings keyed by column name. Each value is a dict
            that may hold ``read_csv`` (keyword arguments for `read_csv()` whose result is
            used as the categories), ``categories`` (explicit categories) and
            ``one_hot_encode`` (truthy to one-hot encode just this column).

    Returns: The transformed dataframe.

    """
    columns = columns if isinstance(columns, list) else [columns]

    for column in columns:
        options = kwargs.get(column) or {}

        if "read_csv" in options:
            # NOTE(review): read_csv() returns a dataframe, which is handed to pd.Categorical
            # unchanged - confirm the file layout yields the intended categories.
            categories = read_csv(**options["read_csv"])
        elif "categories" in options:
            categories = options["categories"]
        elif column in kwargs:
            # Missing values cannot be categories, so they are dropped before taking uniques.
            categories = df[column].dropna().unique()
        else:
            # No settings for this column: let pandas infer the categories, instead of
            # reading an undefined (or a previous column's) `categories`.
            categories = None

        df[column] = pd.Categorical(df[column], categories=categories)

        if one_hot or options.get("one_hot_encode"):
            df = one_hot_encode(df, column)

    return df


def one_hot_encode(df, columns, drop_original=True):
    """Append dummy columns named ``<column>_dummy_<i>`` for each column in `columns`.

    Args:
        df: Pandas dataframe to transform.
        columns: Column name or list of column names to encode.
        drop_original: If True, remove the encoded source columns from the result.

    Returns: New dataframe with the dummy columns appended.

    """
    columns = columns if isinstance(columns, list) else [columns]

    for column in columns:
        dummies = pd.get_dummies(df[column])
        dummies.columns = ["%s_dummy_%i" % (column, j) for j in range(len(dummies.columns))]
        df = pd.concat([df, dummies], axis=1)

    if drop_original:
        df = df.drop(labels=columns, axis=1)

    return df


# Transformation steps that may be requested by name in the `generate_features` configuration.
_FEATURE_STEPS = {
    "bin_values": bin_values,
    "make_categorical": make_categorical,
    "one_hot_encode": one_hot_encode,
}


def generate_features(df, save_dataset=None, **kwargs):
    """Run the configured transformation steps in order, then select the final features.

    Args:
        df: Pandas dataframe to transform.
        save_dataset: Optional path; when given, the final dataframe is written there as CSV.
        **kwargs: Step name mapped to that step's keyword arguments. ``choose_features`` is
            applied last; ``get_target`` is ignored here; every other key must name one of
            the transformation functions in this module.

    Returns: Pandas dataframe of generated features.

    Raises:
        ValueError: If a step name is not a known transformation.

    """
    for step, step_kwargs in kwargs.items():
        if step in ("choose_features", "get_target"):
            continue
        # Steps are looked up in an explicit table rather than eval()'d, so a configuration
        # file cannot execute arbitrary expressions.
        if step not in _FEATURE_STEPS:
            raise ValueError("Unknown feature generation step '%s'; expected one of: %s"
                             % (step, ", ".join(sorted(_FEATURE_STEPS))))
        logger.debug("Generating feature via %s", step)
        df = _FEATURE_STEPS[step](df, **step_kwargs)

    df = choose_features(df, **kwargs.get("choose_features", {}))

    if save_dataset is not None:
        df.to_csv(save_dataset)

    return df


def run_features(args):
    """Load input data and generate features as described by a YAML configuration file.

    Args:
        args: Parsed command-line arguments with `config` (path to the YAML file) and
            `csv` (optional path to a CSV that overrides the configured data source).

    Returns: Pandas dataframe of generated features.

    Raises:
        ValueError: If no CSV path is given and the config has no ``load_data`` section.

    """
    with open(args.config, "r") as f:
        # safe_load: a bare yaml.load(f) is rejected by current PyYAML and can construct
        # arbitrary objects on older releases.
        config = yaml.safe_load(f)

    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError("Path to CSV for input data must be provided through --csv or "
                         "'load_data' configuration must exist in config file")

    return generate_features(df, **config["generate_features"])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Generate features")
    parser.add_argument('--config', help='path to yaml file with configurations')
    parser.add_argument('--csv', default=None, help="Path to CSV for generating features from")

    args = parser.parse_args()

    run_features(args)

# -------------------------------------------------------------------------------- /docs/build/html/_modules/index.html: --------------------------------------------------------------------------------
# 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Overview: module code — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • Overview: module code
  • 130 | 131 | 132 |
  • 133 | 134 |
  • 135 | 136 |
137 | 138 | 139 |
140 |
141 |
142 |
143 | 144 |

All modules for which code is available

145 | 152 | 153 |
154 | 155 |
156 |
157 | 158 | 159 |
160 | 161 |
162 |

163 | © Copyright 2019 164 | 165 |

166 |
167 | Built with Sphinx using a theme provided by Read the Docs. 168 | 169 |
170 | 171 |
172 |
173 | 174 |
175 | 176 |
# 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 200 | 201 | 202 |
# -------------------------------------------------------------------------------- /src/train_model.py: --------------------------------------------------------------------------------
"""Split a dataset and train one of the configured model types on it."""
import logging
import argparse
import yaml
import os
import subprocess
import re
import datetime
import inspect

import pickle

import sklearn
import xgboost
import pandas as pd
import numpy as np

from src.load_data import load_data
from src.helpers import Timer, fillin_kwargs
from src.generate_features import choose_features, get_target
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split

logger = logging.getLogger(__name__)

# Model classes selectable through the `method` argument of `train_model()`.
methods = dict(logistic=LogisticRegression,
               linear_regression=LinearRegression,
               xgboost=xgboost.XGBClassifier)

# Keys that `train_model()` reads from its keyword arguments after `fillin_kwargs()`.
train_model_kwargs = ["split_data", "params", "fit", "compile"]


def split_data(X, y, train_size=1, test_size=0, validate_size=0, random_state=24, save_split_prefix=None):
    """Split features and targets into train / validate / test sets.

    Sizes are either proportions summing to 1 or row counts summing to `len(X)`.

    Args:
        X: Feature data supporting `len()` and slicing (e.g. a pandas dataframe).
        y: Targets aligned with `X`, or None when there are no targets.
        train_size: Proportion or number of rows for the training set.
        test_size: Proportion or number of rows for the test set.
        validate_size: Proportion or number of rows for the validation set.
        random_state: Seed for the first split; `random_state + 1` seeds the second.
        save_split_prefix: Optional path prefix; each split is written to
            ``<prefix>-<split>-features.csv`` (and ``-targets.csv`` when `y` was given).

    Returns: Tuple ``(X, y)`` of dicts keyed by ``"train"`` and, when non-empty,
        ``"test"`` and ``"validate"``. When `y` was None, the second dict is ``{"train": None}``.

    Raises:
        ValueError: If `X` and `y` differ in length, or the sizes sum to neither 1 nor `len(X)`.

    """
    n_rows = len(X)
    include_y = y is not None
    if include_y:
        if len(y) != n_rows:
            raise ValueError("X and y must have the same number of rows")
    else:
        # Placeholder targets so the same splitting calls can be used without targets.
        y = [0] * n_rows

    total = train_size + test_size + validate_size
    if np.isclose(total, 1):
        # Proportions: convert to row counts. isclose() is used because sums such as
        # 0.7 + 0.2 + 0.1 are not exactly 1 in floating point.
        train_size = int(np.round(train_size * n_rows))
        validate_size = min(int(np.round(validate_size * n_rows)), n_rows - train_size)
        test_size = n_rows - train_size - validate_size
    elif total != n_rows:
        raise ValueError("train_size + test_size + validate_size "
                         "must equal 1 or equal the number of rows in the dataset")

    # Zero-row slices stand in for splits that were not requested.
    X_validate, y_validate = X[:0], y[:0]
    X_test, y_test = X[:0], y[:0]

    if train_size >= n_rows:
        # Everything is training data; train_test_split cannot produce an empty remainder.
        X_train, y_train = X, y
    else:
        X_train, X_remain, y_train, y_remain = train_test_split(X, y, train_size=train_size,
                                                                random_state=random_state)
        if test_size == 0:
            X_validate, y_validate = X_remain, y_remain
        elif validate_size == 0:
            X_test, y_test = X_remain, y_remain
        else:
            X_validate, X_test, y_validate, y_test = train_test_split(X_remain, y_remain,
                                                                      test_size=test_size,
                                                                      random_state=random_state + 1)

    logger.debug("Training features are of type %s", type(X_train))
    X = dict(train=X_train)
    y = dict(train=y_train)

    if len(X_test) > 0:
        X["test"] = X_test
        y["test"] = y_test
    if len(X_validate) > 0:
        X["validate"] = X_validate
        y["validate"] = y_validate

    if save_split_prefix is not None:
        for split in X:
            pd.DataFrame(X[split]).to_csv("%s-%s-features.csv" % (save_split_prefix, split))
            logger.info("X_%s saved to %s-%s-features.csv", split, save_split_prefix, split)
            if include_y:
                pd.DataFrame(y[split]).to_csv("%s-%s-targets.csv" % (save_split_prefix, split))
                logger.info("y_%s saved to %s-%s-targets.csv", split, save_split_prefix, split)

    if not include_y:
        y = dict(train=None)

    return X, y


def train_model(df, method=None, save_tmo=None, **kwargs):
    """Select features and target, split the data and fit the requested model.

    Args:
        df: Pandas dataframe holding features and (optionally) the target.
        method: Key of `methods` naming the model class to train.
        save_tmo: Optional path; when given, the trained model is pickled there.
        **kwargs: Optional sections ``choose_features``, ``get_target``, ``split_data``,
            ``params`` (model constructor arguments) and ``fit`` (arguments for `model.fit()`).

    Returns: The fitted model object.

    Raises:
        ValueError: If `method` is not a key of `methods`.

    """
    if method not in methods:
        raise ValueError("method must be one of %s; got %r" % (", ".join(sorted(methods)), method))

    if "choose_features" in kwargs:
        X = choose_features(df, **kwargs["choose_features"])
    else:
        X = df

    if "get_target" in kwargs:
        y = get_target(df, **kwargs["get_target"])
    else:
        y = None

    # presumably supplies an empty dict for each missing section - confirm in src.helpers
    kwargs = fillin_kwargs(train_model_kwargs, kwargs)

    X, y = split_data(X, y, **kwargs["split_data"])

    model = methods[method](**kwargs["params"])

    # Copy so the caller's configuration is not mutated by the eval_set injection below.
    fit_kwargs = dict(kwargs["fit"])
    if "validate" in X and "validate" in y:
        # Only estimators whose fit() declares eval_set can take it; passing it to the
        # others raises a TypeError.
        if "eval_set" in inspect.signature(model.fit).parameters:
            fit_kwargs["eval_set"] = [(X["validate"], y["validate"])]
        else:
            logger.debug("%s.fit() does not accept eval_set; validation split not passed", method)

    with Timer("model training", logger):
        model.fit(X["train"], y["train"], **fit_kwargs)

    if save_tmo is not None:
        with open(save_tmo, "wb") as f:
            pickle.dump(model, f)
        logger.info("Trained model object saved to %s", save_tmo)

    return model


def run_training(args):
    """Load data and train a model as described by a YAML configuration file.

    Args:
        args: Parsed command-line arguments with `config` (path to the YAML file), `csv`
            (optional CSV overriding the configured data source) and `save` (optional
            path to pickle the trained model to).

    Returns: The fitted model object.

    Raises:
        ValueError: If no CSV path is given and the config has no ``load_data`` section.

    """
    with open(args.config, "r") as f:
        # safe_load: a bare yaml.load(f) is rejected by current PyYAML and can construct
        # arbitrary objects on older releases.
        config = yaml.safe_load(f)

    if args.csv is not None:
        df = load_data(how="csv", csv=dict(path=args.csv))
    elif "load_data" in config:
        df = load_data(**config["load_data"])
    else:
        raise ValueError("Path to CSV for input data must be provided through --csv or "
                         "'load_data' configuration must exist in config file")

    tmo = train_model(df, **config["train_model"])

    if args.save is not None:
        with open(args.save, "wb") as f:
            pickle.dump(tmo, f)
        logger.info("Trained model object saved to %s", args.save)

    return tmo


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Train model")
    parser.add_argument('--config', help='path to yaml file with configurations')
    parser.add_argument('--csv', default=None, help="Path to CSV for input to model training")
    parser.add_argument('--save', default=None,
                        help='Path to where the trained model object should be saved to (optional)')

    args = parser.parse_args()

    run_training(args)

# -------------------------------------------------------------------------------- /docs/build/html/search.html: --------------------------------------------------------------------------------
# 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Search — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • Search
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | 136 |
  • 137 | 138 |
139 | 140 | 141 |
142 |
143 |
144 |
145 | 146 | 154 | 155 | 156 |
157 | 158 |
159 | 160 |
161 | 162 |
163 |
164 | 165 | 166 |
167 | 168 |
169 |

170 | © Copyright 2019 171 | 172 |

173 |
174 | Built with Sphinx using a theme provided by Read the Docs. 175 | 176 |
177 | 178 |
179 |
180 | 181 |
182 | 183 |
184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 208 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /docs/build/html/model/modules.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | reproducible-model — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • reproducible-model
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | View page source 136 | 137 | 138 |
  • 139 | 140 |
141 | 142 | 143 |
144 |
145 |
146 |
147 | 148 |
149 |

reproducible-model

150 |
151 | 155 |
156 |
157 | 158 | 159 |
160 | 161 |
162 |
163 | 164 | 165 |
166 | 167 |
168 |

169 | © Copyright 2019 170 | 171 |

172 |
173 | Built with Sphinx using a theme provided by Read the Docs. 174 | 175 |
176 | 177 |
178 |
179 | 180 |
181 | 182 |
183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /docs/build/html/model/run.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | run module — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
42 | 43 | 44 | 92 | 93 |
94 | 95 | 96 | 102 | 103 | 104 |
105 | 106 |
107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 |
125 | 126 |
    127 | 128 |
  • Docs »
  • 129 | 130 |
  • run module
  • 131 | 132 | 133 |
  • 134 | 135 | 136 | View page source 137 | 138 | 139 |
  • 140 | 141 |
142 | 143 | 144 |
145 |
146 |
147 |
148 | 149 |
150 |

run module

151 |
152 | 153 | 154 |
155 | 156 |
157 |
158 | 159 | 165 | 166 | 167 |
168 | 169 |
170 |

171 | © Copyright 2019 172 | 173 |

174 |
175 | Built with Sphinx using a theme provided by Read the Docs. 176 | 177 |
178 | 179 |
180 |
181 | 182 |
183 | 184 |
185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 208 | 209 | 210 | -------------------------------------------------------------------------------- /docs/build/html/model/test/modules.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | test — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • test
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | View page source 136 | 137 | 138 |
  • 139 | 140 |
141 | 142 | 143 |
144 |
145 |
146 |
147 | 148 |
149 |

test

150 |
151 | 159 |
160 |
161 | 162 | 163 |
164 | 165 |
166 |
167 | 168 | 169 |
170 | 171 |
172 |

173 | © Copyright 2019 174 | 175 |

176 |
177 | Built with Sphinx using a theme provided by Read the Docs. 178 | 179 |
180 | 181 |
182 |
183 | 184 |
185 | 186 |
# 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 210 | 211 | 212 |
# -------------------------------------------------------------------------------- /src/load_data.py: --------------------------------------------------------------------------------
"""Load data from CSV files or SQL queries, plus helpers for copying files from S3."""
import logging
import argparse
import yaml
import os
import posixpath
import subprocess
import re
import boto3
import sqlalchemy
import pandas as pd

logger = logging.getLogger(__name__)


def ifin(param, dictionary, alt=None):
    """Return `dictionary[param]` if the key exists, otherwise `alt`.

    Args:
        param: Key to look up.
        dictionary: Dictionary to look in.
        alt: Value returned when `param` is not a key of `dictionary`.

    Returns: The stored value or `alt`.

    Raises:
        TypeError: If `dictionary` is not a dict.

    """
    if not isinstance(dictionary, dict):
        raise TypeError("dictionary must be a dict, got %s" % type(dictionary).__name__)
    return dictionary.get(param, alt)


def copy_file_from_s3(path, s3path, s3=None):
    """Download the object ``<s3path>/<path>`` from S3 to the local file `path`.

    Args:
        path: Relative file path; appended to the S3 key prefix and used as the local destination.
        s3path: S3 location of the form ``s3://<bucket>/<key prefix>``.
        s3: Optional boto3 S3 resource; one is created when None.

    Raises:
        ValueError: If `s3path` does not look like ``s3://<bucket>/<key prefix>``.

    """
    if s3 is None:
        s3 = boto3.resource("s3")

    m = re.match(r"s3://([\w._-]+)/?([\w./_-]*)", s3path)
    if m is None:
        raise ValueError("s3path must have the form s3://<bucket>/<key prefix>, got %r" % s3path)
    s3bucket, key_prefix = m.group(1), m.group(2)

    # S3 keys always use "/" separators, so join with posixpath rather than os.path.
    s3key = posixpath.join(key_prefix, path)

    s3.Bucket(s3bucket).download_file(s3key, path)


def copy_files_from_s3(s3path, destpath):
    """Recursively copy one or more S3 locations to local destinations using the AWS CLI.

    Args:
        s3path: S3 path or list of S3 paths to copy from.
        destpath: Destination path or list of destination paths, one per entry of `s3path`.

    Raises:
        ValueError: If `s3path` and `destpath` have different lengths.
        subprocess.CalledProcessError: If the ``aws`` command exits with a non-zero status.

    """
    s3path = s3path if isinstance(s3path, list) else [s3path]
    destpath = destpath if isinstance(destpath, list) else [destpath]

    if len(s3path) != len(destpath):
        raise ValueError("s3path and destpath must have the same number of entries")

    for s3p, destp in zip(s3path, destpath):
        # Built as an argument list so paths containing spaces stay intact, with the
        # --recursive option placed after the `cp` subcommand it belongs to.
        subprocess.check_output(["aws", "s3", "cp", "--recursive", s3p, destp])


def format_sql(sql, replace_sqlvar=None, replace_var=None, python=True):
    """Substitute variables into a SQL string.

    Args:
        sql: SQL text.
        replace_sqlvar: Optional dict; each ``${var:<name>}`` in `sql` is replaced by the
            value stored under ``<name>``.
        replace_var: Optional dict applied with `str.format(**replace_var)`.
        python: If True, escape ``%`` as ``%%``.

    Returns: The formatted SQL string.

    """
    if replace_sqlvar is not None:
        for var, value in replace_sqlvar.items():
            # str() so numeric values from a config file can be substituted too.
            sql = sql.replace("${var:%s}" % var, str(value))

    if replace_var is not None:
        sql = sql.format(**replace_var)

    if python:
        sql = sql.replace("%", "%%")

    return sql


def load_sql(path_to_sql, load_comments=False, replace_sqlvar=None, replace_var=None, python=True):
    """Read a SQL file and substitute variables into it.

    Args:
        path_to_sql: Path to the SQL file.
        load_comments: If False, lines starting with ``--`` are skipped.
        replace_sqlvar: See `format_sql()`.
        replace_var: See `format_sql()`.
        python: See `format_sql()`.

    Returns: The formatted SQL string.

    """
    with open(path_to_sql, "r") as f:
        # Keep every line when comments are wanted; otherwise drop "--" comment lines.
        sql = "".join(line for line in f if load_comments or not line.startswith("--"))

    return format_sql(sql, replace_sqlvar=replace_sqlvar, replace_var=replace_var, python=python)


def create_connection(host='127.0.0.1', database="", sqltype="mysql+pymysql", port=3308,
                      user_env="amazonRDS_user", password_env="amazonRDS_pw",
                      username=None, password=None, dbconfig=None):
    """Create a SQLAlchemy engine from explicit arguments or a YAML database configuration.

    Args:
        host: Database host.
        database: Database name.
        sqltype: SQLAlchemy dialect and driver, e.g. ``mysql+pymysql``.
        port: Database port.
        user_env: Name of the environment variable holding the username.
        password_env: Name of the environment variable holding the password.
        username: Username; read from `user_env` when None.
        password: Password; read from `password_env` when None.
        dbconfig: Optional path to a YAML file with keys ``host``, ``port``, ``user_env``,
            ``password_env`` and optionally ``dbname`` and ``type``. Its values override
            the corresponding arguments.

    Returns: SQLAlchemy engine.

    """
    if dbconfig is not None:
        # Open the path that was passed in; the original read a global `args.dbconfig`,
        # which does not exist when this module is imported.
        with open(dbconfig, "r") as f:
            db = yaml.safe_load(f)

        host = db["host"]
        database = ifin("dbname", db, "")
        sqltype = ifin("type", db, sqltype)
        port = db["port"]
        user_env = db["user_env"]
        password_env = db["password_env"]

    username = os.environ.get(user_env) if username is None else username
    password = os.environ.get(password_env) if password is None else password

    # NOTE(review): credentials are inserted verbatim; values containing characters such
    # as "@" or "/" must already be URL-encoded by the caller.
    engine_string = "{sqltype}://{username}:{password}@{host}:{port}/{database}"
    engine_string = engine_string.format(sqltype=sqltype, username=username,
                                         password=password, host=host, port=port, database=database)
    conn = sqlalchemy.create_engine(engine_string)

    return conn


def query_data(sql=None, path_to_sql=None, dbconfig="config/dbconfig.yml", conn=None,
               load_comments=False, replace_sqlvar=None, replace_var=None, python=True):
    """Run a SQL query and return the result as a dataframe.

    Args:
        sql: SQL text to run. Takes precedence over `path_to_sql` when both are given.
        path_to_sql: Path to a file holding the SQL to run.
        dbconfig: Path to the YAML database configuration used when `conn` is None.
        conn: Optional existing connection or engine.
        load_comments: See `load_sql()`.
        replace_sqlvar: See `format_sql()`.
        replace_var: See `format_sql()`.
        python: See `format_sql()`.

    Returns: Pandas dataframe with the query result.

    Raises:
        ValueError: If neither `sql` nor `path_to_sql` is given.

    """
    if sql is not None:
        sql = format_sql(sql,
                         replace_sqlvar=replace_sqlvar,
                         replace_var=replace_var,
                         python=python)
    elif path_to_sql is not None:
        sql = load_sql(path_to_sql,
                       load_comments=load_comments,
                       replace_sqlvar=replace_sqlvar,
                       replace_var=replace_var,
                       python=python)
    else:
        raise ValueError("Either sql or path_to_sql must be provided")

    if conn is None:
        conn = create_connection(dbconfig=dbconfig)

    df = pd.read_sql(sql, con=conn)

    logger.info("Dataframe with %i rows loaded from query", len(df))

    return df


def read_csv(path, **kwargs):
    """Read a CSV file into a dataframe and log how many rows were loaded.

    Args:
        path: Path to the CSV file.
        **kwargs: Passed through to `pd.read_csv()`.

    Returns: Pandas dataframe.

    """
    if "usecols" in kwargs:
        usecols = kwargs["usecols"]
        # usecols may hold integer positions or be a callable, neither of which can be
        # joined directly.
        described = usecols if callable(usecols) else ",".join(str(col) for col in usecols)
        logger.debug("Columns being read from csv: %s", described)
    df = pd.read_csv(path, **kwargs)

    logger.info("Dataframe with %i rows loaded from %s", len(df), path)

    return df


def load_data(how, query=None, csv=None):
    """Load data from a SQL query or a CSV file.

    Args:
        how: How to load data: "query" or "csv" (case-insensitive).
        query: Dictionary of inputs to `query_data()`, None if how="csv"
        csv: Dictionary of inputs to `read_csv()`, None if how="query"

    Returns: Pandas dataframe

    Raises:
        ValueError: If `how` is not "query" or "csv", or how="csv" without a ``path`` entry.

    """
    how = how.lower()

    if how == "query":
        query = {} if query is None else query
        data = query_data(**query)
    elif how == "csv":
        if csv is None or "path" not in csv:
            raise ValueError("csv['path'] must be provided")
        data = read_csv(**csv)
    else:
        raise ValueError("how must be given as 'query' or 'csv'")
    return data


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="")
    parser.add_argument('--config', help='path to yaml file with configurations')

    parser.add_argument('--save', default=None, help='Path to where the dataset should be saved to (optional)')

    args = parser.parse_args()

    with open(args.config, "r") as f:
        # safe_load: a bare yaml.load(f) is rejected by current PyYAML and can construct
        # arbitrary objects on older releases.
        config = yaml.safe_load(f)

    df = load_data(**config["load_data"])

    if args.save is not None:
        df.to_csv(args.save)

# -------------------------------------------------------------------------------- /docs/source/conf.py: --------------------------------------------------------------------------------
# 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder.
4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | 18 | import sphinx_rtd_theme 19 | sys.path.insert(0, os.path.abspath('../..')) 20 | sys.path.insert(0, os.path.abspath('../')) 21 | sys.path.insert(0, os.path.abspath('../src')) 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = 'reproducible-model' 26 | copyright = '2019' 27 | author = 'Reproducibility expert' 28 | 29 | # The short X.Y version 30 | version = '' 31 | # The full version, including alpha/beta/rc tags 32 | release = '0.1' 33 | 34 | 35 | # -- General configuration --------------------------------------------------- 36 | 37 | # If your documentation needs a minimal Sphinx version, state it here. 38 | # 39 | # needs_sphinx = '1.0' 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 43 | # ones. 44 | extensions = [ 45 | 'sphinx.ext.autodoc', 46 | 'sphinx.ext.intersphinx', 47 | 'sphinx.ext.napoleon', 48 | 'sphinx.ext.todo', 49 | 'sphinx.ext.mathjax', 50 | 'sphinx.ext.ifconfig', 51 | 'sphinx.ext.viewcode', 52 | 'sphinx.ext.githubpages', 53 | ] 54 | 55 | # Add any paths that contain templates here, relative to this directory. 56 | templates_path = ['_templates'] 57 | 58 | # The suffix(es) of source filenames. 
59 | # You can specify multiple suffix as a list of string: 60 | # 61 | # source_suffix = ['.rst', '.md'] 62 | source_suffix = '.rst' 63 | 64 | # The master toctree document. 65 | master_doc = 'index' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This pattern also affects html_static_path and html_extra_path. 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = None 81 | 82 | 83 | # -- Options for HTML output ------------------------------------------------- 84 | 85 | # The theme to use for HTML and HTML Help pages. See the documentation for 86 | # a list of builtin themes. 87 | # 88 | html_theme = 'sphinx_rtd_theme' 89 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 90 | 91 | # Theme options are theme-specific and customize the look and feel of a theme 92 | # further. For a list of options available for each theme, see the 93 | # documentation. 94 | # 95 | # html_theme_options = {} 96 | 97 | # Add any paths that contain custom static files (such as style sheets) here, 98 | # relative to this directory. They are copied after the builtin static files, 99 | # so a file named "default.css" will overwrite the builtin "default.css". 100 | html_static_path = ['_static'] 101 | 102 | # Custom sidebar templates, must be a dictionary that maps document names 103 | # to template names. 104 | # 105 | # The default sidebars (for documents that don't match any pattern) are 106 | # defined by theme itself. 
# Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Base name of the output file written by the HTML help builder.
htmlhelp_basename = 'reproducible-modeldoc'


# -- Options for LaTeX output ------------------------------------------------

# No LaTeX overrides are set. Keys that can be added to the dict:
#
# The paper size ('letterpaper' or 'a4paper').
#     'papersize': 'letterpaper',
#
# The font size ('10pt', '11pt' or '12pt').
#     'pointsize': '10pt',
#
# Additional stuff for the LaTeX preamble.
#     'preamble': '',
#
# Latex figure (float) alignment
#     'figure_align': 'htbp',
latex_elements = {}

# One tuple per LaTeX file built from the document tree:
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
# NOTE(review): the author here is a literal, while the man-page entry below
# uses the ``author`` setting -- confirm the two are meant to differ.
latex_documents = [
    (
        master_doc,
        'reproducible-model.tex',
        'reproducible-model Documentation',
        'Chloe Mawer',
        'manual',
    ),
]


# -- Options for manual page output ------------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        master_doc,
        'reproducible-model',
        'reproducible-model Documentation',
        [author],
        1,
    ),
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files.
List of tuples 161 | # (source start file, target name, title, author, 162 | # dir menu entry, description, category) 163 | texinfo_documents = [ 164 | (master_doc, 'reproducible-model', 'reproducible-model Documentation', 165 | author, 'reproducible-model', 'One line description of project.', 166 | 'Miscellaneous'), 167 | ] 168 | 169 | 170 | # -- Options for Epub output ------------------------------------------------- 171 | 172 | # Bibliographic Dublin Core info. 173 | epub_title = project 174 | 175 | # The unique identifier of the text. This can be a ISBN number 176 | # or the project homepage. 177 | # 178 | # epub_identifier = '' 179 | 180 | # A unique identification for the text. 181 | # 182 | # epub_uid = '' 183 | 184 | # A list of files that should not be packed into the epub file. 185 | epub_exclude_files = ['search.html'] 186 | 187 | 188 | # -- Extension configuration ------------------------------------------------- 189 | 190 | # -- Options for intersphinx extension --------------------------------------- 191 | 192 | # Example configuration for intersphinx: refer to the Python standard library. 193 | intersphinx_mapping = {'https://docs.python.org/': None} 194 | 195 | # -- Options for todo extension ---------------------------------------------- 196 | 197 | # If true, `todo` and `todoList` produce output, else they produce nothing. 198 | todo_include_todos = True 199 | -------------------------------------------------------------------------------- /docs/build/html/model/src/modules.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | src — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • src
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | View page source 136 | 137 | 138 |
  • 139 | 140 |
141 | 142 | 143 |
144 |
145 |
146 |
147 | 148 |
149 |

src

150 | 174 |
175 | 176 | 177 |
178 | 179 |
180 |
181 | 182 | 183 |
184 | 185 |
186 |

187 | © Copyright 2019 188 | 189 |

190 |
191 | Built with Sphinx using a theme provided by Read the Docs. 192 | 193 |
194 | 195 |
196 |
197 | 198 |
199 | 200 |
201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Example of a reproducible machine learning model 2 | 3 | See [https://cmawer.github.io/reproducible-model](https://cmawer.github.io/reproducible-model) for the lightning talk I gave at the Women in Machine Learning and Data Science meetup on March 26, 2019 at Stitch Fix on the ingredients of a reproducible machine learning model. 4 | 5 | 6 | ## Repo structure 7 | 8 | ``` 9 | ├── README.md <- You are here 10 | │ 11 | ├── config <- Directory for yaml configuration files for model training, scoring, etc 12 | │ ├── logging/ <- Configuration of python loggers 13 | │ 14 | ├── data <- Folder that contains data used or generated. Only the external/ and sample/ subdirectories are tracked by git. 15 | │ ├── archive/ <- Place to put archived data that is no longer usable. Not synced with git. 16 | │ ├── external/ <- External data sources, will be synced with git 17 | │ ├── sample/ <- Sample data used for code development and testing, will be synced with git 18 | │ 19 | ├── docs <- A default Sphinx project; see sphinx-doc.org for details. 20 | │ 21 | ├── figures <- Generated graphics and figures to be used in reporting. 22 | │ 23 | ├── models <- Trained model objects (TMOs), model predictions, and/or model summaries 24 | │ ├── archive <- No longer current models. This directory is included in the .gitignore and is not tracked by git 25 | │ 26 | ├── notebooks 27 | │ ├── develop <- Current notebooks being used in development. 28 | │ ├── deliver <- Notebooks shared with others. 29 | │ ├── archive <- Develop notebooks no longer being used. 30 | │ ├── template.ipynb <- Template notebook for analysis with useful imports and helper functions.
31 | │ 32 | ├── src <- Source code for the project 33 | │ ├── archive/ <- No longer current scripts. 34 | │ ├── helpers/ <- Helper scripts used in main src files 35 | │ ├── sql/ <- SQL source code 36 | │ ├── load_data.py <- Script for ingesting data from different sources 37 | │ ├── generate_features.py <- Script for cleaning and transforming data and generating features for use in training and scoring. 38 | │ ├── train_model.py <- Script for training machine learning model(s) 39 | │ ├── score_model.py <- Script for scoring new predictions using a trained model. 40 | │ ├── postprocess.py <- Script for postprocessing predictions and model results 41 | │ ├── evaluate_model.py <- Script for evaluating model performance 42 | │ 43 | ├── test <- Files necessary for running model tests (see documentation below) 44 | │ ├── true <- Directory containing sources of truth for what results produced in each test should look like 45 | │ ├── test <- Directory where artifacts and results of tests are saved to be compared to the sources of truth. Only .gitkeep in this directory should be synced to GitHub 46 | │ ├── test.py <- Runs the tests defined in test_config.yml and then compares the produced artifacts/results with those defined as expected in the true/ directory 47 | │ ├── test_config.yml <- Configures the set of tests for comparing artifacts and results. Currently does not include unit testing or other traditional software testing 48 | │ 49 | ├── run.py <- Simplifies the execution of one or more of the src scripts 50 | ├── requirements.txt <- Python package dependencies 51 | ``` 52 | This project structure was partially influenced by the [Cookiecutter Data Science project](https://drivendata.github.io/cookiecutter-data-science/). 53 | 54 | ## Environment setup 55 | 56 | The `requirements.txt` file contains the packages required to run the model code. An environment can be set up in two ways. See bottom of README for exploratory data analysis environment setup.
57 | 58 | ### With `virtualenv` and `pip` 59 | 60 | ```bash 61 | pip install virtualenv 62 | 63 | virtualenv reprod 64 | 65 | source reprod/bin/activate 66 | 67 | pip install -r requirements.txt 68 | 69 | ``` 70 | 71 | ## Reproducibility testing 72 | Check that results and code behavior are what is expected. 73 | 74 | 75 | From the repo root directory, run: 76 | 77 | ```bash 78 | python run.py test 79 | ``` 80 | to run tests to check that model code produces expected output. See `test/README.md` for more info. 81 | 82 | 83 | ## Environment setup for exploratory analysis 84 | 85 | `environment.yml` contains the specifications for an environment that will get you started for exploratory data analysis. 86 | It also contains the packages imported in the template Jupyter notebook, `notebooks/template.ipynb` (see more info below). 87 | 88 | ### Create conda environment 89 | Create conda environment with packages in `environment.yml`: 90 | 91 | `conda env create -f environment.yml` 92 | 93 | and activate: 94 | 95 | `source activate eda3` 96 | 97 | ### Complete Jupyter extensions install 98 | After creating and activating your conda environment, run from the command line: 99 | 100 | `jupyter contrib nbextension install --user` 101 | 102 | Next, start a Jupyter notebook server by running: 103 | 104 | `jupyter notebook` 105 | 106 | You can then go to the extension configurator at [http://localhost:8888/nbextensions/](http://localhost:8888/nbextensions/) and enable your desired extensions. 107 | 108 | #### Collapsible headings extension 109 | I recommend enabling the `Collapsible Headings` extension so that the `Imports and setup` section on the template notebook (see section below), which is quite long, can be minimized, as well as other heading sections when desired.
To maintain this collapsibility when exporting to html, run: 110 | 111 | `jupyter nbconvert --to html_ch FILE.ipynb` 112 | 113 | #### Table of Contents (2) 114 | Enabling the `Table of Contents (2)` extension automatically creates a table of contents based on your notebook headings, which can be placed at the top of the notebook or at the side (which is nice for navigation in a long notebook). 115 | 116 | To keep this table of contents when you export to html, add to the command in the prior section and run: 117 | 118 | `jupyter nbconvert --to html_ch FILE.ipynb --template toc2` 119 | 120 | #### Other recommended extensions 121 | * `Code prettify`: when enabled, you can press the little hammer icon at the top of the page and it will make "pretty" ([PEP8](https://www.python.org/dev/peps/pep-0008/) compliant) the code in the current cell. This is really nice when you're writing a long command. 122 | * `Execute time`: when enabled, this adds to the bottom of a code cell the time at which the cell was executed and how long it took. This is nice for traceability purposes and making sure code was run in order when looking at old code. It also helps when code takes a while to run and you leave it and come back and want to know how long it took. 123 | * `Ruler`: when enabled, this adds a vertical line to code cells to denote the distance of 76 characters, which is just under the maximum line length of 79 characters suggested by [PEP8](https://www.python.org/dev/peps/pep-0008/). 124 | 125 | There are a lot more options so take a look through! 126 | 127 | ### Template Jupyter Notebook 128 | `notebooks/template.ipynb` is a template Jupyter notebook that includes: 129 | * A set of regularly used package imports 130 | * Code that helps reference other parts of the directory structure (e.g.
`dataplus()` prepends the data directory for data import) 131 | * Code that sets up a SQLAlchemy connection to MySQL 132 | * Headings for stating notebook objectives, guiding questions, conclusions 133 | * A pretty Lineage logo at the top 134 | 135 | -------------------------------------------------------------------------------- /docs/build/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Welcome to reproducible-model’s documentation! — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
42 | 43 | 44 | 92 | 93 |
94 | 95 | 96 | 102 | 103 | 104 |
105 | 106 |
107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 |
125 | 126 |
    127 | 128 |
  • Docs »
  • 129 | 130 |
  • Welcome to reproducible-model’s documentation!
  • 131 | 132 | 133 |
  • 134 | 135 | 136 | View page source 137 | 138 | 139 |
  • 140 | 141 |
142 | 143 | 144 |
145 |
146 |
147 |
148 | 149 |
150 |

Welcome to reproducible-model’s documentation!

151 | 175 |
176 |
177 |

Indices and tables

178 | 183 |
184 | 185 | 186 |
187 | 188 |
189 |
190 | 191 | 197 | 198 | 199 |
200 | 201 |
202 |

203 | © Copyright 2019 204 | 205 |

206 |
207 | Built with Sphinx using a theme provided by Read the Docs. 208 | 209 |
210 | 211 |
212 |
213 | 214 |
215 | 216 |
217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /docs/build/html/model/test/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | test package — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 |
43 | 44 | 45 | 98 | 99 |
100 | 101 | 102 | 108 | 109 | 110 |
111 | 112 |
113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 |
131 | 132 |
    133 | 134 |
  • Docs »
  • 135 | 136 |
  • test package
  • 137 | 138 | 139 |
  • 140 | 141 | 142 | View page source 143 | 144 | 145 |
  • 146 | 147 |
148 | 149 | 150 |
151 |
152 |
153 |
154 | 155 |
156 |

test package

157 |
158 |

Submodules

159 |
160 |
161 |

test.test module

162 |
163 |
164 | test.test.compare_dict(dicta, dictb)[source]
165 |
166 | 167 |
168 |
169 | test.test.open_dictlike_file(fname)[source]
170 |
171 | 172 |
173 |
174 | test.test.run_tests(args=None, config_path=None, logger=None)[source]
175 |
176 | 177 |
178 |
179 |

Module contents

180 |
181 |
182 | 183 | 184 |
185 | 186 |
187 |
188 | 189 | 197 | 198 | 199 |
200 | 201 |
202 |

203 | © Copyright 2019 204 | 205 |

206 |
207 | Built with Sphinx using a theme provided by Read the Docs. 208 | 209 |
210 | 211 |
212 |
213 | 214 |
215 | 216 |
217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /docs/build/html/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Python Module Index — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 |
44 | 45 | 46 | 94 | 95 |
96 | 97 | 98 | 104 | 105 | 106 |
107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 |
127 | 128 |
    129 | 130 |
  • Docs »
  • 131 | 132 |
  • Python Module Index
  • 133 | 134 | 135 |
  • 136 | 137 |
  • 138 | 139 |
140 | 141 | 142 |
143 |
144 |
145 |
146 | 147 | 148 |

Python Module Index

149 | 150 |
151 | c | 152 | r | 153 | s | 154 | t 155 |
156 | 157 | 158 | 159 | 161 | 162 | 163 | 166 | 167 | 169 | 170 | 171 | 174 | 175 | 177 | 178 | 180 | 183 | 184 | 185 | 188 | 189 | 190 | 193 | 194 | 195 | 198 | 199 | 200 | 203 | 204 | 205 | 208 | 209 | 210 | 213 | 214 | 215 | 218 | 219 | 220 | 223 | 224 | 226 | 227 | 229 | 232 | 233 | 234 | 237 |
 
160 | c
164 | conf 165 |
 
168 | r
172 | run 173 |
 
176 | s
181 | src 182 |
    186 | src.evaluate_model 187 |
    191 | src.generate_features 192 |
    196 | src.helpers 197 |
    201 | src.helpers.helpers 202 |
    206 | src.load_data 207 |
    211 | src.postprocess 212 |
    216 | src.score_model 217 |
    221 | src.train_model 222 |
 
225 | t
230 | test 231 |
    235 | test.test 236 |
238 | 239 | 240 |
241 | 242 |
243 |
244 | 245 | 246 |
247 | 248 |
249 |

250 | © Copyright 2019 251 | 252 |

253 |
254 | Built with Sphinx using a theme provided by Read the Docs. 255 | 256 |
257 | 258 |
259 |
260 | 261 |
262 | 263 |
264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 287 | 288 | 289 | -------------------------------------------------------------------------------- /docs/build/html/_modules/src/helpers/helpers.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | src.helpers.helpers — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • Module code »
  • 130 | 131 |
  • src.helpers.helpers
  • 132 | 133 | 134 |
  • 135 | 136 |
  • 137 | 138 |
139 | 140 | 141 |
142 |
143 |
144 |
145 | 146 |

Source code for src.helpers.helpers

147 | import datetime
148 | 
149 | 
150 | 
def fillin_kwargs(keywords, kwargs):
    """Make sure every requested keyword is present in ``kwargs``.

    Args:
        keywords: A single key, or a list of keys, that must exist in
            ``kwargs``. Any value that is not a list (or list subclass) is
            treated as one key.
        kwargs: Dictionary to fill in. It is modified in place.

    Returns:
        The same ``kwargs`` object, with an empty dict stored under each
        keyword that was missing. Keys that already exist are left untouched.
    """
    # isinstance (rather than an exact type comparison) so that list
    # subclasses are iterated as lists instead of being wrapped as one key.
    if not isinstance(keywords, list):
        keywords = [keywords]
    for keyword in keywords:
        # setdefault only inserts when the key is absent.
        kwargs.setdefault(keyword, {})
    return kwargs
156 | 157 |
class Timer:
    """Context manager that logs how long the enclosed block took.

    Args:
        function: Label for the timed work; it is passed to the log message.
        logger: Object with an ``info`` method that receives the message.

    On entry the current time is stored in ``start``. On exit ``end`` and
    ``interval`` (``end - start``) are stored and one info record is logged.
    ``__exit__`` returns nothing, so exceptions raised inside the block are
    not suppressed.
    """

    def __init__(self, function, logger):
        self.function = function
        self.logger = logger

    def __enter__(self):
        # Record the starting timestamp and hand the timer back to the caller.
        self.start = datetime.datetime.now()
        return self

    def __exit__(self, *args):
        self.end = datetime.datetime.now()
        self.interval = self.end - self.start
        elapsed_seconds = self.interval.total_seconds()
        self.logger.info("%s took %0.2f seconds", self.function, elapsed_seconds)
171 |
172 | 173 |
174 | 175 |
176 |
177 | 178 | 179 |
180 | 181 |
182 |

183 | © Copyright 2019 184 | 185 |

186 |
187 | Built with Sphinx using a theme provided by Read the Docs. 188 | 189 |
190 | 191 |
192 |
193 | 194 |
195 | 196 |
197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /test/true/example-boston-validate-features.csv: -------------------------------------------------------------------------------- 1 | ,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,TAX,PTRATIO,RAD_dummy_0,RAD_dummy_1,RAD_dummy_2,RAD_dummy_3,RAD_dummy_4,RAD_dummy_5,RAD_dummy_6,RAD_dummy_7,RAD_dummy_8 2 | 489,0.0,27.74,0,0.609,5.414,98.3,1.7554,711,20.1,0,0,0,0,1,0,0,0,0 3 | 186,0.0,2.46,0,0.488,7.831,53.6,3.1992,193,17.8,0,0,1,0,0,0,0,0,0 4 | 226,0.0,6.2,0,0.504,8.04,86.5,3.2157,307,17.4,0,0,0,0,0,1,0,0,0 5 | 84,0.0,4.49,0,0.449,6.3889999999999985,48.0,4.7794,247,18.5,0,0,1,0,0,0,0,0,0 6 | 135,0.0,21.89,0,0.624,6.335,98.2,2.1107,437,21.2,0,0,0,0,1,0,0,0,0 7 | 248,22.0,5.86,0,0.431,6.433,49.1,7.8265,330,19.1,0,0,0,0,0,0,0,1,0 8 | 307,33.0,2.18,0,0.472,6.849,70.3,3.1827,222,18.4,0,0,0,0,0,0,0,1,0 9 | 203,95.0,2.68,0,0.4161,7.853,33.2,5.118,224,14.7,0,0,0,0,1,0,0,0,0 10 | 118,0.0,10.01,0,0.547,5.872000000000001,73.1,2.4775,432,17.8,0,0,0,0,0,0,1,0,0 11 | 453,0.0,18.1,0,0.713,7.393,99.3,2.4527,666,20.2,0,0,0,0,0,0,0,0,1 12 | 380,0.0,18.1,0,0.6709999999999999,6.968,91.9,1.4165,666,20.2,0,0,0,0,0,0,0,0,1 13 | 138,0.0,21.89,0,0.624,5.857,98.2,1.6686,437,21.2,0,0,0,0,1,0,0,0,0 14 | 267,20.0,3.97,0,0.575,8.297,67.0,2.4216,264,13.0,0,0,0,1,0,0,0,0,0 15 | 184,0.0,2.46,0,0.488,5.604,89.8,2.9879,193,17.8,0,0,1,0,0,0,0,0,0 16 | 396,0.0,18.1,0,0.693,6.405,96.0,1.6768,666,20.2,0,0,0,0,0,0,0,0,1 17 | 492,0.0,27.74,0,0.609,5.983,83.5,2.1099,711,20.1,0,0,0,0,1,0,0,0,0 18 | 417,0.0,18.1,0,0.679,5.3039999999999985,89.1,1.6475,666,20.2,0,0,0,0,0,0,0,0,1 19 | 201,82.5,2.03,0,0.415,6.162000000000001,38.4,6.27,348,14.7,0,1,0,0,0,0,0,0,0 20 | 79,0.0,12.83,0,0.4370000000000001,5.874,36.6,4.5026,398,18.7,0,0,0,1,0,0,0,0,0 21 | 464,0.0,18.1,0,0.655,6.209,65.4,2.9634,666,20.2,0,0,0,0,0,0,0,0,1 22 | 
411,0.0,18.1,0,0.597,6.657,100.0,1.5275,666,20.2,0,0,0,0,0,0,0,0,1 23 | 308,0.0,9.9,0,0.544,6.635,82.5,3.3175,304,18.4,0,0,0,0,1,0,0,0,0 24 | 402,0.0,18.1,0,0.693,6.404,100.0,1.639,666,20.2,0,0,0,0,0,0,0,0,1 25 | 278,40.0,6.41,0,0.447,6.482,32.1,4.1403,254,17.6,0,0,0,0,1,0,0,0,0 26 | 162,0.0,19.58,1,0.605,7.802,98.2,2.0407,403,14.7,0,0,0,1,0,0,0,0,0 27 | 78,0.0,12.83,0,0.4370000000000001,6.232,53.7,5.0141,398,18.7,0,0,0,1,0,0,0,0,0 28 | 165,0.0,19.58,0,0.605,6.101,93.0,2.2834,403,14.7,0,0,0,1,0,0,0,0,0 29 | 148,0.0,19.58,0,0.871,5.186,93.8,1.5296,403,14.7,0,0,0,1,0,0,0,0,0 30 | 393,0.0,18.1,0,0.693,6.193,92.6,1.7912,666,20.2,0,0,0,0,0,0,0,0,1 31 | 23,0.0,8.14,0,0.5379999999999999,5.813,100.0,4.0952,307,21.0,0,0,0,0,1,0,0,0,0 32 | 428,0.0,18.1,0,0.679,6.193,78.1,1.9356,666,20.2,0,0,0,0,0,0,0,0,1 33 | 77,0.0,12.83,0,0.4370000000000001,6.14,45.8,4.0905,398,18.7,0,0,0,1,0,0,0,0,0 34 | 266,20.0,3.97,0,0.647,7.0139999999999985,84.6,2.1329,264,13.0,0,0,0,1,0,0,0,0,0 35 | 46,0.0,6.91,0,0.448,5.7860000000000005,33.3,5.1004,233,17.9,0,0,1,0,0,0,0,0,0 36 | 406,0.0,18.1,0,0.659,4.138,100.0,1.1781,666,20.2,0,0,0,0,0,0,0,0,1 37 | 205,0.0,10.59,0,0.489,5.891,22.3,3.9454,277,18.6,0,0,0,0,1,0,0,0,0 38 | 495,0.0,9.69,0,0.585,5.67,28.8,2.7986,391,19.2,0,0,0,0,0,0,1,0,0 39 | 202,82.5,2.03,0,0.415,7.61,15.7,6.27,348,14.7,0,1,0,0,0,0,0,0,0 40 | 13,0.0,8.14,0,0.5379999999999999,5.949,61.8,4.7075,307,21.0,0,0,0,0,1,0,0,0,0 41 | 352,60.0,1.69,0,0.411,5.884,18.5,10.7103,411,18.3,0,0,0,0,1,0,0,0,0 42 | 3,0.0,2.18,0,0.458,6.997999999999998,45.8,6.0622,222,18.7,0,0,1,0,0,0,0,0,0 43 | 64,17.5,1.38,0,0.4161,7.104,59.5,9.2229,216,18.6,0,0,1,0,0,0,0,0,0 44 | 210,0.0,10.59,1,0.489,5.96,92.1,3.8771,277,18.6,0,0,0,0,1,0,0,0,0 45 | 299,70.0,2.24,0,0.4,7.041,10.0,7.8278,358,14.8,0,0,0,1,0,0,0,0,0 46 | 376,0.0,18.1,0,0.6709999999999999,6.649,93.3,1.3449,666,20.2,0,0,0,0,0,0,0,0,1 47 | 443,0.0,18.1,0,0.74,6.485,100.0,1.9784,666,20.2,0,0,0,0,0,0,0,0,1 48 | 
367,0.0,18.1,0,0.631,3.863,100.0,1.5106,666,20.2,0,0,0,0,0,0,0,0,1 49 | 22,0.0,8.14,0,0.5379999999999999,6.142,91.7,3.9769,307,21.0,0,0,0,0,1,0,0,0,0 50 | 19,0.0,8.14,0,0.5379999999999999,5.727,69.5,3.7965,307,21.0,0,0,0,0,1,0,0,0,0 51 | 81,25.0,4.86,0,0.426,6.619,70.4,5.4007,281,19.0,0,0,0,0,1,0,0,0,0 52 | 400,0.0,18.1,0,0.693,5.987,100.0,1.5888,666,20.2,0,0,0,0,0,0,0,0,1 53 | 345,0.0,4.39,0,0.442,6.0139999999999985,48.5,8.0136,352,18.8,0,0,1,0,0,0,0,0,0 54 | 441,0.0,18.1,0,0.74,6.4060000000000015,97.2,2.0651,666,20.2,0,0,0,0,0,0,0,0,1 55 | 359,0.0,18.1,0,0.77,6.112,81.3,2.5091,666,20.2,0,0,0,0,0,0,0,0,1 56 | 502,0.0,11.93,0,0.573,6.12,76.7,2.2875,273,21.0,1,0,0,0,0,0,0,0,0 57 | 275,40.0,6.41,0,0.447,6.854,42.8,4.2673,254,17.6,0,0,0,0,1,0,0,0,0 58 | 108,0.0,8.56,0,0.52,6.474,97.1,2.4329,384,20.9,0,0,0,1,0,0,0,0,0 59 | 6,12.5,7.87,0,0.524,6.0120000000000005,66.6,5.5605,311,15.2,0,0,0,1,0,0,0,0,0 60 | 480,0.0,18.1,0,0.532,6.242000000000001,64.7,3.4242,666,20.2,0,0,0,0,0,0,0,0,1 61 | 460,0.0,18.1,0,0.713,6.7010000000000005,90.0,2.5975,666,20.2,0,0,0,0,0,0,0,0,1 62 | 200,95.0,1.47,0,0.403,7.135,13.9,7.6534,402,17.0,0,0,1,0,0,0,0,0,0 63 | 139,0.0,21.89,0,0.624,6.151,97.9,1.6687,437,21.2,0,0,0,0,1,0,0,0,0 64 | 220,0.0,6.2,1,0.507,6.9510000000000005,88.5,2.8617,307,17.4,0,0,0,0,0,1,0,0,0 65 | 188,45.0,3.44,0,0.4370000000000001,6.556,29.1,4.5667,398,15.2,0,0,0,1,0,0,0,0,0 66 | 270,20.0,6.96,0,0.464,5.856,42.1,4.4289999999999985,223,18.6,0,0,1,0,0,0,0,0,0 67 | 446,0.0,18.1,0,0.74,6.341,96.4,2.072,666,20.2,0,0,0,0,0,0,0,0,1 68 | 183,0.0,2.46,0,0.488,6.563,95.6,2.847,193,17.8,0,0,1,0,0,0,0,0,0 69 | 467,0.0,18.1,0,0.584,6.002999999999999,94.5,2.5403,666,20.2,0,0,0,0,0,0,0,0,1 70 | 131,0.0,21.89,0,0.624,6.3260000000000005,97.7,2.271,437,21.2,0,0,0,0,1,0,0,0,0 71 | 269,20.0,6.96,1,0.464,5.92,61.5,3.9175,223,18.6,0,0,1,0,0,0,0,0,0 72 | 31,0.0,8.14,0,0.5379999999999999,6.072,100.0,4.175,307,21.0,0,0,0,0,1,0,0,0,0 73 | 
413,0.0,18.1,0,0.597,5.155,100.0,1.5894,666,20.2,0,0,0,0,0,0,0,0,1 74 | 462,0.0,18.1,0,0.713,6.317,83.0,2.7344,666,20.2,0,0,0,0,0,0,0,0,1 75 | 189,45.0,3.44,0,0.4370000000000001,7.185,38.9,4.5667,398,15.2,0,0,0,1,0,0,0,0,0 76 | 310,0.0,9.9,0,0.544,4.973,37.8,2.5194,304,18.4,0,0,0,0,1,0,0,0,0 77 | 142,0.0,19.58,1,0.871,5.403,100.0,1.3216,403,14.7,0,0,0,1,0,0,0,0,0 78 | 71,0.0,10.81,0,0.413,5.961,17.5,5.2873,305,19.2,0,0,0,0,1,0,0,0,0 79 | 477,0.0,18.1,0,0.614,5.3039999999999985,97.3,2.1007,666,20.2,0,0,0,0,0,0,0,0,1 80 | 97,0.0,2.89,0,0.445,8.068999999999999,76.0,3.4952,276,18.0,0,1,0,0,0,0,0,0,0 81 | 82,25.0,4.86,0,0.426,6.3020000000000005,32.2,5.4007,281,19.0,0,0,0,0,1,0,0,0,0 82 | 63,25.0,5.13,0,0.453,6.7620000000000005,43.4,7.9809,284,19.7,0,0,0,0,0,1,0,0,0 83 | 126,0.0,25.65,0,0.581,5.613,95.6,1.7572,188,19.1,0,1,0,0,0,0,0,0,0 84 | 152,0.0,19.58,1,0.871,5.012,88.0,1.6102,403,14.7,0,0,0,1,0,0,0,0,0 85 | 114,0.0,10.01,0,0.547,6.254,84.2,2.2565,432,17.8,0,0,0,0,0,0,1,0,0 86 | 91,0.0,3.41,0,0.489,6.405,73.9,3.0921,270,17.8,0,1,0,0,0,0,0,0,0 87 | 104,0.0,8.56,0,0.52,6.167000000000002,90.0,2.421,384,20.9,0,0,0,1,0,0,0,0,0 88 | 322,0.0,7.38,0,0.493,6.041,49.9,4.7211,287,19.6,0,0,0,1,0,0,0,0,0 89 | 448,0.0,18.1,0,0.713,6.185,98.7,2.2616,666,20.2,0,0,0,0,0,0,0,0,1 90 | 344,55.0,3.78,0,0.484,6.874,28.1,6.4654,370,17.6,0,0,0,1,0,0,0,0,0 91 | 196,80.0,1.52,0,0.404,7.287000000000001,34.1,7.309,329,12.6,0,1,0,0,0,0,0,0,0 92 | 40,75.0,2.95,0,0.428,7.024,15.8,5.4011,252,18.3,0,0,1,0,0,0,0,0,0 93 | 488,0.0,27.74,0,0.609,5.454,92.7,1.8209,711,20.1,0,0,0,0,1,0,0,0,0 94 | 215,0.0,10.59,0,0.489,6.182,42.4,3.9454,277,18.6,0,0,0,0,1,0,0,0,0 95 | 70,0.0,10.81,0,0.413,6.417000000000002,6.6,5.2873,305,19.2,0,0,0,0,1,0,0,0,0 96 | 57,100.0,1.32,0,0.411,6.816,40.5,8.3248,256,15.1,0,0,0,1,0,0,0,0,0 97 | 15,0.0,8.14,0,0.5379999999999999,5.834,56.5,4.4986,307,21.0,0,0,0,0,1,0,0,0,0 98 | 398,0.0,18.1,0,0.693,5.4529999999999985,100.0,1.4896,666,20.2,0,0,0,0,0,0,0,0,1 99 | 
501,0.0,11.93,0,0.573,6.593,69.1,2.4786,273,21.0,1,0,0,0,0,0,0,0,0 100 | 37,0.0,5.96,0,0.499,5.85,41.5,3.9342,279,19.2,0,0,0,1,0,0,0,0,0 101 | 249,22.0,5.86,0,0.431,6.718,17.5,7.8265,330,19.1,0,0,0,0,0,0,0,1,0 102 | 324,0.0,7.38,0,0.493,6.415,40.1,4.7211,287,19.6,0,0,0,1,0,0,0,0,0 103 | 105,0.0,8.56,0,0.52,5.851,96.7,2.1069,384,20.9,0,0,0,1,0,0,0,0,0 104 | 461,0.0,18.1,0,0.713,6.376,88.4,2.5671,666,20.2,0,0,0,0,0,0,0,0,1 105 | 404,0.0,18.1,0,0.693,5.5310000000000015,85.4,1.6074,666,20.2,0,0,0,0,0,0,0,0,1 106 | 103,0.0,8.56,0,0.52,6.1370000000000005,87.4,2.7147,384,20.9,0,0,0,1,0,0,0,0,0 107 | 438,0.0,18.1,0,0.74,5.935,87.9,1.8206,666,20.2,0,0,0,0,0,0,0,0,1 108 | 369,0.0,18.1,1,0.631,6.683,96.8,1.3567,666,20.2,0,0,0,0,0,0,0,0,1 109 | 394,0.0,18.1,0,0.693,5.8870000000000005,94.7,1.7821,666,20.2,0,0,0,0,0,0,0,0,1 110 | 119,0.0,10.01,0,0.547,5.731,65.2,2.7592,432,17.8,0,0,0,0,0,0,1,0,0 111 | 161,0.0,19.58,0,0.605,7.489,90.8,1.9709,403,14.7,0,0,0,1,0,0,0,0,0 112 | 319,0.0,9.9,0,0.544,6.113,58.8,4.0019,304,18.4,0,0,0,0,1,0,0,0,0 113 | 335,0.0,5.19,0,0.515,6.037000000000001,34.5,5.9853,224,20.2,0,0,0,1,0,0,0,0,0 114 | 451,0.0,18.1,0,0.713,6.655,98.2,2.3552,666,20.2,0,0,0,0,0,0,0,0,1 115 | 472,0.0,18.1,0,0.58,6.437,75.0,2.8965,666,20.2,0,0,0,0,0,0,0,0,1 116 | 403,0.0,18.1,0,0.693,5.349,96.0,1.7028,666,20.2,0,0,0,0,0,0,0,0,1 117 | 366,0.0,18.1,0,0.718,4.963,91.4,1.7523,666,20.2,0,0,0,0,0,0,0,0,1 118 | 463,0.0,18.1,0,0.713,6.513,89.9,2.8016,666,20.2,0,0,0,0,0,0,0,0,1 119 | 486,0.0,18.1,0,0.583,6.114,79.8,3.5459,666,20.2,0,0,0,0,0,0,0,0,1 120 | 155,0.0,19.58,1,0.871,6.152,82.6,1.7455,403,14.7,0,0,0,1,0,0,0,0,0 121 | 500,0.0,9.69,0,0.585,6.027,79.7,2.4982,391,19.2,0,0,0,0,0,0,1,0,0 122 | 304,33.0,2.18,0,0.472,7.2360000000000015,41.1,4.022,222,18.4,0,0,0,0,0,0,0,1,0 123 | 426,0.0,18.1,0,0.584,5.8370000000000015,59.7,1.9976,666,20.2,0,0,0,0,0,0,0,0,1 124 | 16,0.0,8.14,0,0.5379999999999999,5.935,29.3,4.4986,307,21.0,0,0,0,0,1,0,0,0,0 125 | 
457,0.0,18.1,0,0.713,5.936,80.3,2.7792,666,20.2,0,0,0,0,0,0,0,0,1 126 | 358,0.0,18.1,1,0.77,6.127000000000002,83.4,2.7227,666,20.2,0,0,0,0,0,0,0,0,1 127 | 256,90.0,3.75,0,0.394,7.454,34.2,6.3361,244,15.9,0,0,1,0,0,0,0,0,0 128 | -------------------------------------------------------------------------------- /test/true/example-boston-test-features.csv: -------------------------------------------------------------------------------- 1 | ,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,TAX,PTRATIO,RAD_dummy_0,RAD_dummy_1,RAD_dummy_2,RAD_dummy_3,RAD_dummy_4,RAD_dummy_5,RAD_dummy_6,RAD_dummy_7,RAD_dummy_8 2 | 141,0.0,21.89,0,0.624,5.019,100.0,1.4394,437,21.2,0,0,0,0,1,0,0,0,0 3 | 471,0.0,18.1,0,0.532,6.229,90.7,3.0993,666,20.2,0,0,0,0,0,0,0,0,1 4 | 213,0.0,10.59,0,0.489,6.375,32.3,3.9454,277,18.6,0,0,0,0,1,0,0,0,0 5 | 491,0.0,27.74,0,0.609,5.983,98.8,1.8681,711,20.1,0,0,0,0,1,0,0,0,0 6 | 364,0.0,18.1,1,0.718,8.78,82.9,1.9047,666,20.2,0,0,0,0,0,0,0,0,1 7 | 271,20.0,6.96,0,0.464,6.24,16.3,4.4289999999999985,223,18.6,0,0,1,0,0,0,0,0,0 8 | 351,60.0,1.69,0,0.411,6.579,35.9,10.7103,411,18.3,0,0,0,0,1,0,0,0,0 9 | 385,0.0,18.1,0,0.7,5.277,98.1,1.4261,666,20.2,0,0,0,0,0,0,0,0,1 10 | 476,0.0,18.1,0,0.614,6.484,93.6,2.3053,666,20.2,0,0,0,0,0,0,0,0,1 11 | 252,22.0,5.86,0,0.431,6.957000000000002,6.8,8.9067,330,19.1,0,0,0,0,0,0,0,1,0 12 | 172,0.0,4.05,0,0.51,5.572,88.5,2.5961,296,16.6,0,0,0,1,0,0,0,0,0 13 | 244,22.0,5.86,0,0.431,5.593,76.5,7.9549,330,19.1,0,0,0,0,0,0,0,1,0 14 | 504,0.0,11.93,0,0.573,6.794,89.3,2.3889,273,21.0,1,0,0,0,0,0,0,0,0 15 | 61,25.0,5.13,0,0.453,5.966,93.4,6.8185,284,19.7,0,0,0,0,0,1,0,0,0 16 | 505,0.0,11.93,0,0.573,6.03,80.8,2.505,273,21.0,1,0,0,0,0,0,0,0,0 17 | 5,0.0,2.18,0,0.458,6.43,58.7,6.0622,222,18.7,0,0,1,0,0,0,0,0,0 18 | 197,80.0,1.52,0,0.404,7.107,36.6,7.309,329,12.6,0,1,0,0,0,0,0,0,0 19 | 51,21.0,5.64,0,0.439,6.115,63.0,6.8147,243,16.8,0,0,0,0,1,0,0,0,0 20 | 317,0.0,9.9,0,0.544,5.782,71.7,4.0317,304,18.4,0,0,0,0,1,0,0,0,0 21 | 
498,0.0,9.69,0,0.585,6.019,65.3,2.4091,391,19.2,0,0,0,0,0,0,1,0,0 22 | 468,0.0,18.1,0,0.58,5.926,71.0,2.9084,666,20.2,0,0,0,0,0,0,0,0,1 23 | 124,0.0,25.65,0,0.581,5.879,95.8,2.0063,188,19.1,0,1,0,0,0,0,0,0,0 24 | 320,0.0,7.38,0,0.493,6.426,52.3,4.5404,287,19.6,0,0,0,1,0,0,0,0,0 25 | 121,0.0,25.65,0,0.581,6.004,84.1,2.1974,188,19.1,0,1,0,0,0,0,0,0,0 26 | 30,0.0,8.14,0,0.5379999999999999,5.712999999999999,94.1,4.2330000000000005,307,21.0,0,0,0,0,1,0,0,0,0 27 | 211,0.0,10.59,1,0.489,5.404,88.6,3.665,277,18.6,0,0,0,0,1,0,0,0,0 28 | 454,0.0,18.1,0,0.713,6.728,94.1,2.4961,666,20.2,0,0,0,0,0,0,0,0,1 29 | 90,0.0,3.41,0,0.489,6.417000000000002,66.1,3.0923,270,17.8,0,1,0,0,0,0,0,0,0 30 | 328,0.0,3.24,0,0.46,5.8679999999999986,25.8,5.2146,430,16.9,0,0,0,0,1,0,0,0,0 31 | 493,0.0,9.69,0,0.585,5.707000000000002,54.0,2.3817,391,19.2,0,0,0,0,0,0,1,0,0 32 | 177,0.0,4.05,0,0.51,6.315,73.4,3.3175,296,16.6,0,0,0,1,0,0,0,0,0 33 | 185,0.0,2.46,0,0.488,6.153,68.8,3.2797,193,17.8,0,0,1,0,0,0,0,0,0 34 | 465,0.0,18.1,0,0.655,5.759,48.2,3.0665,666,20.2,0,0,0,0,0,0,0,0,1 35 | 44,0.0,6.91,0,0.448,6.069,40.0,5.7209,233,17.9,0,0,1,0,0,0,0,0,0 36 | 239,30.0,4.93,0,0.428,6.606,42.2,6.1899,300,16.6,0,0,0,0,0,0,1,0,0 37 | 280,20.0,3.33,0,0.4429,7.82,64.5,4.6947,216,14.9,0,0,0,1,0,0,0,0,0 38 | 150,0.0,19.58,0,0.871,6.122000000000001,97.3,1.618,403,14.7,0,0,0,1,0,0,0,0,0 39 | 146,0.0,19.58,0,0.871,5.627999999999999,100.0,1.5166,403,14.7,0,0,0,1,0,0,0,0,0 40 | 219,0.0,13.89,1,0.55,6.372999999999998,92.4,3.3633,276,16.4,0,0,0,1,0,0,0,0,0 41 | 206,0.0,10.59,0,0.489,6.3260000000000005,52.5,4.3549,277,18.6,0,0,0,0,1,0,0,0,0 42 | 285,55.0,2.25,0,0.389,6.4529999999999985,31.9,7.3073,300,15.3,1,0,0,0,0,0,0,0,0 43 | 179,0.0,2.46,0,0.488,6.98,58.4,2.829,193,17.8,0,0,1,0,0,0,0,0,0 44 | 218,0.0,13.89,1,0.55,5.9510000000000005,93.8,2.8893,276,16.4,0,0,0,1,0,0,0,0,0 45 | 378,0.0,18.1,0,0.6709999999999999,6.38,96.2,1.3861,666,20.2,0,0,0,0,0,0,0,0,1 46 | 
17,0.0,8.14,0,0.5379999999999999,5.99,81.7,4.2579,307,21.0,0,0,0,0,1,0,0,0,0 47 | 68,12.5,6.07,0,0.409,5.5939999999999985,36.8,6.497999999999998,345,18.9,0,0,0,0,1,0,0,0,0 48 | 147,0.0,19.58,0,0.871,4.926,95.7,1.4608,403,14.7,0,0,0,1,0,0,0,0,0 49 | 241,30.0,4.93,0,0.428,6.095,65.1,6.3361,300,16.6,0,0,0,0,0,0,1,0,0 50 | 48,0.0,6.91,0,0.448,5.399,95.3,5.87,233,17.9,0,0,1,0,0,0,0,0,0 51 | 236,0.0,6.2,1,0.507,6.631,76.5,4.148,307,17.4,0,0,0,0,0,1,0,0,0 52 | 238,30.0,4.93,0,0.428,6.481,18.5,6.1899,300,16.6,0,0,0,0,0,0,1,0,0 53 | 260,20.0,3.97,0,0.647,7.2029999999999985,81.8,2.1121,264,13.0,0,0,0,1,0,0,0,0,0 54 | 94,28.0,15.04,0,0.464,6.249,77.3,3.615,270,18.2,0,0,0,0,1,0,0,0,0 55 | 14,0.0,8.14,0,0.5379999999999999,6.096,84.5,4.4619,307,21.0,0,0,0,0,1,0,0,0,0 56 | 216,0.0,13.89,1,0.55,5.888,56.0,3.1121,276,16.4,0,0,0,1,0,0,0,0,0 57 | 265,20.0,3.97,0,0.647,5.56,62.8,1.9865,264,13.0,0,0,0,1,0,0,0,0,0 58 | 370,0.0,18.1,1,0.631,7.016,97.5,1.2024,666,20.2,0,0,0,0,0,0,0,0,1 59 | 309,0.0,9.9,0,0.544,5.972,76.7,3.1025,304,18.4,0,0,0,0,1,0,0,0,0 60 | 154,0.0,19.58,1,0.871,6.129,96.0,1.7494,403,14.7,0,0,0,1,0,0,0,0,0 61 | 0,18.0,2.31,0,0.5379999999999999,6.575,65.2,4.09,296,15.3,1,0,0,0,0,0,0,0,0 62 | 130,0.0,21.89,0,0.624,6.457999999999998,98.9,2.1185,437,21.2,0,0,0,0,1,0,0,0,0 63 | 125,0.0,25.65,0,0.581,5.9860000000000015,88.4,1.9929,188,19.1,0,1,0,0,0,0,0,0,0 64 | 357,0.0,18.1,1,0.77,6.395,91.0,2.5052,666,20.2,0,0,0,0,0,0,0,0,1 65 | 76,0.0,12.83,0,0.4370000000000001,6.279,74.5,4.0522,398,18.7,0,0,0,1,0,0,0,0,0 66 | 473,0.0,18.1,0,0.614,6.98,67.6,2.5329,666,20.2,0,0,0,0,0,0,0,0,1 67 | 187,45.0,3.44,0,0.4370000000000001,6.782,41.1,3.7886,398,15.2,0,0,0,1,0,0,0,0,0 68 | 109,0.0,8.56,0,0.52,6.229,91.2,2.5451,384,20.9,0,0,0,1,0,0,0,0,0 69 | 382,0.0,18.1,0,0.7,5.5360000000000005,100.0,1.5804,666,20.2,0,0,0,0,0,0,0,0,1 70 | 88,0.0,3.41,0,0.489,7.0070000000000014,86.3,3.4217,270,17.8,0,1,0,0,0,0,0,0,0 71 | 29,0.0,8.14,0,0.5379999999999999,6.674,87.3,4.239,307,21.0,0,0,0,0,1,0,0,0,0 72 
| 222,0.0,6.2,1,0.507,6.879,77.7,3.2721,307,17.4,0,0,0,0,0,1,0,0,0 73 | 100,0.0,8.56,0,0.52,6.727,79.9,2.7778,384,20.9,0,0,0,1,0,0,0,0,0 74 | 449,0.0,18.1,0,0.713,6.417000000000002,98.3,2.185,666,20.2,0,0,0,0,0,0,0,0,1 75 | 295,0.0,13.92,0,0.4370000000000001,6.678,31.1,5.9604,289,16.0,0,0,0,0,1,0,0,0,0 76 | 72,0.0,10.81,0,0.413,6.065,7.8,5.2873,305,19.2,0,0,0,0,1,0,0,0,0 77 | 26,0.0,8.14,0,0.5379999999999999,5.813,90.3,4.682,307,21.0,0,0,0,0,1,0,0,0,0 78 | 80,25.0,4.86,0,0.426,6.727,33.5,5.4007,281,19.0,0,0,0,0,1,0,0,0,0 79 | 153,0.0,19.58,0,0.871,5.709,98.5,1.6232,403,14.7,0,0,0,1,0,0,0,0,0 80 | 111,0.0,10.01,0,0.547,6.715,81.6,2.6775,432,17.8,0,0,0,0,0,0,1,0,0 81 | 485,0.0,18.1,0,0.583,6.312,51.9,3.9917,666,20.2,0,0,0,0,0,0,0,0,1 82 | 450,0.0,18.1,0,0.713,6.749,92.6,2.3236,666,20.2,0,0,0,0,0,0,0,0,1 83 | 54,75.0,4.0,0,0.41,5.888,47.6,7.3197,469,21.1,0,0,1,0,0,0,0,0,0 84 | 314,0.0,9.9,0,0.544,6.567,87.3,3.6023,304,18.4,0,0,0,0,1,0,0,0,0 85 | 481,0.0,18.1,0,0.532,6.75,74.9,3.3317,666,20.2,0,0,0,0,0,0,0,0,1 86 | 442,0.0,18.1,0,0.74,6.2189999999999985,100.0,2.0048,666,20.2,0,0,0,0,0,0,0,0,1 87 | 377,0.0,18.1,0,0.6709999999999999,6.794,98.8,1.358,666,20.2,0,0,0,0,0,0,0,0,1 88 | 113,0.0,10.01,0,0.547,6.0920000000000005,95.4,2.548,432,17.8,0,0,0,0,0,0,1,0,0 89 | 47,0.0,6.91,0,0.448,6.03,85.5,5.6894,233,17.9,0,0,1,0,0,0,0,0,0 90 | 437,0.0,18.1,0,0.74,6.152,100.0,1.9142,666,20.2,0,0,0,0,0,0,0,0,1 91 | 169,0.0,19.58,0,0.605,6.402,95.2,2.2625,403,14.7,0,0,0,1,0,0,0,0,0 92 | 421,0.0,18.1,0,0.718,6.006,95.3,1.8746,666,20.2,0,0,0,0,0,0,0,0,1 93 | 408,0.0,18.1,0,0.597,5.617000000000001,97.9,1.4547,666,20.2,0,0,0,0,0,0,0,0,1 94 | 232,0.0,6.2,0,0.507,8.337,73.3,3.8384,307,17.4,0,0,0,0,0,1,0,0,0 95 | 386,0.0,18.1,0,0.7,4.652,100.0,1.4672,666,20.2,0,0,0,0,0,0,0,0,1 96 | 490,0.0,27.74,0,0.609,5.093,98.0,1.8226,711,20.1,0,0,0,0,1,0,0,0,0 97 | 475,0.0,18.1,0,0.584,6.162000000000001,97.4,2.206,666,20.2,0,0,0,0,0,0,0,0,1 98 | 
116,0.0,10.01,0,0.547,6.176,72.5,2.7301,432,17.8,0,0,0,0,0,0,1,0,0 99 | 136,0.0,21.89,0,0.624,5.942,93.5,1.9669,437,21.2,0,0,0,0,1,0,0,0,0 100 | 115,0.0,10.01,0,0.547,5.928,88.2,2.4631,432,17.8,0,0,0,0,0,0,1,0,0 101 | 132,0.0,21.89,0,0.624,6.372000000000001,97.9,2.3274,437,21.2,0,0,0,0,1,0,0,0,0 102 | 397,0.0,18.1,0,0.693,5.747000000000001,98.9,1.6334,666,20.2,0,0,0,0,0,0,0,0,1 103 | 311,0.0,9.9,0,0.544,6.122000000000001,52.8,2.6403,304,18.4,0,0,0,0,1,0,0,0,0 104 | 306,33.0,2.18,0,0.472,7.42,71.9,3.0992,222,18.4,0,0,0,0,0,0,0,1,0 105 | 427,0.0,18.1,0,0.679,6.202000000000001,78.7,1.8629,666,20.2,0,0,0,0,0,0,0,0,1 106 | 412,0.0,18.1,0,0.597,4.628,100.0,1.5539,666,20.2,0,0,0,0,0,0,0,0,1 107 | 416,0.0,18.1,0,0.679,6.782,90.8,1.8195,666,20.2,0,0,0,0,0,0,0,0,1 108 | 25,0.0,8.14,0,0.5379999999999999,5.599,85.7,4.4546,307,21.0,0,0,0,0,1,0,0,0,0 109 | 251,22.0,5.86,0,0.431,6.438,8.9,7.3967,330,19.1,0,0,0,0,0,0,0,1,0 110 | 32,0.0,8.14,0,0.5379999999999999,5.95,82.0,3.99,307,21.0,0,0,0,0,1,0,0,0,0 111 | 430,0.0,18.1,0,0.584,6.348,86.1,2.0527,666,20.2,0,0,0,0,0,0,0,0,1 112 | 157,0.0,19.58,0,0.605,6.943,97.4,1.8773,403,14.7,0,0,0,1,0,0,0,0,0 113 | 59,25.0,5.13,0,0.453,5.9270000000000005,47.2,6.932,284,19.7,0,0,0,0,0,1,0,0,0 114 | 282,20.0,3.33,1,0.4429,7.645,49.7,5.2119,216,14.9,0,0,0,1,0,0,0,0,0 115 | 365,0.0,18.1,0,0.718,3.5610000000000004,87.9,1.6132,666,20.2,0,0,0,0,0,0,0,0,1 116 | 127,0.0,21.89,0,0.624,5.693,96.0,1.7883,437,21.2,0,0,0,0,1,0,0,0,0 117 | 296,0.0,13.92,0,0.4370000000000001,6.549,51.0,5.9604,289,16.0,0,0,0,0,1,0,0,0,0 118 | 333,0.0,5.19,0,0.515,6.316,38.1,6.4584,224,20.2,0,0,0,1,0,0,0,0,0 119 | 368,0.0,18.1,0,0.631,4.97,100.0,1.3325,666,20.2,0,0,0,0,0,0,0,0,1 120 | 237,0.0,6.2,0,0.507,7.358,71.6,4.148,307,17.4,0,0,0,0,0,1,0,0,0 121 | 436,0.0,18.1,0,0.74,6.461,93.3,2.0026,666,20.2,0,0,0,0,0,0,0,0,1 122 | 456,0.0,18.1,0,0.713,5.976,87.9,2.5806,666,20.2,0,0,0,0,0,0,0,0,1 123 | 339,0.0,5.19,0,0.515,5.985,45.4,4.8122,224,20.2,0,0,0,1,0,0,0,0,0 124 | 
405,0.0,18.1,0,0.693,5.683,100.0,1.4254,666,20.2,0,0,0,0,0,0,0,0,1 125 | 362,0.0,18.1,0,0.77,5.362,96.2,2.1036,666,20.2,0,0,0,0,0,0,0,0,1 126 | 158,0.0,19.58,0,0.605,6.066,100.0,1.7573,403,14.7,0,0,0,1,0,0,0,0,0 127 | 440,0.0,18.1,0,0.74,5.818,92.4,1.8662,666,20.2,0,0,0,0,0,0,0,0,1 128 | 65,80.0,3.37,0,0.398,6.29,17.8,6.6115,337,16.1,0,0,0,0,1,0,0,0,0 129 | -------------------------------------------------------------------------------- /docs/build/html/model/src/src.helpers.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | src.helpers package — reproducible-model 0.1 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 |
43 | 44 | 45 | 112 | 113 |
114 | 115 | 116 | 122 | 123 | 124 |
125 | 126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 |
145 | 146 |
    147 | 148 |
  • Docs »
  • 149 | 150 |
  • src package »
  • 151 | 152 |
  • src.helpers package
  • 153 | 154 | 155 |
  • 156 | 157 | 158 | View page source 159 | 160 | 161 |
  • 162 | 163 |
164 | 165 | 166 |
167 |
168 |
169 |
170 | 171 |
172 |

src.helpers package

173 |
174 |

Submodules

175 |
176 |
177 |

src.helpers.helpers module

178 |
179 |
180 | class src.helpers.helpers.Timer(function, logger)[source]
181 |

Bases: object

182 |
183 | 184 |
185 |
186 | src.helpers.helpers.fillin_kwargs(keywords, kwargs)[source]
187 |
188 | 189 |
190 |
191 |

Module contents

192 |
193 |
194 | 195 | 196 |
197 | 198 |
199 |
200 | 201 | 209 | 210 | 211 |
212 | 213 |
214 |

215 | © Copyright 2019 216 | 217 |

218 |
219 | Built with Sphinx using a theme provided by Read the Docs. 220 | 221 |
222 | 223 |
224 |
225 | 226 |
227 | 228 |
"""Regression-test runner (test/test.py).

Runs the commands listed in a YAML test configuration and checks that the
files each command produces match the reference copies kept in a "true"
directory.  Dictionary-like files (JSON, XML, YAML) are parsed and compared
by content because the order of their keys is not deterministic.
"""
import argparse
import filecmp
import json
import logging
import logging.config
import os
import subprocess
from collections.abc import Mapping

# File suffixes that are compared as parsed dictionaries rather than as bytes.
dict_file_types = ["json", "xml", "yml", "yaml"]


def open_dictlike_file(fname):
    """Parse a JSON, YAML or XML file into a dictionary-like object.

    Args:
        fname (str): Path to the file. The parser is chosen from the suffix.

    Returns:
        The parsed content of the file (normally a dict).

    Raises:
        ValueError: If the suffix is not a supported dictionary-like type.
    """
    with open(fname, "r") as f:
        if fname.endswith("json"):
            return json.load(f)
        if fname.endswith(("yaml", "yml")):
            # Third-party parsers are imported where they are used so the
            # comparison helpers stay importable without them.
            import yaml
            # safe_load: the file is data, never arbitrary Python objects.
            return yaml.safe_load(f)
        if fname.endswith("xml"):
            import xmltodict
            return xmltodict.parse(f.read())

    logging.warning("%s not a known dictionary-like file type", fname)
    raise ValueError("%s is not a known dictionary-like file type" % fname)


def compare_dict(dicta, dictb):
    """Compare two dictionaries recursively, ignoring key order.

    Args:
        dicta: Reference mapping.
        dictb: Mapping to compare against the reference.

    Returns:
        dicts_are_same (bool): True if both hold the same keys and values.
        mismatch_keys (list): Top-level keys whose values differ or that are
            present in only one of the two mappings.
    """
    # Parsed files are not always mappings (an empty YAML file parses to
    # None); fall back to plain equality in that case.
    if not (isinstance(dicta, Mapping) and isinstance(dictb, Mapping)):
        return dicta == dictb, []

    mismatch_keys = []
    for key, value in dicta.items():
        if key not in dictb:
            same = False
        elif isinstance(value, Mapping) and isinstance(dictb[key], Mapping):
            # isinstance (not type() ==) so OrderedDicts are recursed into
            # and therefore compared without regard to key order.
            same, _ = compare_dict(value, dictb[key])
        else:
            same = value == dictb[key]
        if not same:
            mismatch_keys.append(key)

    # Keys that only exist in dictb are differences too.
    mismatch_keys.extend(key for key in dictb if key not in dicta)

    return not mismatch_keys, mismatch_keys


def _extension(fname):
    """Return the text after the last dot of fname."""
    return fname.split('.')[-1]


def _reset_outputs(true_dir, test_dir, files, logger):
    """Delete stale outputs and return the files that have no reference copy."""
    no_true_to_compare = []
    for fname in files:
        test_file = os.path.join(test_dir, fname)
        true_file = os.path.join(true_dir, fname)

        # Remove outputs of a previous run; otherwise the test could pass
        # even though the command no longer produces the file.
        if os.path.exists(test_file):
            os.remove(test_file)
            logger.debug("%s removed to be recreated", test_file)

        if not os.path.exists(true_file):
            logger.warning("%s does not exist to be compared to", true_file)
            no_true_to_compare.append(fname)
    return no_true_to_compare


def _dump_for_diff(directory, prefix, fname, content):
    """Write content as YAML next to fname so a mismatch can be diffed by hand."""
    import yaml

    head, tail = os.path.split(fname)
    path = os.path.join(directory, head, "%s_%s.yml" % (prefix, tail))
    with open(path, "w") as f:
        yaml.dump(content, f)


def _compare_dict_files(true_dir, test_dir, files, logger):
    """Return the dictionary-like files whose parsed content differs."""
    failed = []
    for fname in files:
        test_file = os.path.join(test_dir, fname)
        if not os.path.exists(test_file):
            logger.warning("%s does not exist to be compared to", test_file)
            failed.append(fname)
            continue

        true_dict = open_dictlike_file(os.path.join(true_dir, fname))
        test_dict = open_dictlike_file(test_file)

        dicts_are_same, mismatch_keys = compare_dict(true_dict, test_dict)
        if not dicts_are_same:
            failed.append(fname)
            _dump_for_diff(true_dir, "true", fname, true_dict)
            _dump_for_diff(test_dir, "test", fname, test_dict)
            logger.warning("%s keys are not the same",
                           ",".join(str(key) for key in mismatch_keys))
    return failed


def run_tests(args=None, config_path=None, logger=None):
    """Run each configured command and compare its output files to the expected ones.

    The test configuration is a YAML file of the following form:

    ```yaml
    test_name:
      command: command line to run
      true_dir: directory holding the expected files
      test_dir: directory the command writes its files to
      files_to_compare:
        - first file name
        - second file name
    ```

    Args:
        args: If fed args from argparse, args.path should exist and give the
            path to the testing configuration file.
        config_path: Path to the testing configuration file.
        logger: Logger to report through. Defaults to this module's logger
            set to DEBUG.

    Returns:
        all_passed (bool): True if all tests pass, False if not.
        results (dict): Dictionary of tests and their corresponding list of
            files that did not match or were not produced.

    Raises:
        subprocess.CalledProcessError: If a tested command exits non-zero.
    """
    import yaml

    if logger is None:
        logger = logging.getLogger(__name__)
        logger.setLevel("DEBUG")

    if args is not None:
        config_path = args.path

    with open(config_path, "r") as f:
        # An empty configuration file parses to None.
        tests = yaml.safe_load(f) or {}

    all_passed = True
    results = {}
    for test, testconf in tests.items():
        true_dir, test_dir = testconf["true_dir"], testconf["test_dir"]
        expected = testconf["files_to_compare"]

        no_true_to_compare = _reset_outputs(true_dir, test_dir, expected, logger)

        # Run command being tested
        subprocess.check_output(testconf["command"].split())

        comparable = [f for f in expected if f not in no_true_to_compare]
        plain_files = [f for f in comparable
                       if _extension(f) not in dict_file_types]
        dict_files = [f for f in comparable
                      if _extension(f) in dict_file_types]

        # shallow=False forces a content comparison instead of trusting the
        # os.stat signature. `errors` holds files that could not be compared,
        # e.g. because the command never produced them.
        _, mismatch, errors = filecmp.cmpfiles(true_dir, test_dir,
                                               plain_files, shallow=False)
        for fname in errors:
            logger.warning("%s does not exist to be compared to",
                           os.path.join(test_dir, fname))

        failed = set(mismatch) | set(errors) | set(no_true_to_compare)
        failed.update(_compare_dict_files(true_dir, test_dir, dict_files, logger))

        # Report in the order the files are listed in the configuration.
        failed_files = [f for f in expected if f in failed]
        results[test] = failed_files

        if failed_files:
            logger.warning("%s file(s) does not match or did not exist, %s test FAILED",
                           ", ".join(failed_files), test)
            all_passed = False
        else:
            logger.warning("%s test PASSED", test)

    if all_passed:
        logger.warning("Success, all tests passed!")

    return all_passed, results


if __name__ == '__main__':
    logging.config.fileConfig("config/logging/local.conf")
    logger = logging.getLogger(__name__)
    logger.setLevel("DEBUG")

    parser = argparse.ArgumentParser(description="Test whether the expected outputs are produced")
    parser.add_argument("--path", default="test/test_config.yml", help="Path to the test configuration file")
    args = parser.parse_args()
    run_tests(config_path=args.path, logger=logger)
/*
 * doctools.js
 * ~~~~~~~~~~~
 *
 * Sphinx JavaScript utilities for all documentation.
 *
 * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

/**
 * select a different prefix for underscore
 */
$u = _.noConflict();

/**
 * make the code below compatible with browsers without
 * an installed firebug like debugger
if (!window.console || !console.firebug) {
  var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
    "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
    "profile", "profileEnd"];
  window.console = {};
  for (var i = 0; i < names.length; ++i)
    window.console[names[i]] = function() {};
}
 */

/**
 * small helper function to urldecode strings
 */
jQuery.urldecode = function(x) {
  return decodeURIComponent(x).replace(/\+/g, ' ');
};

/**
 * small helper function to urlencode strings
 */
jQuery.urlencode = encodeURIComponent;

/**
 * This function returns the parsed url parameters of the
 * current request. Multiple values per key are supported,
 * it will always return arrays of strings for the value parts.
 */
jQuery.getQueryParameters = function(s) {
  if (typeof s === 'undefined')
    s = document.location.search;
  var parts = s.substr(s.indexOf('?') + 1).split('&');
  var result = {};
  for (var i = 0; i < parts.length; i++) {
    var tmp = parts[i].split('=', 2);
    var key = jQuery.urldecode(tmp[0]);
    var value = jQuery.urldecode(tmp[1]);
    if (key in result)
      result[key].push(value);
    else
      result[key] = [value];
  }
  return result;
};

/**
 * highlight a given string on a jquery object by wrapping it in
 * span elements with the given class name.
 */
jQuery.fn.highlightText = function(text, className) {
  // Walks the DOM below `node`; text nodes (nodeType 3) containing `text`
  // are split so the match sits in its own span/tspan element.
  function highlight(node, addItems) {
    if (node.nodeType === 3) {
      var val = node.nodeValue;
      var pos = val.toLowerCase().indexOf(text);
      if (pos >= 0 &&
          !jQuery(node.parentNode).hasClass(className) &&
          !jQuery(node.parentNode).hasClass("nohighlight")) {
        var span;
        var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
        if (isInSVG) {
          span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
        } else {
          span = document.createElement("span");
          span.className = className;
        }
        span.appendChild(document.createTextNode(val.substr(pos, text.length)));
        node.parentNode.insertBefore(span, node.parentNode.insertBefore(
          document.createTextNode(val.substr(pos + text.length)),
          node.nextSibling));
        node.nodeValue = val.substr(0, pos);
        if (isInSVG) {
          // Inside SVG the match is marked with a rect sized to the tspan's
          // bounding box; the rect is inserted after the walk has finished.
          var bbox = span.getBBox();
          var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
          rect.x.baseVal.value = bbox.x;
          rect.y.baseVal.value = bbox.y;
          rect.width.baseVal.value = bbox.width;
          rect.height.baseVal.value = bbox.height;
          rect.setAttribute('class', className);
          addItems.push({
              "parent": node.parentNode,
              "target": rect});
        }
      }
    }
    else if (!jQuery(node).is("button, select, textarea")) {
      jQuery.each(node.childNodes, function() {
        highlight(this, addItems);
      });
    }
  }
  var addItems = [];
  var result = this.each(function() {
    highlight(this, addItems);
  });
  for (var i = 0; i < addItems.length; ++i) {
    jQuery(addItems[i].parent).before(addItems[i].target);
  }
  return result;
};

/*
 * backward compatibility for jQuery.browser
 * This will be supported until firefox bug is fixed.
 */
if (!jQuery.browser) {
  jQuery.uaMatch = function(ua) {
    ua = ua.toLowerCase();

    var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
      /(webkit)[ \/]([\w.]+)/.exec(ua) ||
      /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
      /(msie) ([\w.]+)/.exec(ua) ||
      ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
      [];

    return {
      browser: match[ 1 ] || "",
      version: match[ 2 ] || "0"
    };
  };
  jQuery.browser = {};
  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}

/**
 * Small JavaScript module for the documentation.
 */
var Documentation = {

  init : function() {
    this.fixFirefoxAnchorBug();
    this.highlightSearchWords();
    this.initIndexTable();
    if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
      this.initOnKeyListeners();
    }
  },

  /**
   * i18n support
   */
  TRANSLATIONS : {},
  PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
  LOCALE : 'unknown',

  // gettext and ngettext don't access this so that the functions
  // can safely bound to a different name (_ = Documentation.gettext)
  gettext : function(string) {
    var translated = Documentation.TRANSLATIONS[string];
    if (typeof translated === 'undefined')
      return string;
    return (typeof translated === 'string') ? translated : translated[0];
  },

  ngettext : function(singular, plural, n) {
    var translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated === 'undefined')
      return (n == 1) ? singular : plural;
    // The property is PLURAL_EXPR (see above and addTranslations); calling
    // the misspelled PLURALEXPR threw a TypeError for translated strings.
    return translated[Documentation.PLURAL_EXPR(n)];
  },

  addTranslations : function(catalog) {
    for (var key in catalog.messages)
      this.TRANSLATIONS[key] = catalog.messages[key];
    this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
    this.LOCALE = catalog.locale;
  },

  /**
   * add context elements like header anchor links
   */
  addContextElements : function() {
    // NOTE(review): the anchor markup in the two jQuery calls below is
    // restored from upstream Sphinx doctools.js -- confirm against the
    // installed Sphinx version.
    $('div[id] > :header:first').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this headline')).
      appendTo(this);
    });
    $('dt[id]').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this definition')).
      appendTo(this);
    });
  },

  /**
   * workaround a firefox stupidity
   * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
   */
  fixFirefoxAnchorBug : function() {
    if (document.location.hash && $.browser.mozilla)
      window.setTimeout(function() {
        document.location.href += '';
      }, 10);
  },

  /**
   * highlight the search words provided in the url in the text
   */
  highlightSearchWords : function() {
    var params = $.getQueryParameters();
    var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
    if (terms.length) {
      var body = $('div.body');
      if (!body.length) {
        body = $('body');
      }
      window.setTimeout(function() {
        $.each(terms, function() {
          body.highlightText(this.toLowerCase(), 'highlighted');
        });
      }, 10);
      // NOTE(review): link markup restored from upstream Sphinx doctools.js;
      // hideSearchWords() below relies on the .highlight-link class.
      $('<p class="highlight-link"><a href="javascript:Documentation.' +
        'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
          .appendTo($('#searchbox'));
    }
  },

  /**
   * init the domain index toggle buttons
   */
  initIndexTable : function() {
    var togglers = $('img.toggler').click(function() {
      var src = $(this).attr('src');
      var idnum = $(this).attr('id').substr(7);
      $('tr.cg-' + idnum).toggle();
      if (src.substr(-9) === 'minus.png')
        $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
      else
        $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
    }).css('display', '');
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
        togglers.click();
    }
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords : function() {
    $('#searchbox .highlight-link').fadeOut(300);
    $('span.highlighted').removeClass('highlighted');
  },

  /**
   * make the url absolute
   */
  makeURL : function(relativeURL) {
    return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
  },

  /**
   * get the current relative url
   */
  getCurrentURL : function() {
    var path = document.location.pathname;
    var parts = path.split(/\//);
    $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
      if (this === '..')
        parts.pop();
    });
    var url = parts.join('/');
    return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
  },

  /**
   * navigate to the previous/next page with the left/right arrow keys
   */
  initOnKeyListeners: function() {
    $(document).keyup(function(event) {
      var activeElementType = document.activeElement.tagName;
      // don't navigate when in search box or textarea
      if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT') {
        switch (event.keyCode) {
          case 37: // left
            var prevHref = $('link[rel="prev"]').prop('href');
            if (prevHref) {
              window.location.href = prevHref;
              return false;
            }
            // Without this break, "left" on the first page fell through
            // and navigated to the next page.
            break;
          case 39: // right
            var nextHref = $('link[rel="next"]').prop('href');
            if (nextHref) {
              window.location.href = nextHref;
              return false;
            }
            break;
        }
      }
    });
  }
};

// quick alias for translations
_ = Documentation.gettext;

$(document).ready(function() {
  Documentation.init();
});