├── .gitignore ├── LICENCE.txt ├── README.md ├── data ├── external │ └── .gitkeep ├── interim │ └── .gitkeep ├── processed │ └── .gitkeep └── raw │ └── .gitkeep ├── docs └── .gitkeep ├── models └── .gitkeep ├── notebooks └── .gitkeep ├── references └── folder_structure.txt ├── reports └── figures │ └── .gitkeep ├── requirements.txt └── src ├── __init__.py ├── data ├── .gitkeep ├── __init__.py └── make_dataset.py ├── features ├── .gitkeep ├── __init__.py └── build_features.py ├── models ├── .gitkeep ├── __init__.py ├── predict_model.py └── train_model.py └── visualization ├── .gitkeep ├── __init__.py ├── plot_settings.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # DotEnv configuration 60 | .env 61 | 62 | # Database 63 | *.db 64 | *.rdb 65 | 66 | # Pycharm 67 | .idea 68 | 69 | # VS Code 70 | .vscode/ 71 | *.code-workspace 72 | 73 | # Spyder 74 | .spyproject/ 75 | 76 | # Jupyter NB Checkpoints 77 | .ipynb_checkpoints/ 78 | 79 | 80 | # Mac OS-specific storage files 81 | .DS_Store 82 | 83 | # vim 84 | *.swp 85 | *.swo 86 | 87 | # Mypy cache 88 | .mypy_cache/ 89 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Datalumina B.V. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/README.md -------------------------------------------------------------------------------- /data/external/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/data/external/.gitkeep -------------------------------------------------------------------------------- /data/interim/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/data/interim/.gitkeep -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/data/processed/.gitkeep -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/data/raw/.gitkeep -------------------------------------------------------------------------------- /docs/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/docs/.gitkeep -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/models/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/notebooks/.gitkeep -------------------------------------------------------------------------------- /references/folder_structure.txt: -------------------------------------------------------------------------------- 1 | ├── LICENCE.txt 2 | ├── README.md <- The top-level README for developers using this project. 3 | ├── data 4 | │ ├── external <- Data from third-party sources. 5 | │ ├── interim <- Intermediate data that has been transformed. 6 | │ ├── processed <- The final, canonical data sets for modeling. 7 | │ └── raw <- The original, immutable data dump. 8 | │ 9 | ├── docs <- A default Sphinx project; see sphinx-doc.org for details 10 | │ 11 | ├── models <- Trained and serialized models, model predictions, or model summaries 12 | │ 13 | ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering), 14 | │ the creator's initials, and a short `-` delimited description, e.g. 15 | │ `1.0-jqp-initial-data-exploration`. 16 | │ 17 | ├── references <- Data dictionaries, manuals, and all other explanatory materials. 18 | │ 19 | ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. 
20 | │ └── figures <- Generated graphics and figures to be used in reporting 21 | │ 22 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 23 | │ generated with `pip freeze > requirements.txt` 24 | │ 25 | ├── src <- Source code for use in this project. 26 | │ ├── __init__.py <- Makes src a Python module 27 | │ │ 28 | │ ├── data <- Scripts to download or generate data 29 | │ │ └── make_dataset.py 30 | │ │ 31 | │ ├── features <- Scripts to turn raw data into features for modeling 32 | │ │ └── build_features.py 33 | │ │ 34 | │ ├── models <- Scripts to train models and then use trained models to make 35 | │ │ │ predictions 36 | │ │ ├── predict_model.py 37 | │ │ └── train_model.py 38 | │ │ 39 | │ └── visualization <- Scripts to create exploratory and results oriented visualizations 40 | │ └── visualize.py 41 | 42 | -------------------------------------------------------------------------------- /reports/figures/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/reports/figures/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/requirements.txt -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/__init__.py -------------------------------------------------------------------------------- /src/data/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/data/.gitkeep -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/make_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/data/make_dataset.py -------------------------------------------------------------------------------- /src/features/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/features/.gitkeep -------------------------------------------------------------------------------- /src/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/features/__init__.py -------------------------------------------------------------------------------- /src/features/build_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/features/build_features.py -------------------------------------------------------------------------------- /src/models/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/models/.gitkeep -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/predict_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/models/predict_model.py -------------------------------------------------------------------------------- /src/models/train_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/models/train_model.py -------------------------------------------------------------------------------- /src/visualization/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/visualization/.gitkeep -------------------------------------------------------------------------------- /src/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/visualization/__init__.py -------------------------------------------------------------------------------- /src/visualization/plot_settings.py: -------------------------------------------------------------------------------- 1 | import 
matplotlib as mpl 2 | import matplotlib.pyplot as plt 3 | from cycler import cycler 4 | 5 | colors = cycler(color=plt.get_cmap("tab10").colors) # ["b", "r", "g"] 6 | # colors = cycler(color=["#282782", "r", "g"]) 7 | 8 | mpl.style.use("ggplot") 9 | mpl.rcParams["figure.figsize"] = (20, 5) 10 | mpl.rcParams["axes.facecolor"] = "white" 11 | mpl.rcParams["axes.grid"] = True 12 | mpl.rcParams["grid.color"] = "lightgray" 13 | mpl.rcParams["axes.prop_cycle"] = colors 14 | mpl.rcParams["axes.linewidth"] = 1 15 | mpl.rcParams["xtick.color"] = "black" 16 | mpl.rcParams["ytick.color"] = "black" 17 | mpl.rcParams["font.size"] = 12 18 | mpl.rcParams["figure.titlesize"] = 25 19 | mpl.rcParams["figure.dpi"] = 100 20 | -------------------------------------------------------------------------------- /src/visualization/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveebbelaar/df-data-science-template/d2c800e0f0bd318f18162a9a926e000fe32b5015/src/visualization/visualize.py --------------------------------------------------------------------------------