├── docs └── .gitkeep ├── data ├── raw │ └── .gitkeep ├── external │ └── .gitkeep ├── interim │ └── .gitkeep └── processed │ └── .gitkeep ├── models └── .gitkeep ├── notebooks └── .gitkeep ├── requirements.txt ├── src ├── __init__.py ├── data │ ├── .gitkeep │ ├── __init__.py │ └── make_dataset.py ├── features │ ├── .gitkeep │ ├── __init__.py │ └── build_features.py ├── models │ ├── .gitkeep │ ├── __init__.py │ ├── predict_model.py │ └── train_model.py └── visualization │ ├── .gitkeep │ ├── __init__.py │ ├── visualize.py │ └── plot_settings.py ├── reports └── figures │ └── .gitkeep ├── README.md ├── .gitignore └── references └── folder_structure.txt /docs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/external/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/interim/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/features/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /reports/figures/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data/make_dataset.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/features/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/models/predict_model.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/models/train_model.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/visualization/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/features/build_features.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/visualization/visualize.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Data Science Project Template 2 | 3 | You can use this template to structure your Python data science projects. It is based on [Cookie Cutter Data Science](https://drivendata.github.io/cookiecutter-data-science/). 
-------------------------------------------------------------------------------- /src/visualization/plot_settings.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.pyplot as plt 3 | from cycler import cycler 4 | 5 | colors = cycler(color=plt.get_cmap("tab10").colors) # ["b", "r", "g"] 6 | 7 | mpl.style.use("ggplot") 8 | mpl.rcParams["figure.figsize"] = (20, 5) 9 | mpl.rcParams["axes.facecolor"] = "white" 10 | mpl.rcParams["axes.grid"] = True 11 | mpl.rcParams["grid.color"] = "lightgray" 12 | mpl.rcParams["axes.prop_cycle"] = colors 13 | mpl.rcParams["axes.linewidth"] = 1 14 | mpl.rcParams["xtick.color"] = "black" 15 | mpl.rcParams["ytick.color"] = "black" 16 | mpl.rcParams["font.size"] = 12 17 | mpl.rcParams["figure.titlesize"] = 25 18 | mpl.rcParams["figure.dpi"] = 100 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # DotEnv configuration 60 | .env 61 | 62 | # Database 63 | *.db 64 | *.rdb 65 | 66 | # Pycharm 67 | .idea 68 | 69 | # VS Code 70 | .vscode/ 71 | *.code-workspace 72 | 73 | # Spyder 74 | .spyproject/ 75 | 76 | # Jupyter NB Checkpoints 77 | .ipynb_checkpoints/ 78 | 79 | # exclude data from source control by default 80 | # /data/ 81 | 82 | # Mac OS-specific storage files 83 | .DS_Store 84 | 85 | # vim 86 | *.swp 87 | *.swo 88 | 89 | # Mypy cache 90 | .mypy_cache/ 91 | -------------------------------------------------------------------------------- /references/folder_structure.txt: -------------------------------------------------------------------------------- 1 | ├── LICENSE 2 | ├── Makefile <- Makefile with commands like `make data` or `make train` 3 | ├── README.md <- The top-level README for developers using this project. 4 | ├── data 5 | │ ├── external <- Data from third party sources. 6 | │ ├── interim <- Intermediate data that has been transformed. 7 | │ ├── processed <- The final, canonical data sets for modeling. 8 | │ └── raw <- The original, immutable data dump. 9 | │ 10 | ├── docs <- A default Sphinx project; see sphinx-doc.org for details 11 | │ 12 | ├── models <- Trained and serialized models, model predictions, or model summaries 13 | │ 14 | ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering), 15 | │ the creator's initials, and a short `-` delimited description, e.g. 16 | │ `1.0-jqp-initial-data-exploration`. 
17 | │ 18 | ├── references <- Data dictionaries, manuals, and all other explanatory materials. 19 | │ 20 | ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. 21 | │ └── figures <- Generated graphics and figures to be used in reporting 22 | │ 23 | ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. 24 | │ generated with `pip freeze > requirements.txt` 25 | │ 26 | ├── setup.py <- Make this project pip installable with `pip install -e .` 27 | ├── src <- Source code for use in this project. 28 | │ ├── __init__.py <- Makes src a Python module 29 | │ │ 30 | │ ├── data <- Scripts to download or generate data 31 | │ │ └── make_dataset.py 32 | │ │ 33 | │ ├── features <- Scripts to turn raw data into features for modeling 34 | │ │ └── build_features.py 35 | │ │ 36 | │ ├── models <- Scripts to train models and then use trained models to make 37 | │ │ │ predictions 38 | │ │ ├── predict_model.py 39 | │ │ └── train_model.py 40 | │ │ 41 | │ └── visualization <- Scripts to create exploratory and results oriented visualizations 42 | │ └── visualize.py 43 | │ 44 | └── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io --------------------------------------------------------------------------------