├── .flake8 ├── .gitattributes ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── PyData Cambridge.pdf ├── README.md ├── data ├── main.py ├── processed │ └── v1 │ │ └── .gitkeep ├── raw │ └── v1 │ │ └── .gitkeep └── scripts │ ├── extract.py │ ├── load.py │ └── transform.py ├── deploy ├── scripts │ └── .gitkeep └── tests │ └── .gitkeep ├── develop ├── artifacts │ └── .gitkeep ├── eda │ └── .gitkeep ├── main.py ├── notebooks │ └── .gitkeep └── scripts │ ├── eda.py │ └── load.py ├── label └── .gitkeep ├── poetry.toml ├── pyproject.toml ├── settings.json ├── train └── .gitkeep └── visualise └── .gitkeep /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203,E501,W503 4 | exclude = .venv 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # https://git-scm.com/docs/gitattributes 2 | 3 | *.csv filter=lfs diff=lfs merge=lfs -text 4 | *.xlsx filter=lfs diff=lfs merge=lfs -text 5 | *.pkl filter=lfs diff=lfs merge=lfs -text 6 | *.whl filter=lfs diff=lfs merge=lfs -text 7 | *.json filter=lfs diff=lfs merge=lfs -text 8 | *.png filter=lfs diff=lfs merge=lfs -text 9 | *.jpg filter=lfs diff=lfs merge=lfs -text 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .venv 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | 125 | # vscode project settings 126 | .vscode 127 | *.code-workspace 128 | .code-workspace 129 | debug.log 130 | 131 | # Files 132 | .DS_Store 133 | poetry.lock 134 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: '22.10.0' 4 | hooks: 5 | - id: black 6 | - id: black-jupyter 7 | - repo: https://github.com/PyCQA/flake8 8 | rev: '5.0.4' 9 | hooks: 10 | - id: flake8 11 | args: ['--max-line-length=88', '--extend-ignore=E203,E501,W503', '--exclude=.venv'] 12 | - repo: https://github.com/PyCQA/isort 13 | rev: '5.10.1' 14 | hooks: 15 | - id: isort 16 | args: ['--profile=black'] 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: '0.6.1' 19 | hooks: 20 | - id: nbstripout 21 | - repo: https://github.com/pycqa/pydocstyle 22 | rev: '6.1.1' 23 | hooks: 24 | - id: pydocstyle 25 | args: ['--convention=google', '--add-ignore=D100,D101,D102,D103,D104,D105,D106,D107'] 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: 'v4.3.0' 28 | hooks: 29 | - id: check-ast 30 | - id: end-of-file-fixer 31 | - id: trailing-whitespace 32 | - repo: https://github.com/sqlfluff/sqlfluff 33 | rev: '1.4.2' 34 | hooks: 35 | - id: sqlfluff-fix 36 | - id: sqlfluff-lint 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Gabriel Harris 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .EXPORT_ALL_VARIABLES: 2 | .PHONY: venv install pre-commit clean 3 | 4 | GLOBAL_PYTHON = $(shell py -3.9 -c 'import sys; print(sys.executable)') 5 | LOCAL_PYTHON = .\\.venv\\Scripts\\python.exe 6 | LOCAL_PRE_COMMIT = .\\.venv\\Lib\\site-packages\\pre_commit 7 | 8 | setup: venv install pre-commit 9 | 10 | venv: $(GLOBAL_PYTHON) 11 | @echo "Creating .venv..." 12 | poetry env use $(GLOBAL_PYTHON) 13 | 14 | install: ${LOCAL_PYTHON} 15 | @echo "Installing dependencies..." 16 | poetry install --no-root --sync 17 | 18 | pre-commit: ${LOCAL_PYTHON} ${LOCAL_PRE_COMMIT} 19 | @echo "Setting up pre-commit..." 20 | .\\.venv\\Scripts\\pre-commit install 21 | .\\.venv\\Scripts\\pre-commit autoupdate 22 | 23 | clean: 24 | if exist .\\.git\\hooks ( rmdir .\\.git\\hooks /q /s ) 25 | if exist .\\.venv\\ ( rmdir .\\.venv /q /s ) 26 | if exist poetry.lock ( del poetry.lock /q /s ) 27 | -------------------------------------------------------------------------------- /PyData Cambridge.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/PyData Cambridge.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Science Repo Template 2 | 3 | A repository template using Poetry, Makefile, and pre-commit-hooks 4 | 5 | For more details, read the Medium [article](https://medium.com/@drgabrielharris/python-how-using-poetry-make-and-pre-commit-hooks-to-setup-a-repo-template-for-your-ds-team-15b5a77d0e0f) 6 | 7 | ## Project structure 8 | 9 | ```bash 10 | . 11 | ├── data 12 | │ ├── processed 13 | │ │ └── v1 14 | │ ├── raw 15 | │ │ └── v1 16 | │ ├── scripts 17 | │ │ ├── extract.py 18 | │ │ ├── transform.py 19 | │ │ └── load.py 20 | │ └── main.py 21 | ├── deploy 22 | │ ├── scripts 23 | │ └── tests 24 | ├── develop 25 | │ ├── artifacts 26 | │ ├── eda 27 | │ ├── notebooks 28 | │ ├── scripts 29 | │ │ ├── eda.py 30 | │ │ └── load.py 31 | │ └── main.py 32 | ├── label 33 | ├── train 34 | ├── visualise 35 | ├── .flake8 36 | ├── .gitattributes 37 | ├── .gitignore 38 | ├── .pre-commit-config.yaml 39 | ├── Makefile 40 | ├── poetry.toml 41 | ├── pyproject.toml 42 | └── README.md 43 | ``` 44 | -------------------------------------------------------------------------------- /data/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/main.py -------------------------------------------------------------------------------- /data/processed/v1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/processed/v1/.gitkeep -------------------------------------------------------------------------------- /data/raw/v1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/raw/v1/.gitkeep -------------------------------------------------------------------------------- /data/scripts/extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/scripts/extract.py -------------------------------------------------------------------------------- /data/scripts/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/scripts/load.py -------------------------------------------------------------------------------- /data/scripts/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/data/scripts/transform.py -------------------------------------------------------------------------------- /deploy/scripts/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/deploy/scripts/.gitkeep -------------------------------------------------------------------------------- /deploy/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/deploy/tests/.gitkeep -------------------------------------------------------------------------------- /develop/artifacts/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/artifacts/.gitkeep -------------------------------------------------------------------------------- /develop/eda/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/eda/.gitkeep -------------------------------------------------------------------------------- /develop/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/main.py -------------------------------------------------------------------------------- /develop/notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/notebooks/.gitkeep -------------------------------------------------------------------------------- /develop/scripts/eda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/scripts/eda.py -------------------------------------------------------------------------------- /develop/scripts/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/develop/scripts/load.py -------------------------------------------------------------------------------- /label/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/label/.gitkeep -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "Project Name" 3 | version = "0.1.0" 4 | description = "Project Description" 5 | authors = ["Authors Names"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "3.9.*" 9 | pandas = "^1" 10 | 11 | [tool.poetry.group.dev.dependencies] 12 | black = "*" 13 | flake8 = "*" 14 | isort = "*" 15 | nbstripout = "*" 16 | notebook = "*" 17 | rich = "*" 18 | pre-commit = "*" 19 | 20 | [tool.poetry.group.docs.dependencies] 21 | pydocstyle = {extras = ["toml"], version = "*"} 22 | 23 | [tool.poetry.group.test.dependencies] 24 | pytest = "*" 25 | 26 | [build-system] 27 | requires = ["poetry-core>=1.0.0"] 28 | build-backend = "poetry.core.masonry.api" 29 | 30 | [tool.black] 31 | line-length = 88 32 | 33 | [tool.isort] 34 | profile = 'black' 35 | 36 | [tool.pydocstyle] 37 | convention = "google" 38 | add-ignore = "D100,D101,D102,D103,D104,D105,D106,D107" 39 | -------------------------------------------------------------------------------- /settings.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3e1f2815b2a3da77137ffe241bb1142828520d471787229933c17b6eba9961b8 3 | size 588 4 | -------------------------------------------------------------------------------- /train/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/train/.gitkeep -------------------------------------------------------------------------------- /visualise/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrGabrielHarris/data-science-repo-template/06e65a5c6b273efc885e8a23e8885494bff10e51/visualise/.gitkeep --------------------------------------------------------------------------------