├── .gitignore ├── Makefile ├── README.md ├── config-private.yml ├── config.yml ├── data ├── intermediate │ └── .gitkeep ├── processed │ └── .gitkeep ├── raw │ └── .gitkeep └── temp │ └── .gitkeep ├── documents ├── docs │ └── .gitkeep ├── images │ └── .gitkeep └── references │ └── .gitkeep ├── notebooks └── .gitkeep ├── results ├── .gitkeep ├── models │ └── .gitkeep └── outputs │ └── .gitkeep └── src ├── .gitkeep └── misc └── yaml-to-env.py /.gitignore: -------------------------------------------------------------------------------- 1 | /.env 2 | /config-private.yml 3 | .gitkeep 4 | 5 | /data 6 | /results 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .env: config.yml config-private.yml 4 | python src/misc/yaml-to-env.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data science project structure 2 | 3 | Based on https://drivendata.github.io/cookiecutter-data-science/ 4 | 5 | ``` 6 | . 7 | ├── Makefile <- tasks 8 | ├── config.yml <- config file in YAML, can be exported as env vars if needed 9 | ├── config-private.yml <- config file with private config (password, api keys, etc.) 
10 | ├── data 11 | │   ├── raw 12 | │   ├── intermediate 13 | │   ├── processed 14 | │   └── temp 15 | ├── results 16 | │   ├── outputs 17 | │   └── models 18 | ├── documents 19 | │   ├── docs 20 | │   ├── images 21 | │   └── references 22 | ├── notebooks <- notebooks for explorations / prototyping 23 | └── src <- all source code, internal org as needed 24 | ``` 25 | -------------------------------------------------------------------------------- /config-private.yml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/config.yml -------------------------------------------------------------------------------- /data/intermediate/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/data/intermediate/.gitkeep -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/data/processed/.gitkeep -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/data/raw/.gitkeep -------------------------------------------------------------------------------- /data/temp/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/data/temp/.gitkeep -------------------------------------------------------------------------------- /documents/docs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/documents/docs/.gitkeep -------------------------------------------------------------------------------- /documents/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/documents/images/.gitkeep -------------------------------------------------------------------------------- /documents/references/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/documents/references/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/notebooks/.gitkeep -------------------------------------------------------------------------------- /results/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasRobertFr/ml-project-structure/cdf8273ca97b75d7bc3983869b8c61494a9c19ac/results/.gitkeep -------------------------------------------------------------------------------- /results/models/.gitkeep: 
#!/usr/bin/env python
"""Export the ``#env``-tagged entries of the YAML config files to ``.env``.

Only the lines of ``config.yml`` and ``config-private.yml`` that contain the
marker ``#env`` are parsed. The two resulting structures are merged (the
private file wins on conflicts) and written to ``.env`` as shell-quoted
``KEY=VALUE`` lines. All paths are relative to the current working directory.

The script runs on both Python 2.7 and Python 3.
"""

try:  # Python 3
    from itertools import zip_longest
except ImportError:  # Python 2
    from itertools import izip_longest as zip_longest

try:  # Python 3.3+ (the ``pipes`` module was removed in Python 3.13)
    from shlex import quote
except ImportError:  # Python 2
    from pipes import quote


# Merge data structures
def merge(a, b):
    """Recursively merge ``b`` into ``a`` and return the result.

    * Two dicts are merged key by key; keys present in both are merged
      recursively, other keys are kept as they are.
    * Two lists are merged position by position; when one list is shorter,
      the missing positions are treated as ``None``.
    * In every other case ``b`` is returned, unless it is ``None``, in which
      case ``a`` is returned.

    Neither argument is modified; dicts and lists in the result are new
    objects.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        merged = dict(a)
        merged.update({k: merge(a.get(k), b[k]) for k in b})
        return merged

    if isinstance(a, list) and isinstance(b, list):
        return [merge(x, y) for x, y in zip_longest(a, b)]

    return a if b is None else b


# Read config file, keep env
def readFileKeepEnv(filename):
    """Return the lines of ``filename`` that contain ``#env``.

    The kept lines are returned as one string, each terminated by a single
    newline. An empty string is returned when no line carries the marker.
    """
    with open(filename, "r") as f:
        kept = [line.rstrip("\r\n") for line in f if "#env" in line]
    return "".join(line + "\n" for line in kept)


def loadEnvConfig(filename):
    """Parse the ``#env``-tagged lines of ``filename`` as YAML.

    Returns whatever the YAML parser produces for those lines, which is
    ``None`` when no line is tagged.
    """
    # PyYAML is third-party; importing it here keeps the helpers in this
    # file importable on machines where it is not installed.
    import yaml

    # safe_load replaces yaml.load(text): calling yaml.load without a Loader
    # is rejected by PyYAML >= 6 and can build arbitrary Python objects on
    # older versions.
    return yaml.safe_load(readFileKeepEnv(filename))


# Export as env vars
# TODO generalise to nested dict
def formatEnv(config):
    """Render ``config`` as the text of a ``.env`` file.

    Each top-level item becomes one ``KEY=VALUE`` line with both sides
    shell-quoted. Values are converted with ``str()`` first so that numbers
    and booleans do not break the quoting; ``None`` becomes an empty value.
    ``config`` may be ``None`` (nothing tagged), giving an empty string.
    """
    lines = []
    for k, v in (config or {}).items():
        value = "" if v is None else str(v)
        lines.append("%s=%s\n" % (quote(str(k)), quote(value)))
    return "".join(lines)


def main():
    """Load both config files, merge them and write ``.env``."""
    # Load config files
    config = loadEnvConfig("config.yml")
    config_priv = loadEnvConfig("config-private.yml")
    config = merge(config, config_priv)

    # NOTE(review): this echoes the merged config, private values included,
    # to stdout. Kept because the original script did so.
    print(config)

    with open(".env", "w") as f:
        f.write(formatEnv(config))


if __name__ == "__main__":
    main()