├── notebooks
│   └── .gitignore
├── .gitignore
├── data
│   └── .gitignore
├── docker
│   ├── Dockerfile-dev
│   ├── environment.yml
│   └── Dockerfile-prod
├── code
│   └── example.py
├── .env_dev
├── docker-compose.yml
├── docker-compose.prod.yml
└── README.md

/notebooks/.gitignore:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
.env*
!.env_dev
--------------------------------------------------------------------------------

/data/.gitignore:
--------------------------------------------------------------------------------
# Ignore everything in this directory
*
# Except this file
!.gitignore
--------------------------------------------------------------------------------

/docker/Dockerfile-dev:
--------------------------------------------------------------------------------
# base image
FROM continuumio/miniconda3:4.5.11

# load in the environment.yml file
ADD ./docker/environment.yml /

# create the environment
RUN conda update -n base conda -y && conda env update
--------------------------------------------------------------------------------

/code/example.py:
--------------------------------------------------------------------------------
import os

print('Hello world!')

# Example of grabbing an environment variable. The line below prints different
# results depending on whether you run docker-compose.yml or docker-compose.prod.yml.
print(os.environ.get('db_username'))
--------------------------------------------------------------------------------

/.env_dev:
--------------------------------------------------------------------------------
# credentials and database information
db_username=test_username
db_password=test_password
db_host=test_host
db_port=test_port
db_name=test

# disables lag in stdout/stderr output
PYTHONUNBUFFERED=1

# random seed
random_seed=42
--------------------------------------------------------------------------------

/docker/environment.yml:
--------------------------------------------------------------------------------
name: base
channels:
  - defaults
dependencies:
  - pandas=0.22.0
  - sqlalchemy=1.2.1
  - scikit-learn=0.20.0
  - pyodbc=4.0.23
  - jupyterlab
--------------------------------------------------------------------------------

/docker/Dockerfile-prod:
--------------------------------------------------------------------------------
# base image
FROM continuumio/miniconda3:4.5.11

# load in the environment.yml file
ADD ./docker/environment.yml /

# create the environment
RUN conda update -n base conda -y && conda env update

# add the code and notebooks folders to the docker image - this allows us to run
# the image directly without relying on docker-compose or local file dependencies
ADD ./code/ /code
ADD ./notebooks /notebooks
--------------------------------------------------------------------------------

/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3.2"

services:
  python:
    build:
      dockerfile: ./docker/Dockerfile-dev
      context: ./
    image: my_project_python
    env_file:
      - .env_dev

    ports:
      - "8888:8888"
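    # run JupyterLab bound to all container interfaces (--ip=0.0.0.0) so it is
    # reachable from the host, with a fixed token for painless local login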
    command:
      jupyter lab --no-browser --ip=0.0.0.0 --allow-root --NotebookApp.token='local_dev'

    volumes:
      - ./data:/data
      - ./code:/code
      - ./notebooks:/notebooks
--------------------------------------------------------------------------------

/docker-compose.prod.yml:
--------------------------------------------------------------------------------
version: "3.2"

services:
  python:
    build:
      dockerfile: ./docker/Dockerfile-prod
      context: ./
    image: my_project_python # alternatively, replace with your docker registry/dockerhub info (e.g. youruser/example_project_python:latest for dockerhub)

    # uncomment the two lines below if you plan on running via docker-compose in production
    #env_file:
    #  - .env_prod

    command:
      python /code/example.py

    # note - once in production I usually pull data from an API or some other location;
    # if you still expect to pull data from the file system, uncomment the two lines
    # below and replace /real_data_location with the correct path
    #volumes:
    #  - /real_data_location:/data
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
An example template for using Conda + Docker to build reproducible, easy-to-deploy models.

The blog post goes into more detail - find it here:

https://binal.pub/2018/10/data-science-with-docker-and-conda/

#### How to Use This All

As an example, here's my normal development process. Using it I can get from development to production with little friction, knowing that my code will work as expected and won't negatively affect other processes on the production server.

##### Developing and Packaging

1. Clone the template down, update `environment.yml` with the packages I know I'll need, and run `docker-compose build`. This builds the development image with all of those packages installed.
2. Create a `.env_dev` file with development environment variables.
3. Run `docker-compose up` and navigate to JupyterLab, which will be running on [http://localhost:8888](http://localhost:8888). We can access it by entering the token `local_dev`.
4. From there, prototype and develop a model/process using Jupyter notebooks, saving any notebooks I create along the way into `/notebooks` as a development diary. Any final artifacts/models I plan on using in production I save within `/code`.
5. Once I have a final version of my code, save it (and any models it relies on) into `/code`.
6. Update the `docker-compose.prod.yml` file's `command` section to point to my script's name, and the `image` section to point to my docker registry (something like my_registry/my_project:0.1).
7. Run `docker-compose -f docker-compose.prod.yml build` - this builds the production version of the image, packaging everything in the `/code` and `/notebooks` directories directly onto the image.
8. Run `docker-compose -f docker-compose.prod.yml push`, which pushes that packaged image to my organization's docker registry.

At this point I have an image that contains all the code, models, and other artifacts I need, preinstalled with the exact versions of the Python packages and dependencies I require. It's stored in a central location where I can easily pull it down onto other servers.
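The environment-variable pattern that `example.py` demonstrates extends naturally to real credentials. As a minimal sketch (not shipped with the template - the `mssql+pyodbc` URL scheme and the ODBC driver name below are assumptions, so swap them for whatever your database actually uses), a production script in `/code` could build a SQLAlchemy engine from the variables the compose file injects:

```python
import os

from sqlalchemy import create_engine

# connection details injected by docker-compose via env_file
user = os.environ['db_username']
password = os.environ['db_password']
host = os.environ['db_host']
port = os.environ['db_port']
name = os.environ['db_name']

# hypothetical connection URL - the driver string is illustrative only
url = 'mssql+pyodbc://{}:{}@{}:{}/{}?driver=ODBC+Driver+17+for+SQL+Server'.format(
    user, password, host, port, name)
engine = create_engine(url)
```

Because the credentials come from the environment rather than being baked into the image, the same packaged image can run against development and production databases just by swapping the env file.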
--------------------------------------------------------------------------------