├── models
    └── _keep
├── .dockerignore
├── Procfile
├── bin
    ├── test.sh
    ├── color_my_terminal.sh
    ├── train_model.sh
    ├── install_dependencies.sh
    ├── test_model_metrics.sh
    ├── deploy_to_heroku.sh
    ├── configure_venv_locally.sh
    └── predict.sh
├── requirements-dev.txt
├── src
    ├── settings.py
    ├── test.py
    ├── app.py
    ├── test_model_metrics.py
    ├── train.py
    └── app_with_logging.py
├── requirements.txt
├── .gitignore
├── docs
    ├── mlflow.md
    ├── facilitator_notes.md
    ├── pre-requisites.md
    ├── FAQs.md
    └── CD.md
├── .circleci
    ├── config.helloworld.yaml
    ├── config.heroku.reference.yaml
    └── config.yml
├── pipeline.gocd.yaml
├── Dockerfile
└── README.md


/models/_keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .venv-local


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: python src/train.py && gunicorn src.app:app


--------------------------------------------------------------------------------
/bin/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the unit test suite inside the project's virtualenv.
3 | # Abort if the virtualenv cannot be activated instead of silently using the system python.
4 | set -e
5 | 
6 | source .venv/bin/activate
7 | python -m unittest discover -s src/


--------------------------------------------------------------------------------
/bin/color_my_terminal.sh:
--------------------------------------------------------------------------------
1 | export "PS1=${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\] \$ "


--------------------------------------------------------------------------------
/bin/train_model.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | 
4 | source .venv/bin/activate
5 | python src/train.py
6 | echo "Model training complete."


--------------------------------------------------------------------------------
/bin/install_dependencies.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | 
4 | # install dependencies
5 | pip install --upgrade pip
6 | pip install -r requirements.txt


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | 
3 | matplotlib==3.0.2
4 | seaborn==0.9.0
5 | pylint
6 | nose==1.3.7
7 | nose-watch==0.9.2
8 | rednose==1.3.0


--------------------------------------------------------------------------------
/bin/test_model_metrics.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | 
4 | source .venv/bin/activate
5 | export RUN_METRICS_TEST='true'
6 | python -m unittest src/test_model_metrics.py


--------------------------------------------------------------------------------
/src/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | PORT=8080
3 | 
4 | # replace the following with the external IP of the MLFlow tracking server
5 | MLFLOW_IP='35.185.191.70'
6 | SHOULD_USE_MLFLOW=False
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Flask==1.0.2
 2 | fluent-logger
 3 | gunicorn==19.9.0
 4 | joblib==0.13.1
 5 | lime==0.1.1.33
 6 | numpy==1.15.4
 7 | pandas==0.23.4
 8 | scikit-learn==0.20.2
 9 | mlflow==0.8.2
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .venv*/
 2 | 
 3 | **/__pycache__
 4 | models/*.joblib
 5 | !models/_keep
 6 | 
 7 | # ignoring all json files in project root folder to prevent accidental commits of secrets 
 8 | /*.json
 9 | 
10 | # IDE config
11 | .vscode/
12 | .idea/
13 | 


--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | 
 4 | class TestSimpleExample(TestCase):
 5 |     def test_1_should_equal_1(self):
 6 |         self.assertEqual(1, 1)
 7 | 
 8 |     def test_1_plus_1_should_equal_2(self):
 9 |         self.assertEqual(1 + 1, 2)
10 | 


--------------------------------------------------------------------------------
/docs/mlflow.md:
--------------------------------------------------------------------------------
1 | # Provisioning MLFlow
2 | 
3 | [For workshop facilitators]
4 | 
5 | Instructions for provisioning MLFlow on kubernetes are in the README of: https://github.com/arunma/mlflow-gcp
6 | 
7 | Once MLFlow is provisioned, go to `src/settings.py` and:
8 | - Replace `MLFLOW_IP` with the external IP of the MLFlow tracking server
9 | - set `SHOULD_USE_MLFLOW=True`


--------------------------------------------------------------------------------
/bin/deploy_to_heroku.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | heroku_app_name=$1
 5 | heroku_repo="https://heroku:$HEROKU_AUTH_TOKEN@git.heroku.com/$heroku_app_name.git"
 6 | 
 7 | echo "Deploying app to heroku..."
 8 | git push $heroku_repo master --force
 9 | 
10 | echo "Running smoke test"
11 | bin/predict.sh "$heroku_app_name.herokuapp.com"


--------------------------------------------------------------------------------
/docs/facilitator_notes.md:
--------------------------------------------------------------------------------
1 | # Instructions for workshop facilitator
2 | 
3 | Before the workshop:
4 | - Provision mlflow. Instructions in [`docs/mlflow.md`](./mlflow.md)
5 | - Revert circleci to starter template: `cp .circleci/config.helloworld.yaml .circleci/config.yml`
6 | 
7 | Workshop flow:
8 | - Setup local machine. Instructions in [`README.md`](../README.md)
9 | - Setup CI pipeline. Instructions in [`docs/CD.md`](./CD.md)


--------------------------------------------------------------------------------
/.circleci/config.helloworld.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | jobs:
 3 |   hello_world:
 4 |     docker:
 5 |       - image: circleci/python:3.6.1
 6 |     working_directory: ~/repo
 7 |     steps:
 8 |       - run:
 9 |           name: hello
10 |           command: echo "HELLO WORLD!!!"
11 |       - run:
12 |           name: bye
13 |           command: echo "GOODBYE!!!"
14 | 
15 | workflows:
16 |   version: 2
17 |   my_ci_pipeline:
18 |     jobs:
19 |       - hello_world


--------------------------------------------------------------------------------
/pipeline.gocd.yaml:
--------------------------------------------------------------------------------
 1 | # simple.gocd.yaml
 2 | pipelines:
 3 |   pipe1:
 4 |     group: simple
 5 |     materials:
 6 |       mygit:  # this is the name of material
 7 |         # says about type of material and url at once
 8 |         git: https://github.com/davified/ci-workshop-app.git
 9 |     stages:
10 |       - build: # name of stage
11 |           elastic_profile_id: demo-app
12 |           jobs:
13 |             build: # name of the job
14 |               tasks:
15 |                - exec: # indicates type of task
16 |                    command: echo "hello world"


--------------------------------------------------------------------------------
/bin/configure_venv_locally.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Create a local virtualenv (.venv-local) and install the dev dependencies into it.
 3 | # On macOS, python3 (and homebrew, if needed) is installed first when it is missing.
 4 | set -e
 5 | 
 6 | # The lookups are tested directly in the `if` conditions: under `set -e` a bare failing
 7 | # `which` would abort the script before a following `$?` check could run.
 8 | if ! which python3; then
 9 |   if [[ $(uname) == 'Darwin' ]]; then
10 |     # mac users
11 |     if ! which brew; then
12 |       echo "INFO: Installing homebrew"
13 |       # NOTE(review): upstream Homebrew has moved to a bash installer; confirm this Ruby one-liner still works
14 |       /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
15 |     fi
16 | 
17 |     echo "INFO: Installing python3"
18 |     brew install python3
19 |   else
20 |     echo "Please install Python 3 before using this script"
21 |     echo "Exiting..."
22 | 
23 |     exit 1
24 |   fi
25 | fi
26 | 
27 | python3 -m venv .venv-local
28 | 
29 | source .venv-local/bin/activate
30 | pip install --upgrade pip
31 | pip install -r requirements-dev.txt
32 | 


--------------------------------------------------------------------------------
/bin/predict.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Send a sample prediction request to a running instance of the app.
 3 | # Usage: bin/predict.sh <URL>
 4 | set -e
 5 | 
 6 | if [[ $1 == '' ]]; then
 7 |   echo "[ERROR] Usage: $0 <URL>"
 8 |   echo "[ERROR] Example: $0 http://localhost:8080"
 9 |   echo "[ERROR] Example: $0 http://my-app.herokuapp.com"
10 |   echo "[ERROR] Exiting..."
11 |   exit 1
12 | else
13 |   base_url=$1
14 | fi
15 | 
16 | # --fail makes curl exit non-zero on HTTP errors (>= 400), so callers that use this
17 | # script as a smoke test (e.g. bin/deploy_to_heroku.sh) fail when the app returns an error.
18 | curl --fail --request POST "$base_url/predict" \
19 |      --header "Content-Type: application/json" \
20 |      --data \
21 |               '{ 
22 |                 "AGE": 65.2,
23 |                 "B": 396.9,
24 |                 "CHAS": 0,
25 |                 "CRIM": 0.00632,
26 |                 "DIS": 4.09,
27 |                 "INDUS": 2.31,
28 |                 "LSTAT": 4.98,
29 |                 "NOX": 0.538,
30 |                 "PTRATIO": 15.3,
31 |                 "RAD": 1.0,
32 |                 "RM": 16.575,
33 |                 "TAX": 296,
34 |                 "ZN": 18 
35 |               }'


--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
 1 | import os, json, re
 2 | 
 3 | import joblib
 4 | import pandas as pd
 5 | from flask import Flask, jsonify, request
 6 | 
 7 | app = Flask(__name__)
 8 | column_order = joblib.load('models/column_order.joblib') 
 9 | model = joblib.load('models/model.joblib') 
10 | 
11 | @app.route('/', methods=['GET'])
12 | def hello_world():
13 |     return jsonify({"response": "hello world!"})
14 | 
15 | @app.route('/predict', methods=['POST'])
16 | def predict():
17 |     request_payload = request.json
18 |     input_features = pd.DataFrame([], columns=column_order)
19 |     input_features = input_features.append(request_payload, ignore_index=True)
20 |     input_features = input_features.fillna(0)
21 | 
22 |     prediction = model.predict(input_features.values.tolist()).tolist()[0]
23 | 
24 |     return jsonify({'predicted price (thousands)': prediction})
25 | 
26 | if __name__ == '__main__':    
27 |     # Run app locally.
28 |     port = os.environ.get('PORT', 8080)
29 |     app.run(port=port, host='0.0.0.0', debug=True)
30 | 
31 |     # App starts up differently on heroku. See Procfile


--------------------------------------------------------------------------------
/src/test_model_metrics.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | from math import sqrt
 4 | 
 5 | import joblib
 6 | import pandas as pd
 7 | from sklearn import datasets, metrics
 8 | 
 9 | 
10 | @unittest.skipUnless(os.environ.get('RUN_METRICS_TEST', '') == 'true', 'skip metrics tests when running unit tests')
11 | class TestSimpleExample(unittest.TestCase):
12 |     def setUp(self):
13 |         import warnings
14 |         with warnings.catch_warnings():
15 |             warnings.simplefilter("ignore")
16 |             model = joblib.load(f'./models/model.joblib') 
17 | 
18 |         data = datasets.load_boston()
19 |         x = pd.DataFrame(data.data, columns=data.feature_names)
20 |         self.y = pd.DataFrame(data.target, columns=["MEDV"])
21 |         self.y_pred = model.predict(x)
22 |     
23 |     
24 |     def test_rmse_should_be_below_5(self):
25 |         rmse = sqrt(metrics.mean_squared_error(y_true=self.y, y_pred=self.y_pred))
26 |         self.assertLessEqual(rmse, 6)
27 | 
28 |     def test_r2_score_should_be_above_0_point_8(self):
29 |         r2 = metrics.r2_score(y_true=self.y, y_pred=self.y_pred)
30 |         self.assertGreaterEqual(r2, 0.5)


--------------------------------------------------------------------------------
/docs/pre-requisites.md:
--------------------------------------------------------------------------------
 1 | ### Workshop pre-requisites
 2 | 
 3 | Before the workshop, please ensure you have done the following:
 4 | - Install a code editor of your choice. If you aren’t familiar with a code editor, [VS Code](https://code.visualstudio.com/) or [PyCharm (community edition)](https://www.jetbrains.com/pycharm/download/) are good options.
 5 | - Install and start Docker
 6 |   - [Mac users](https://docs.docker.com/docker-for-mac/install/)
 7 |   - [Linux users](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
 8 |   - [Windows](https://docs.docker.com/docker-for-windows/install/)
 9 |   - **Important things to note**:
10 |     - You will be prompted to create a DockerHub account. Follow the instructions in order to download Docker
11 |     - Follow the installation prompts (go with the default options) **until you have successfully started Docker**
12 |     - [Windows users] When prompted to enable Hyper-V and Containers features, click 'Ok' and let computer restart again.
13 |     - You may have to restart your computer 2-3 times.
14 | - Install a REST client (e.g. [Insomnia](https://insomnia.rest/))
15 | - Create accounts:
16 |   - [Heroku](https://heroku.com) (free) 
17 |   - [CircleCI](https://circleci.com) (free)
18 | - [Windows Users only] Install [git bash](https://gitforwindows.org/). We will be using `git bash` as the terminal for the workshop.


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # ================================================================= #
 2 | # ------------ First stage in our multistage Dockerfile ----------- #
 3 | # ================================================================= #
 4 | FROM python:3.6-slim as Base
 5 | 
 6 | RUN apt-get update \
 7 |   && apt-get install -y curl git
 8 | 
 9 | WORKDIR /home/ci-workshop-app
10 | 
11 | COPY requirements.txt /home/ci-workshop-app/requirements.txt
12 | RUN pip install -r requirements.txt
13 | 
14 | COPY . /home/ci-workshop-app
15 | 
16 | # ================================================================= #
17 | # ------------ Second stage in our multistage Dockerfile ---------- #
18 | # ================================================================= #
19 | 
20 | FROM Base as Build
21 | 
22 | ARG CI
23 | ENV CI=$CI
24 | 
25 | RUN /home/ci-workshop-app/bin/train_model.sh
26 | 
27 | # CMD ["/home/ci-workshop-app/bin/start_server.sh"]
28 | 
29 | # ================================================================= #
30 | # ------------ Third stage in our multistage Dockerfile ----------- #
31 | # ================================================================= #
32 | FROM Build as Dev
33 | # refresh the package index in the same layer as the install so a stale cached index is never used
34 | RUN apt-get update && apt-get install -y gnupg \
35 |   && curl https://cli-assets.heroku.com/install-ubuntu.sh | sh
36 | 
37 | COPY requirements-dev.txt /home/ci-workshop-app/requirements-dev.txt
38 | RUN pip install -r /home/ci-workshop-app/requirements-dev.txt
39 | 
40 | RUN git config --global credential.helper 'cache --timeout=36000'
41 | 
42 | EXPOSE 8080
43 | 
44 | ARG user
45 | RUN useradd ${user:-root} -g root || true
46 | USER ${user:-root}
47 | 
48 | # CMD ["/home/ci-workshop-app/bin/start_server.sh"]
49 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from math import sqrt
 3 | 
 4 | import joblib
 5 | import mlflow
 6 | import numpy as np
 7 | import pandas as pd
 8 | from sklearn import datasets, metrics
 9 | from sklearn.ensemble import RandomForestRegressor
10 | from sklearn.model_selection import train_test_split
11 | 
12 | import settings
13 | 
14 | # load data
15 | data = datasets.load_boston()
16 | 
17 | # preprocess data
18 | x = pd.DataFrame(data.data, columns=data.feature_names)
19 | column_order = x.columns
20 | y = pd.DataFrame(data.target, columns=["MEDV"])
21 | x_train, x_test, y_train, y_test = train_test_split(x, y)
22 | 
23 | # train model
24 | print('Training ML model...')
25 | N_ESTIMATORS = 2
26 | MAX_DEPTH = 2
27 | model = RandomForestRegressor(n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH)
28 | model = model.fit(x_train, y_train.values.ravel())
29 | 
30 | # save model 
31 | joblib.dump(model, 'models/model.joblib') 
32 | joblib.dump(column_order, 'models/column_order.joblib')
33 | 
34 | if settings.SHOULD_USE_MLFLOW:
35 |     # log training run to mlflow
36 |     mlflow.set_tracking_uri(uri=f'http://{settings.MLFLOW_IP}:5000')
37 |     if os.environ.get('CI', '') == 'true':
38 |         mlflow.set_experiment('CI')
39 |     else:
40 |         mlflow.set_experiment('dev')
41 | 
42 |     with mlflow.start_run() as run:
43 |         # calculate evaluation metrics
44 |         y_test_pred = model.predict(x_test)
45 |         rmse = sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_test_pred))
46 |         r2_score = metrics.r2_score(y_true=y_test, y_pred=y_test_pred)
47 | 
48 |         # log hyperparameters to mlflow
49 |         mlflow.log_param('n_estimators', N_ESTIMATORS)
50 |         mlflow.log_param('max_depth', MAX_DEPTH)
51 |         
52 |         # log metrics to mlflow
53 |         mlflow.log_metric("rmse_validation_data", rmse)
54 |         mlflow.log_metric("r2_score_validation_data", r2_score)
55 | else:
56 |     print('Not logging training run because MLFlow tracking server is not up, or its URL is not set in train.py')


--------------------------------------------------------------------------------
/.circleci/config.heroku.reference.yaml:
--------------------------------------------------------------------------------
 1 | # .circleci/config.yml
 2 | version: 2
 3 | jobs:
 4 |   train_and_test:
 5 |     docker:
 6 |       - image: circleci/python:3.6.1
 7 |     working_directory: ~/repo
 8 |     steps:
 9 |       - checkout
10 |       - restore_cache:
11 |           keys:
12 |             - v1-dependencies-{{ checksum "requirements.txt" }}
13 |             # fallback to using the latest cache if no exact match is found
14 |             - v1-dependencies-
15 |       - run:
16 |           name: install dependencies
17 |           command: bin/install_dependencies.sh
18 |       - save_cache:
19 |           paths:
20 |             - .venv
21 |           key: v1-dependencies-{{ checksum "requirements.txt" }}
22 |       - run:
23 |           name: run unit tests
24 |           command: bin/test.sh
25 |       - run:
26 |           name: train model
27 |           command: bin/train_model.sh
28 |       - run:
29 |           name: run model metrics tests
30 |           command: bin/test_model_metrics.sh
31 |       - persist_to_workspace:
32 |           root: .
33 |           paths:
34 |             - .
35 |   deploy_staging:
36 |     docker:
37 |       - image: circleci/python:3.6.1
38 |     steps:
39 |       - attach_workspace:
40 |           at: .
41 |       - run:
42 |           name: deploy app to staging
43 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-staging
44 |   deploy_prod:
45 |     docker:
46 |       - image: circleci/python:3.6.1
47 |     steps:
48 |       - attach_workspace:
49 |           at: .
50 |       - run:
51 |           name: deploy app to prod
52 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-prod
53 | 
54 | 
55 | workflows:
56 |   version: 2
57 |   my_ci_pipeline:
58 |     jobs:
59 |       - train_and_test
60 |       - deploy_staging:
61 |           requires:
62 |             - train_and_test
63 |       - trigger_deploy:
64 |           type: approval
65 |           requires:
66 |             - deploy_staging
67 |       - deploy_prod:
68 |           requires:
69 |             - trigger_deploy


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # .circleci/config.yml
 2 | version: 2
 3 | jobs:
 4 |   train_and_test:
 5 |     docker:
 6 |       - image: circleci/python:3.6.1
 7 |     working_directory: ~/repo
 8 |     steps:
 9 |       - checkout
10 |       - restore_cache:
11 |           keys:
12 |             - v1-dependencies-{{ checksum "requirements.txt" }}
13 |             # fallback to using the latest cache if no exact match is found
14 |             - v1-dependencies-
15 |       - run:
16 |           name: install dependencies
17 |           command: |
18 |             python3 -m venv .venv
19 |             source .venv/bin/activate
20 |             pip install -r requirements.txt
21 |       - save_cache:
22 |           paths:
23 |             - .venv
24 |           key: v1-dependencies-{{ checksum "requirements.txt" }}
25 |       - run:
26 |           name: run unit tests
27 |           command: bin/test.sh
28 |       - run:
29 |           name: train model
30 |           command: bin/train_model.sh
31 |       - run:
32 |           name: run model metrics tests
33 |           command: bin/test_model_metrics.sh
34 |       - persist_to_workspace:
35 |           root: .
36 |           paths:
37 |             - .
38 |   deploy_staging:
39 |     docker:
40 |       - image: circleci/python:3.6.1
41 |     steps:
42 |       - attach_workspace:
43 |           at: .
44 |       - run:
45 |           name: deploy app to staging
46 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-staging
47 |   deploy_prod:
48 |     docker:
49 |       - image: circleci/python:3.6.1
50 |     steps:
51 |       - attach_workspace:
52 |           at: .
53 |       - run:
54 |           name: deploy app to prod
55 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-prod
56 | 
57 | 
58 | workflows:
59 |   version: 2
60 |   my_ci_pipeline:
61 |     jobs:
62 |       - train_and_test
63 |       - deploy_staging:
64 |           requires:
65 |             - train_and_test
66 |       - trigger_deploy:
67 |           type: approval
68 |           requires:
69 |             - deploy_staging
70 |       - deploy_prod:
71 |           requires:
72 |             - trigger_deploy


--------------------------------------------------------------------------------
/src/app_with_logging.py:
--------------------------------------------------------------------------------
 1 | import os, json, re
 2 | 
 3 | import joblib
 4 | import pandas as pd
 5 | from flask import Flask, jsonify, request
 6 | from fluent import sender
 7 | from fluent import event
 8 | from sklearn import datasets
 9 | import numpy as np
10 | import lime
11 | import lime.lime_tabular
12 | 
13 | app = Flask(__name__)
14 | column_order = joblib.load('models/column_order.joblib') 
15 | model = joblib.load('models/model.joblib') 
16 | 
17 | @app.route('/', methods=['GET'])
18 | def hello_world():
19 |     return jsonify({"response": "hello world!"})
20 | 
21 | def lime_explain(input):  # returns a dict mapping 'LIME_<name>' to a contribution for one row of feature values
22 |     boston = datasets.load_boston()  # dataset handed to the explainer below
23 |     categorical_features = np.argwhere(np.array([len(set(boston.data[:,x])) for x in range(boston.data.shape[1])]) <= 10).flatten()  # indices of columns with at most 10 distinct values
24 |     explainer = lime.lime_tabular.LimeTabularExplainer(boston.data, feature_names=boston.feature_names, class_names=['price'], categorical_features=categorical_features, verbose=True, mode='regression')
25 |     exp = explainer.explain_instance(np.array(input), model.predict, num_features=5).as_list()  # iterated below as (feature, contribution) pairs
26 | 
27 |     lime_feature_contributions = {}
28 |     for feature, contribution in exp:
29 |         feature_name = re.findall("[a-zA-Z]+", feature)[0]  # first run of letters; presumably the feature name inside LIME's condition string - verify
30 |         lime_feature_contributions[f'LIME_{feature_name}'] = contribution
31 |     return lime_feature_contributions
32 | 
33 | 
34 | @app.route('/predict', methods=['POST'])
35 | def predict():
36 |     request_payload = request.json
37 |     input_features = pd.DataFrame([], columns=column_order)
38 |     input_features = input_features.append(request_payload, ignore_index=True)
39 |     input_features = input_features.fillna(0)
40 | 
41 |     prediction = model.predict(input_features.values.tolist()).tolist()[0]
42 | 
43 |     logger = sender.FluentSender('app', host='host.docker.internal', port=24224)
44 |     feature_names = column_order.tolist()
45 |     feature_values = input_features.values.tolist()[0]
46 |     lime_feature_contributions = lime_explain(feature_values)
47 | 
48 |     log_payload = {'prediction': prediction, **dict(zip(feature_names, feature_values)), **lime_feature_contributions}
49 |     if not logger.emit('prediction', log_payload):
50 |         print('logger error')
51 |         print(logger.last_error)
52 |         logger.clear_last_error() # clear stored error after handled errors
53 | 
54 |     return jsonify({'predicted price (thousands)': prediction})
55 | 
56 | if __name__ == '__main__':    
57 |     port = os.environ.get('PORT', 8080)
58 |     if port == 8080:
59 |         app.run(port=port, host='0.0.0.0', debug=True)
60 |     else:
61 |         app.run()
62 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Continuous Intelligence Workshop
 2 | 
 3 | A demo on how to apply continuous delivery principles to train, test and deploy ML models.
 4 | 
 5 | ### Setup
 6 | 
 7 | Note:
 8 | - If you encounter any errors, please refer to [FAQs](./docs/FAQs.md) for a list of common errors and how to fix them.
 9 | - [Windows users] If you're new to Docker, please use Git Bash to run the commands below
10 | 
11 | **Setup instructions**
12 | 
13 | 1. Please ensure you've completed the [pre-requisite setup](./docs/pre-requisites.md)
14 | 2. Fork repository: https://github.com/davified/ci-workshop-app
15 | 3. Clone repository: `git clone https://github.com/YOUR_USERNAME/ci-workshop-app`
16 | 4. Start Docker on your desktop (Note: Wait for Docker to complete startup before running the subsequent commands. You'll know when startup is completed when the docker icon in your taskbar stops animating)
17 | 5. Build docker image
18 | 
19 | ```shell
20 | # [Mac/Linux users]
21 | docker build . -t ci-workshop-app --build-arg user=$(whoami)
22 | 
23 | # [Windows users]
24 | MSYS_NO_PATHCONV=1 docker build . -t ci-workshop-app --build-arg user=$(whoami)
25 | ```
26 | 
27 | 6. Start docker container
28 | 
29 | ```shell
30 | # [Mac/Linux users]
31 | docker run -it -v $(pwd):/home/ci-workshop-app -p 8080:8080 ci-workshop-app bash
32 | 
33 | # [Windows users]
34 | winpty docker run -it -v C:\\Users\\path\\to\\your\\ci-workshop-app:/home/ci-workshop-app -p 8080:8080 ci-workshop-app bash
35 | # Note: to find the path, you can run `pwd` in git bash, and manually replace forward slashes (/) with double backslashes (\\)
36 | 
37 | ```
38 | 
39 | ```diff
40 | ! Pre-workshop setup stops here
41 | ```
42 | 
43 | ```shell
44 | ### Other useful docker commands ###
45 | # See list of running containers
46 | docker ps
47 | 
48 | # Start a bash shell in a running container
49 | docker exec -it <container-id> bash
50 | ```
51 | 
52 | Now you're ready to roll!
53 | 
54 | 
55 | ### Common commands (run these in the container)
56 | 
57 | ```shell
58 | # Add some color to your terminal
59 | source bin/color_my_terminal.sh
60 | 
61 | # Run unit tests
62 | nosetests
63 | 
64 | # Train model
65 | python src/train.py
66 | 
67 | # Start flask app
68 | python src/app.py
69 | 
70 | # Make requests to your app
71 | # 1. In your browser, visit http://localhost:8080
72 | # 2. In another terminal in the container, run:
73 | bin/predict.sh http://localhost:8080
74 | 
75 | # You can also use this script to test your deployed application later:
76 | bin/predict.sh http://my-app.herokuapp.com
77 | ```
78 | 
79 | ### IDE configuration
80 | 
81 | Please refer to [FAQs](./docs/FAQs.md) for instructions on configuring VS Code or PyCharm.
82 | 
83 | ### Set up CD pipeline
84 | 
85 | Instructions for setting up your CD pipeline are in [docs/CD.md](./docs/CD.md). To keep this example simple, we will deploy to heroku.
86 | 
87 | Once the CD pipeline is set up, you only need to `git add`, `git commit` and `git push` your code changes, and the CD pipeline will do everything (train, test, deploy) for you.
88 | 


--------------------------------------------------------------------------------
/docs/FAQs.md:
--------------------------------------------------------------------------------
 1 | # FAQs
 2 | 
 3 | ### IDE configuration
 4 | To get the optimal coding workflow, we often rely on intellisense and code completion provided by our code editors. Unfortunately, this becomes [hard](https://github.com/Microsoft/vscode-python/issues/79#issuecomment-348193800) when our python virtual environment is contained within the docker container. As a workaround, you can:
 5 | - Run `bin/configure_venv_locally.sh`. This will create a duplicate python virtual environment (by the name of `.venv-local`) on your host (i.e. your computer)
 6 | - Configure your IDE with the python path of this virtual environment:
 7 |   - [VS Code](https://code.visualstudio.com/docs/python/environments#_select-and-activate-an-environment)
 8 |   - [PyCharm (community edition)](https://www.jetbrains.com/help/pycharm/creating-virtual-environment.html)
 9 |   - PyCharm (professional edition) users: you don't need this workaround. You can set up your IDE to use the virtual environment in the Docker container (see [instructions](https://www.jetbrains.com/help/pycharm/using-docker-as-a-remote-interpreter.html))
10 | - configure autosave
11 | 
12 | 
13 | ### Common errors and how to fix them
14 | 
15 | 1. `docker run` causes the following error:
16 | ```shell
17 | docker: Error response from daemon: driver failed programming external connectivity on endpoint elated_brown (a26aea6b1fcd5f286dd7164b42
18 | 47de2f958f8280140b51ec39eed13e3801037b): Bind for 0.0.0.0:8080 failed: port is already allocated.
19 | 
20 | # Reason: another container is already running and has taken port 8080
21 | # Solution: 
22 | # 1. get id of running container
23 | docker ps
24 | 
25 | # 2 stop container
26 | docker stop <container-id> 
27 | # e.g. docker stop 9d57a1f8f49a
28 | 
29 | # Now you can run `docker run` again
30 | ```
31 | 
32 | ### [Windows users] Common errors and how to fix them
33 | 
34 | 1. If you encounter the following error, when running `docker run ... -p 8080:8080 ...`:
35 | ```shell
36 | docker: Error response from daemon: driver failed programming external connectivity on endpoint zealous_rubin (f70ddf46807daed2b1a24e3f897af1dd587b97b30ef676c8fcdba40598756
37 | c49): Error starting userland proxy: mkdir /port/tcp:0.0.0.0:8080:tcp:172.17.0.2:8080: input/output error.
38 | 
39 | # Solution: 
40 | # 1. Right click docker icon --> Settings --> Daemon --> Ensure 'Experimental Features' is unchecked
41 | # 2. Restart docker
42 | ```
43 | 
44 | 2. You mounted a volume (e.g. `docker run -v /$(pwd):/home/`) but you don't see the mounted directory:
45 | ```shell
46 | # solution: replace /$(pwd) with the full path to the directory that you wish to mount:
47 | winpty docker run -it -v C:\\Users\\path\\to\\your\\ci-workshop-app:/home/ci-workshop-app -p 8080:8080 ci-workshop-app bash
48 | 
49 | # Note: to find the full path, you can run `pwd` in the directory that you wish to mount, and manually replace forward slashes (/) with double backslashes (\\)
50 | ```
51 | This is an open issue in Docker for Windows that has to do with how Git Bash converts filepaths: https://github.com/docker/toolbox/issues/673
52 | 
53 | 3. You edited a shell script and tried to run it but got some error about invalid characters (^M)
54 | ```shell
55 | # on git bash, convert line endings to unix endings
56 | dos2unix bin/my_file.sh
57 | 
58 | # now you can execute your script
59 | bin/my_file.sh
60 | ```
61 | 


--------------------------------------------------------------------------------
/docs/CD.md:
--------------------------------------------------------------------------------
  1 | # Setting up your CD pipeline
  2 | 
  3 | During the workshop, we will walk you through how to configure a CD pipeline for your project. We will specify our CD pipeline in `.circleci/config.yml`. And you can refer to `.circleci/config.heroku.reference.yaml` for the complete solution, if you wish to.
  4 | 
  5 | ### Steps to do before the workshop
  6 | - Create CircleCI account: https://circleci.com/ (free)
  7 | - Create heroku account: https://heroku.com/ (free)
  8 | - Fork this repository: https://github.com/davified/ci-workshop-app
  9 | 
 10 | ### One-time manual steps
 11 | #### CircleCI
 12 | - Create circleci project. Visit https://circleci.com/dashboard, login and click on 'Add Projects' on the left panel. Click on 'Set up project' for `ci-workshop-app`
 13 | 
 14 | #### Heroku
 15 | - Login to heroku by running: `heroku login` (complete authentication by clicking on the browser. if the browser doesn't open up automatically, you can copy and paste the link manually)
 16 | - Create a heroku project for app (staging): `heroku create ci-workshop-app-<YOUR_NAME>-staging`
 17 | - Create a heroku project for app (prod): `heroku create ci-workshop-app-<YOUR_NAME>-prod`
 18 | - If you encounter problems creating the 2 apps using the `heroku` CLI, you can create the 2 apps on the heroku website: https://dashboard.heroku.com/new-app
 19 | ___
 20 | 
 21 | ### Let's build our CD pipeline!
 22 | 
 23 | #### Iteration 1: Hello world
 24 | 
 25 | Let's create a simple pipeline to run 2 commands: `echo 'hello'` and `echo 'goodbye'`
 26 | 
 27 | **Your tasks**
 28 | - In your terminal, run:
 29 |   - `echo "HELLO WORLD!!!"`
 30 |   - `echo "GOODBYE!!!"`
 31 | - Copy and paste the following snippet in `.circleci/config.yml`
 32 | - Add, commit and push your changes to your repository:
 33 |   - `git add .circleci/config.yml`
 34 |   - `git commit -m "Creating pipeline to run hello world commands"`
 35 |   - `git push -u origin master`
 36 | 
 37 | ```yaml
 38 | # .circleci/config.yml
 39 | version: 2
 40 | jobs:
 41 |   hello_world:    # name of job
 42 |     docker:       # what docker image to use when running this job 
 43 |       - image: circleci/python:3.6.1
 44 |     working_directory: ~/repo
 45 |     steps:
 46 |       - run:      # my first step
 47 |           name: hello
 48 |           command: echo "HELLO WORLD!!!"
 49 |       - run:      # my second step
 50 |           name: bye
 51 |           command: echo "GOODBYE!!!"
 52 | 
 53 | workflows:
 54 |   version: 2
 55 |   my_ci_pipeline:
 56 |     jobs:
 57 |       - hello_world
 58 | ```
 59 | 
 60 | ___
 61 | 
 62 | #### Iteration 2: Train and test
 63 | 
 64 | Let's extend the pipeline to (i) run unit tests, (ii) train the model, and (iii) run metrics tests on the model.
 65 | 
 66 | **Your tasks**
 67 | - In your code editor, open and read the following bash scripts:
 68 |   - `bin/test.sh`
 69 |   - `bin/train_model.sh`
 70 |   - `bin/test_model_metrics.sh`
 71 | - Get a feel of what each bash script is doing by running them:
 72 |   - Start a bash terminal in your container: `docker run -it -v $(pwd):/home/ci-workshop-app -p 8080:8080 ci-workshop-app bash`
 73 |   - In the terminal, run each of the 3 scripts above (e.g. `bin/test.sh`)
 74 | - Copy and paste the following snippet in `.circleci/config.yml`
 75 | - git add, commit and push your changes to your repository
 76 | 
 77 | ```yaml
 78 | # .circleci/config.yml
 79 | version: 2
 80 | jobs:
 81 |   train_and_test:
 82 |     docker:
 83 |       - image: circleci/python:3.6.1
 84 |     working_directory: ~/repo
 85 |     steps:
 86 |       - checkout              # checkout source code
 87 |       - restore_cache:        # load cache (to save time)
 88 |           keys:
 89 |             - v1-dependencies-{{ checksum "requirements.txt" }}
 90 |             # fallback to using the latest cache if no exact match is found
 91 |             - v1-dependencies-
 92 |       - run:
 93 |           name: install dependencies
 94 |           command: bin/install_dependencies.sh
 95 |       - save_cache:           # save cache (to save time)
 96 |           paths:
 97 |             - .venv
 98 |           key: v1-dependencies-{{ checksum "requirements.txt" }}
 99 |       - run:
100 |           name: run unit tests
101 |           command: bin/test.sh
102 |       - run:
103 |           name: train model
104 |           command: bin/train_model.sh
105 |       - run:
106 |           name: run model metrics tests
107 |           command: bin/test_model_metrics.sh
108 |       - persist_to_workspace:   # save artifact
109 |           root: .
110 |           paths:
111 |             - .
112 | 
113 | workflows:
114 |   version: 2
115 |   my_ci_pipeline:
116 |     jobs:
117 |       - train_and_test
118 | ```
119 | 
120 | ___
121 | 
122 | #### Iteration 3: Deploy to staging and production
123 | 
124 | Let's deploy our app to staging and production!
125 | 
126 | **Your tasks**
127 | - In your code editor, open and read the following files:
128 |   - `bin/deploy_to_heroku.sh` - we wrote this script. this is how you can deploy an app to heroku
129 |   - `Procfile` - This is a simple file that declares the command which heroku will run when it starts your application
130 | - Copy and paste the following snippet in `.circleci/config.yml`. Note:
131 |   - Replace `ci-workshop-app-bob-staging` and `ci-workshop-app-bob-prod` with the names of your staging and prod apps
132 |   - Keep the `train_and_test` configuration which you pasted in your previous task.
133 |   - Ensure indentation matches what you pasted in your previous task! Otherwise CircleCI will not be happy.
134 | - git add, commit and push your changes to your repository
135 | 
136 | 
137 | ```yaml
138 | # .circleci/config.yml
139 | version: 2
140 | jobs:
141 |   train_and_test:
142 |     # ... same as previous code snippet
143 |   deploy_staging:
144 |     docker:
145 |       - image: circleci/python:3.6.1
146 |     steps:
147 |       - attach_workspace:
148 |           at: .
149 |       - run:
150 |           name: deploy app to staging
151 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-staging
152 |   deploy_prod:
153 |     docker:
154 |       - image: circleci/python:3.6.1
155 |     steps:
156 |       - attach_workspace:
157 |           at: .
158 |       - run:
159 |           name: deploy app to prod
160 |           command: bin/deploy_to_heroku.sh ci-workshop-app-bob-prod
161 | 
162 | 
163 | workflows:
164 |   version: 2
165 |   my_ci_pipeline:
166 |     jobs:
167 |       - train_and_test
168 |       - deploy_staging:
169 |           requires:
170 |             - train_and_test
171 |       - trigger_deploy:
172 |           type: approval
173 |           requires:
174 |             - deploy_staging
175 |       - deploy_prod:
176 |           requires:
177 |             - trigger_deploy
178 | ```
179 | 
180 | ___
181 | 
182 | #### Iteration 4: Deploy to production (for real)
183 | 
184 | - In your terminal, generate a heroku auth token by running `heroku authorizations:create`, and copy the 'Token' value from its output
185 | - On CircleCI webpage, go to your project settings (click on the gear icon on your project) and click on 'Environment Variables' on the left panel. Add the following variable:
186 |   - Name: HEROKU_AUTH_TOKEN
187 |   - Value: (paste value created from previous step)
188 | - On CircleCI's workflows page, find the failed workflow and click on 'Rerun' 


--------------------------------------------------------------------------------