├── .dvc
    ├── .gitignore
    └── config
├── .dvcignore
├── .github
    └── workflows
    │   └── github-actions-demo.yml
├── .gitignore
├── README.md
├── ab_testing_example
    ├── flask_model_testing.py
    ├── flask_recommendation_service.py
    ├── recommend_pytorch_inf.py
    ├── recommend_pytorch_train.py
    └── two_sample_test.ipynb
├── ab_testing_example_posthog
    ├── README.md
    ├── app.py
    ├── env.example
    ├── requirements.txt
    ├── static
    │   ├── css
    │   │   └── main.css
    │   └── js
    │   │   └── main.js
    └── templates
    │   └── index.html
├── airflow_astro_example
    ├── .astro
    │   ├── config.yaml
    │   └── test_dag_integrity_default.py
    ├── .dockerignore
    ├── .gitignore
    ├── Dockerfile
    ├── README.md
    ├── dags
    │   ├── .airflowignore
    │   ├── example_dag_advanced.py
    │   └── example_dag_basic.py
    ├── packages.txt
    ├── requirements.txt
    └── tests
    │   └── dags
    │       └── test_dag_example.py
├── airflow_setup
    └── setup.sh
├── cloud_function_example
    ├── main.py
    └── requirements.txt
├── copilot_example
    ├── fastapi_example.py
    └── requirements.txt
├── cron_example
    ├── date_job_every_minute.txt
    ├── run_this_job.sh
    └── run_this_job.txt
├── docker_examples
    ├── docker_compose_example
    │   ├── README.md
    │   ├── docker-compose.yml
    │   ├── env.example
    │   └── init.sql
    ├── docker_example
    │   ├── Dockerfile
    │   ├── flask_simple_regression_service.py
    │   └── requirements.txt
    └── docker_pipeline_example
    │   ├── Dockerfile
    │   ├── flask_recommendation_service.py
    │   ├── movies.dat
    │   ├── recommend_pytorch_inf.py
    │   └── recommend_pytorch_train.py
├── dvc_example
    ├── data
    │   ├── .gitignore
    │   └── data.xml.dvc
    ├── remote
    │   └── files
    │   │   └── md5
    │   │       └── 22
    │   │           └── a1a2931c8370d3aeedd7183606fd7f
    └── requirements.txt
├── flask_examples
    ├── flask_example_food_api
    │   └── flask_food_service.py
    ├── flask_example_imagenet
    │   ├── flask_imagenet_improved
    │   │   ├── flask_imagenet_improved_service.py
    │   │   ├── static
    │   │   │   ├── pytorch.png
    │   │   │   └── style.css
    │   │   └── templates
    │   │   │   ├── index.html
    │   │   │   └── result.html
    │   ├── flask_imagenet_service.py
    │   └── imagenet_class_index.json
    ├── flask_example_recommendation
    │   └── flask_recommendation_service.py
    ├── flask_example_regression
    │   ├── flask_simple_regression_improved
    │   │   ├── flask_simple_regression_improved_service.py
    │   │   ├── static
    │   │   │   ├── pytorch.png
    │   │   │   └── style.css
    │   │   └── templates
    │   │   │   ├── index.html
    │   │   │   └── result.html
    │   ├── flask_simple_regression_service.py
    │   └── requirements.txt
    └── flask_example_weather
    │   └── flask_weather_service.py
├── github_actions_example
    ├── .github
    │   └── workflows
    │   │   └── ci.yml
    ├── README.md
    └── src
    │   └── index.js
├── kafka_example
    ├── client_notebooks
    │   ├── consumer_local_example.ipynb
    │   └── producer_local_example.ipynb
    ├── docker-compose.yml
    └── readme.md
├── kubernetes_examples
    ├── kubernetes_example_imperative
    │   ├── .weather_pod_additional.txt.swp
    │   ├── echo_server_imperative_example.txt
    │   └── minikube.sh
    ├── kubernetes_example_nginx_replica
    │   ├── .weather_pod_additional.txt.swp
    │   ├── nginx_example.yaml
    │   └── nginx_example_additional.md
    ├── kubernetes_example_recommendations_pod
    │   └── recommendation_pod_example.yaml
    ├── kubernetes_example_weather_deployment
    │   ├── .weather_pod_additional.txt.swp
    │   ├── weather_deployment_example.yaml
    │   └── weather_deployment_example_additional.md
    └── kubernetes_example_weather_pod
    │   ├── .weather_pod_additional.txt.swp
    │   ├── weather_pod_example.yaml
    │   └── weather_pod_example_additional.md
├── lambda_function_example
    ├── lambda_function.py
    ├── movie_dict.json
    └── top_n.json
├── mlflow_example
    ├── mlflow_example.py
    └── requirements.txt
├── model_example_regression
    ├── simple_regression_inf.py
    └── simple_regression_train.py
├── pyspark_example
    ├── app
    │   └── example.py
    ├── docker-compose.yml
    └── readme.md
├── pytest_example
    └── test_function_example.py
└── ray_cluster_example
    ├── Dockerfile
    ├── app
        └── example.py
    ├── docker-compose.yaml
    └── readme.md


/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
4 | 


--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/.dvc/config


--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance. Learn more at
3 | # https://dvc.org/doc/user-guide/dvcignore
4 | 


--------------------------------------------------------------------------------
/.github/workflows/github-actions-demo.yml:
--------------------------------------------------------------------------------
 1 | name: GitHub Actions Demo
 2 | run-name: ${{ github.actor }} is testing out GitHub Actions 🚀
 3 | on: [push]
 4 | jobs:
 5 |   Explore-GitHub-Actions:
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |       - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
 9 |       - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
10 |       - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
11 |       - name: Check out repository code
12 |         uses: actions/checkout@v4
13 |       - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
14 |       - run: echo "🖥️ The workflow is now ready to test your code on the runner."
15 |       - name: List files in the repository
16 |         run: |
17 |           ls ${{ github.workspace }}
18 |       - run: echo "🍏 This job's status is ${{ job.status }}."
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore archive files
 2 | archive
 3 | 
 4 | # Ignore private key files
 5 | *.pem
 6 | 
 7 | # Ignore VSCode settings
 8 | .vscode
 9 | 
10 | # Ignore Python bytecode files
11 | *.pyc
12 | __pycache__/
13 | 
14 | # Ignore output and input directories
15 | outputs
16 | inputs
17 | 
18 | # Ignore specific model files
19 | model-user.json
20 | 
21 | # Ignore Jupyter Notebook checkpoints
22 | .ipynb_checkpoints
23 | 
24 | # Ignore log files
25 | *.log
26 | 
27 | # Ignore macOS system files
28 | .DS_Store
29 | 
30 | # Ignore development directories
31 | dev
32 | 
33 | # Ignore pickle and data files
34 | *.pkl
35 | *.dat
36 | 
37 | # Ignore specific directories
38 | mlruns
39 | mlartifacts
40 | ml-1m
41 | 
42 | # Ignore environment files
43 | .env
44 | 
45 | # Ignore virtual environment directories
46 | venv/
47 | .env/
48 | env/
49 | ENV/
50 | env.bak/
51 | venv.bak/
52 | 
53 | # Ignore temporary files created by editors
54 | *.swp
55 | *~


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Code Examples for MLOps: (Introduction to a Few Tools for) Machine Learning Deployment
 3 | 
 4 |  - This repo contains code examples for the MLOps (Machine Learning Deployment) [course repo](https://github.com/thejat/mlops-notebooks).
 5 |  - Please clone the repo to a suitable Linux machine (ideally Debian-based) and refer to the notes to get started.
 6 | 
 7 | #### Additional Information
 8 | 
 9 |   - Audience: Enthusiastic business analysts with beginner-level Python programming experience.
10 | 
11 | #### Setting the Data Path
12 | 
13 | Some examples need pre-trained models and the MovieLens 1M data files (movies.dat, ratings.dat). Set the paths in the scripts accordingly, or copy the files into the folders the scripts expect.
14 | 


--------------------------------------------------------------------------------
/ab_testing_example/flask_model_testing.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | SYS_PATH_PREFIX = '/home/theja/Sync/uic/teach/'
  3 | sys.path.append(SYS_PATH_PREFIX + 'mlops-code/model_example_recommendation_pytorch') # for model definitions
  4 | sys.path.append(SYS_PATH_PREFIX + 'mlops-data/ml-1m') # for metadata
  5 | sys.path.append(SYS_PATH_PREFIX + 'mlops-data/models') # for the pytorch model
  6 | # TODO: improve by making a package
  7 | 
  8 | from recommend_pytorch_train import MF
  9 | from recommend_pytorch_inf import get_top_n, get_previously_seen
 10 | import torch
 11 | import surprise
 12 | import pandas as pd
 13 | import time
 14 | import random
 15 | from uuid import uuid4
 16 | from flask import (
 17 |     Flask,
 18 |     session,
 19 |     request,
 20 |     redirect,
 21 |     url_for,
 22 |     render_template_string
 23 | )
 24 | from planout.experiment import SimpleExperiment
 25 | from planout.ops.random import *
 26 | 
 27 | 
 28 | class ModelExperiment(SimpleExperiment):
 29 |     def setup(self):
 30 |         self.set_log_file('model_abtest.log')
 31 | 
 32 |     def assign(self, params, userid):
 33 |         params.use_pytorch = BernoulliTrial(p=0.5, unit=userid)
 34 |         if params.use_pytorch:
 35 |             params.model_type = 'pytorch1'
 36 |         else:
 37 |             params.model_type = 'pytorch2'
 38 | 
 39 | 
 40 | 
 41 | start_time = time.time()
 42 | 
 43 | 
 44 | # Metadata preload
 45 | movies_df = pd.read_csv('movies.dat',
 46 |                         sep="::", header=None, engine='python',
 47 |                         encoding="iso-8859-1")
 48 | movies_df.columns = ['iid', 'name', 'genre']
 49 | movies_df.set_index('iid', inplace=True)
 50 | data = surprise.Dataset.load_builtin('ml-1m')
 51 | trainset = data.build_full_trainset()
 52 | testset = trainset.build_anti_testset()
 53 | 
 54 | 
 55 | 
 56 | # Model preload
 57 | k = 100  # latent dimension
 58 | c_bias = 1e-6
 59 | c_vector = 1e-6
 60 | model = MF(trainset.n_users, trainset.n_items,
 61 |            k=k, c_bias=c_bias, c_vector=c_vector)
 62 | model.load_state_dict(torch.load(
 63 |     'recommendation_model_pytorch.pkl'))
 64 | model.eval()
 65 | 
 66 | print('Model and data preloading completed in ', time.time()-start_time)
 67 | model1 = model  # for demo purposes, both models are the same
 68 | model2 = model
 69 | 
 70 | 
 71 | app = Flask(__name__)
 72 | app.config.update(dict(
 73 |     DEBUG=True,
 74 |     SECRET_KEY='MODEL_TESTING_BY_THEJA_TULABANDHULA',
 75 | ))
 76 | 
 77 | 
 78 | @app.route('/', methods=["GET"])
 79 | def main():
 80 |     # if no userid is defined make one up
 81 |     if 'userid' not in session:
 82 |         session['userid'] = str(random.choice(trainset.all_users()))
 83 | 
 84 |     model_perf_exp = ModelExperiment(userid=session['userid'])
 85 |     model_type = model_perf_exp.get('model_type')
 86 |     resp = {}
 87 |     resp["success"] = False
 88 | 
 89 |     print(model_type, resp, session['userid'])
 90 | 
 91 |     try:
 92 |         if model_type == 'pytorch1':
 93 |             user_ratings = get_top_n(
 94 |                 model1, testset, trainset, session['userid'], movies_df, n=10)
 95 |         elif model_type == 'pytorch2':
 96 |             user_ratings = get_top_n(
 97 |                 model2, testset, trainset, session['userid'], movies_df, n=10)
 98 | 
 99 |         print(user_ratings)
100 |         resp["response"] = [x[1] for x in user_ratings]
101 |         resp["success"] = True
102 | 
103 |         print(model_type, resp, session['userid'])
104 | 
105 |         return render_template_string("""
106 |                 <html>
107 |                     <head>
108 |                         <title>Recommendation Service</title>
109 |                     </head>
110 |                     <body>
111 |                         <h3>
112 |                             Recommendations for userid {{ userid }} based on {{ model_type }} are shown below: <br>
113 |                         </h3>
114 | 
115 |                         <p>
116 | 
117 |                         {% for movie_item in resp['response'] %}
118 |                               <h5> {{movie_item}}</h5>
119 |                         {% endfor %}
120 | 
121 |                         </p>
122 | 
123 |                         <p>
124 |                             What will be your rating of this list (rate between 1-10 where 10 is the highest quality)?
125 |                         </p>
126 |                         <form action="/rate" method="GET">
127 |                             <input type="text" length="10" name="rate"></input>
128 |                             <input type="submit"></input>
129 |                         </form>
130 |                     <br>
131 |                     <p><a href="/">Reload without resetting my user ID. I'll get the same recommendations when I come back.</a></p>
132 |                     <p><a href="/reset">Reset my user ID so I am a different user and will get re-randomized into a new treatment.</a></p>
133 |                     </body>
134 |                 </html>
135 |             """, userid=session['userid'], model_type=model_type, resp=resp)
136 |     except:
137 |         return render_template_string("""
138 |             <html>
139 |                 <head>
140 |                     <title>Recommendation Service</title>
141 |                 </head>
142 |                 <body>
143 |                     <h3>
144 |                         Recommendations for userid {{ userid }} based on {{ model_type }} are shown below. <br>
145 |                     </h3>
146 |                     <p>
147 |                     {{resp}}
148 |                     </p>
149 | 
150 |                     <p>
151 |                         What will be your rating of this list (rate between 1-10 where 10 is the highest quality)?
152 |                     </p>
153 |                     <form action="/rate" method="GET">
154 |                         <input type="text" length="10" name="rate"></input>
155 |                         <input type="submit"></input>
156 |                     </form>
157 |                 <br>
158 |                 <p><a href="/">Reload without resetting my user ID. I'll get the same recommendations when I come back.</a></p>
159 |                 <p><a href="/reset">Reset my user ID so I am a different user and will get re-randomized into a new treatment.</a></p>
160 |                 </body>
161 |             </html>
162 |             """, userid=session['userid'], model_type=model_type, resp=resp)
163 | 
164 | 
@app.route('/reset')
def reset():
    """Forget the current session and send the visitor back to the main page."""
    session.clear()
    landing_url = url_for('main')
    return redirect(landing_url)
169 | 
170 | 
@app.route('/rate')
def rate():
    """Record the visitor's 1-10 rating of the recommendation list.

    Reads the ``rate`` query parameter, logs a ``rate`` event for the
    session's user on the experiment and renders a thank-you page. Any
    failure (missing or non-numeric value, value out of range, no user id
    in the session, logging error) renders the "bad rating" page instead.
    """
    rate_string = request.args.get('rate')
    try:
        rate_val = int(rate_string)
        # Explicit check instead of `assert`, which is removed under -O.
        if not 0 < rate_val < 11:
            raise ValueError("rating must be between 1 and 10")

        model_perf_exp = ModelExperiment(userid=session['userid'])
        model_perf_exp.log_event('rate', {'rate_val': rate_val})

        return render_template_string("""
                    <html>
                        <head>
                            <title>Thank you for the feedback!</title>
                        </head>
                        <body>
                            <p>You rating is {{ rate_val }}. Hit the back button or click below to go back to recommendations!</p>
                            <p><a href="/">Back</a></p>
                        </body>
                    </html>
                    """, rate_val=rate_val)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; the cause is logged for debugging.
        app.logger.warning("Rejected rating %r", rate_string, exc_info=True)
        return render_template_string("""
                    <html>
                        <head>
                            <title>Bad rating!</title>
                        </head>
                        <body>
                            <p>You rating could not be parsed. That's probably not a number between 1 and 10, so we won't be accepting your rating.</p>
                            <p><a href="/">Back</a></p>
                        </body>
                    </html>
                    """)
204 | 
205 | 
# start the flask app, allow remote connections
# Guarded so that importing this module (tests, a WSGI server, `flask run`)
# does not start the development server as a side effect.
if __name__ == '__main__':
    app.run(host='0.0.0.0')
208 | 


--------------------------------------------------------------------------------
/ab_testing_example/flask_recommendation_service.py:
--------------------------------------------------------------------------------
 1 | import flask
 2 | from recommend_pytorch_train import MF
 3 | from recommend_pytorch_inf import get_top_n, get_previously_seen
 4 | import torch
 5 | import pandas as pd
 6 | import surprise
 7 | import time
 8 | 
 9 | 
app = flask.Flask(__name__)

start_time = time.time()

# data preload: the ml-1m ratings as a Surprise train set, its anti-test set
# (user/item pairs without a rating) and the movie titles indexed by movie id.
data = surprise.Dataset.load_builtin('ml-1m')
trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()
# movies.dat is not UTF-8; read it as latin-1, as flask_model_testing.py does
# for the same file.
movies_df = pd.read_csv('./movies.dat',
                        sep="::", header=None, engine='python',
                        encoding="iso-8859-1")
movies_df.columns = ['iid', 'name', 'genre']
movies_df.set_index('iid', inplace=True)

# model preload: the hyper-parameters must match the ones used when the
# checkpoint was trained, otherwise load_state_dict() rejects the weights.
k = 100  # latent dimension
c_bias = 1e-6
c_vector = 1e-6
model = MF(trainset.n_users, trainset.n_items,
           k=k, c_bias=c_bias, c_vector=c_vector)
# NOTE(review): torch.load unpickles the file; only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load(
    './recommendation_model_pytorch.pkl'))  # TODO: prevent overwriting
model.eval()

print('Model and data preloading completed in ', time.time()-start_time)
34 | 
35 | 
@app.route("/", methods=["GET"])
def recommend():
    """Return previously seen and recommended movie titles for ``?uid=<id>``.

    The JSON response always contains ``success``. When a ``uid`` query
    parameter is given it also contains ``uid`` and, as far as they could be
    computed, ``seen`` and ``recommended``. ``success`` is only ``True`` when
    both lists were produced.
    """
    # Named `payload` so it does not shadow the module-level `data` dataset.
    payload = {"success": False}

    if "uid" in flask.request.args:

        payload['uid'] = str(flask.request.args['uid'])

        try:
            payload['seen'] = get_previously_seen(
                trainset, payload['uid'], movies_df)
            recommended = get_top_n(
                model, testset, trainset, payload['uid'], movies_df, n=10)
            print(recommended)
            payload['recommended'] = [x[1] for x in recommended]
            payload["success"] = True
        except Exception:
            # Still answer with success=False, but record why it failed
            # instead of discarding the error.
            app.logger.exception(
                "Recommendation failed for uid %r", payload['uid'])

    return flask.jsonify(payload)


# start the flask app, allow remote connections
if __name__ == '__main__':
    app.run(host='0.0.0.0')
62 | 


--------------------------------------------------------------------------------
/ab_testing_example/recommend_pytorch_inf.py:
--------------------------------------------------------------------------------
 1 | from recommend_pytorch_train import MF
 2 | from surprise import Dataset
 3 | import numpy as np
 4 | import torch
 5 | import torch.nn as nn
 6 | import pandas as pd
 7 | import pprint
 8 | 
 9 | 
def get_top_n(model, testset, trainset, uid_input, movies_df, n=10):
    """Return the ``n`` best-scored candidate movies for one user.

    Args:
        model: callable that takes a ``[[inner_uid, inner_iid]]`` tensor and
            returns a single predicted rating.
        testset: iterable of ``(raw_uid, raw_iid, rating)`` triples listing
            the candidate pairs; the rating is ignored.
        trainset: object providing ``to_inner_uid`` and ``to_inner_iid``.
        uid_input: raw id of the user to recommend for.
        movies_df: DataFrame indexed by integer movie id with a ``name``
            column.
        n: maximum number of recommendations to return.

    Returns:
        A list of ``(raw_iid, movie_name, predicted_rating)`` tuples sorted
        by predicted rating, highest first. The list is empty when the user
        is not part of the train set.
    """
    # Unknown ids surface as KeyError or ValueError depending on the
    # train-set implementation, so both are treated as "not found".
    try:
        uid_input = int(trainset.to_inner_uid(uid_input))
    except (KeyError, ValueError):
        return []

    preds = []
    # Inference only: no need to track gradients while scoring.
    with torch.no_grad():
        for uid, iid, _ in testset:  # inefficient: scans every candidate pair
            try:
                uid_internal = int(trainset.to_inner_uid(uid))
            except (KeyError, ValueError):
                continue
            if uid_internal != uid_input:
                continue
            try:
                iid_internal = int(trainset.to_inner_iid(iid))
                movie_name = movies_df.loc[int(iid), 'name']
            except (KeyError, ValueError):
                # Item unknown to the train set or missing from the metadata.
                continue
            score = float(model(torch.tensor([[uid_input, iid_internal]])))
            preds.append((iid, movie_name, score))

    # Rank by the predicted rating (index 2), not by the movie title.
    preds.sort(key=lambda x: x[2], reverse=True)
    return preds[:n]
38 | 
39 | 
def get_previously_seen(trainset, uid, movies_df):
    """Return titles of movies the user has already rated.

    Args:
        trainset: object providing ``to_inner_uid``, ``to_raw_iid`` and the
            ``ur`` mapping from inner user id to ``(inner_iid, rating)``
            pairs.
        uid: raw id of the user, the same kind of id ``get_top_n`` takes.
        movies_df: DataFrame indexed by integer movie id with a ``name``
            column.

    Returns:
        A list of movie titles. Collection stops once more than 10 titles
        have been gathered. The list is empty for an unknown user.
    """
    seen = []
    # `ur` is keyed by inner user ids, so translate the raw id first.
    try:
        inner_uid = trainset.to_inner_uid(uid)
    except (KeyError, ValueError):
        return seen

    for (inner_iid, _) in trainset.ur[inner_uid]:
        try:
            # The metadata is indexed by the raw movie id, not the inner one.
            raw_iid = trainset.to_raw_iid(inner_iid)
            seen.append(movies_df.loc[int(raw_iid), 'name'])
        except (KeyError, ValueError):
            pass
        if len(seen) > 10:
            break
    return seen
50 | 
51 | 
def main():
    """Print seen movies and top-10 recommendations for a few sample users.

    Loads the movie metadata, the ml-1m ratings and the trained model from
    the paths below, then prints both lists for up to four users taken from
    the anti-test set.
    """
    # Data
    # movies.dat is not UTF-8; read it as latin-1 so titles with accented
    # characters do not break the load.
    movies_df = pd.read_csv('../data/ml-1m/movies.dat', sep="::",
                            header=None, engine='python',
                            encoding="iso-8859-1")
    movies_df.columns = ['iid', 'name', 'genre']
    movies_df.set_index('iid', inplace=True)
    data = Dataset.load_builtin('ml-1m')
    trainset = data.build_full_trainset()
    testset = trainset.build_anti_testset()

    # Must match the hyper-parameters used for training, otherwise the
    # saved weights do not fit the model.
    k = 100  # latent dimension
    c_bias = 1e-6
    c_vector = 1e-6

    model = MF(trainset.n_users, trainset.n_items,
               k=k, c_bias=c_bias, c_vector=c_vector)
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    model.load_state_dict(torch.load('../data/models/recommendation_model_pytorch.pkl'))
    model.eval()

    # Print the recommended items for sample users
    sample_users = list(set([x[0] for x in testset]))[:4]

    for uid in sample_users:

        print('User:', uid)
        print('\n')

        print('\tSeen:')
        seen = get_previously_seen(trainset, uid, movies_df)
        pprint.pprint(seen)
        print('\n')

        print('\tRecommendations:')
        recommended = get_top_n(model, testset, trainset, uid, movies_df, n=10)
        pprint.pprint([x[1] for x in recommended])
        print('\n')


if __name__ == "__main__":
    main()
92 | 


--------------------------------------------------------------------------------
/ab_testing_example/recommend_pytorch_train.py:
--------------------------------------------------------------------------------
  1 | # https://github.com/NicolasHug/Surprise
  2 | # can be replaced by explicitly importing the movielens data
  3 | from surprise import Dataset
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | from sklearn.utils import shuffle
  9 | 
 10 | class Loader():
 11 |     current = 0
 12 | 
 13 |     def __init__(self, x, y, batchsize=1024, do_shuffle=True):
 14 |         self.shuffle = shuffle
 15 |         self.x = x
 16 |         self.y = y
 17 |         self.batchsize = batchsize
 18 |         self.batches = range(0, len(self.y), batchsize)
 19 |         if do_shuffle:
 20 |             # Every epoch re-shuffle the dataset
 21 |             self.x, self.y = shuffle(self.x, self.y)
 22 | 
 23 |     def __iter__(self):
 24 |         # Reset & return a new iterator
 25 |         self.x, self.y = shuffle(self.x, self.y, random_state=0)
 26 |         self.current = 0
 27 |         return self
 28 | 
 29 |     def __len__(self):
 30 |         # Return the number of batches
 31 |         return int(len(self.x) / self.batchsize)
 32 | 
 33 |     def __next__(self):
 34 |         n = self.batchsize
 35 |         if self.current + n >= len(self.y):
 36 |             raise StopIteration
 37 |         i = self.current
 38 |         xs = torch.from_numpy(self.x[i:i + n])
 39 |         ys = torch.from_numpy(self.y[i:i + n])
 40 |         self.current += n
 41 |         return (xs, ys)
 42 | 
 43 | 
 44 | class MF(nn.Module):
 45 | 
 46 |     def __init__(self, n_user, n_item, k=18, c_vector=1.0, c_bias=1.0):
 47 |         super(MF, self).__init__()
 48 |         self.k = k
 49 |         self.n_user = n_user
 50 |         self.n_item = n_item
 51 |         self.c_bias = c_bias
 52 |         self.c_vector = c_vector
 53 | 
 54 |         self.user = nn.Embedding(n_user, k)
 55 |         self.item = nn.Embedding(n_item, k)
 56 | 
 57 |         # We've added new terms here:
 58 |         self.bias_user = nn.Embedding(n_user, 1)
 59 |         self.bias_item = nn.Embedding(n_item, 1)
 60 |         self.bias = nn.Parameter(torch.ones(1))
 61 | 
 62 |     def forward(self, train_x):
 63 |         user_id = train_x[:, 0]
 64 |         item_id = train_x[:, 1]
 65 |         vector_user = self.user(user_id)
 66 |         vector_item = self.item(item_id)
 67 | 
 68 |         # Pull out biases
 69 |         bias_user = self.bias_user(user_id).squeeze()
 70 |         bias_item = self.bias_item(item_id).squeeze()
 71 |         biases = (self.bias + bias_user + bias_item)
 72 | 
 73 |         ui_interaction = torch.sum(vector_user * vector_item, dim=1)
 74 | 
 75 |         # Add bias prediction to the interaction prediction
 76 |         prediction = ui_interaction + biases
 77 |         return prediction
 78 | 
 79 |     def loss(self, prediction, target):
 80 | 
 81 |         def l2_regularize(array):
 82 |             loss = torch.sum(array**2)
 83 |             return loss
 84 | 
 85 |         loss_mse = F.mse_loss(prediction, target.squeeze())
 86 | 
 87 |         # Add new regularization to the biases
 88 |         prior_bias_user = l2_regularize(self.bias_user.weight) * self.c_bias
 89 |         prior_bias_item = l2_regularize(self.bias_item.weight) * self.c_bias
 90 | 
 91 |         prior_user = l2_regularize(self.user.weight) * self.c_vector
 92 |         prior_item = l2_regularize(self.item.weight) * self.c_vector
 93 |         total = loss_mse + prior_user + prior_item + prior_bias_user + prior_bias_item
 94 |         return total
 95 | 
 96 | 
 97 | def main():
 98 |     # Data
 99 |     data = Dataset.load_builtin('ml-1m')
100 |     trainset = data.build_full_trainset()
101 |     uir = np.array([x for x in trainset.all_ratings()])
102 |     train_x = test_x = uir[:, :2].astype(np.int64)  # for simplicity
103 |     train_y = test_y = uir[:, 2].astype(np.float32)
104 | 
105 |     # Parameters
106 |     lr = 5e-3
107 |     k = 100  # latent dimension
108 |     c_bias = 1e-6
109 |     c_vector = 1e-6
110 |     batchsize = 1024
111 |     num_epochs = 40
112 | 
113 |     model = MF(trainset.n_users, trainset.n_items,
114 |                k=k, c_bias=c_bias, c_vector=c_vector)
115 |     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
116 | 
117 | 
118 |     for epoch in range(num_epochs):
119 |         dataloader = Loader(train_x, train_y, batchsize=batchsize)
120 |         itr = 0
121 |         for batch in dataloader:
122 |             itr += 1
123 |             prediction = model(batch[0])
124 |             loss = model.loss(prediction, batch[1])
125 |             optimizer.zero_grad()
126 |             loss.backward()
127 |             optimizer.step()
128 |             if itr % 100 == 0:
129 |                 print(f"epoch: {epoch}. iteration: {itr}. training loss: {loss}")
130 | 
131 |     torch.save(model.state_dict(),
132 |                "../data/models/recommendation_model_pytorch.pkl")
133 | 
134 | 
135 | if __name__ == '__main__':
136 |     main()
137 | 


--------------------------------------------------------------------------------
/ab_testing_example/two_sample_test.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import json"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "with open('./model_abtest.log','r') as f:\n",
 20 |     "    raw = f.read().split('\\n')[:-1]"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "data_list = []\n",
 30 |     "for event in raw:\n",
 31 |     "    obs = json.loads(event)\n",
 32 |     "    obs['extra_data'] = obs.get('extra_data',None)\n",
 33 |     "    data_list.append(obs)"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 4,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "data": {
 43 |       "text/html": [
 44 |        "<div>\n",
 45 |        "<style scoped>\n",
 46 |        "    .dataframe tbody tr th:only-of-type {\n",
 47 |        "        vertical-align: middle;\n",
 48 |        "    }\n",
 49 |        "\n",
 50 |        "    .dataframe tbody tr th {\n",
 51 |        "        vertical-align: top;\n",
 52 |        "    }\n",
 53 |        "\n",
 54 |        "    .dataframe thead th {\n",
 55 |        "        text-align: right;\n",
 56 |        "    }\n",
 57 |        "</style>\n",
 58 |        "<table border=\"1\" class=\"dataframe\">\n",
 59 |        "  <thead>\n",
 60 |        "    <tr style=\"text-align: right;\">\n",
 61 |        "      <th></th>\n",
 62 |        "      <th>name</th>\n",
 63 |        "      <th>time</th>\n",
 64 |        "      <th>salt</th>\n",
 65 |        "      <th>inputs</th>\n",
 66 |        "      <th>params</th>\n",
 67 |        "      <th>event</th>\n",
 68 |        "      <th>checksum</th>\n",
 69 |        "      <th>extra_data</th>\n",
 70 |        "    </tr>\n",
 71 |        "  </thead>\n",
 72 |        "  <tbody>\n",
 73 |        "    <tr>\n",
 74 |        "      <th>0</th>\n",
 75 |        "      <td>ModelExperiment</td>\n",
 76 |        "      <td>1602739669</td>\n",
 77 |        "      <td>ModelExperiment</td>\n",
 78 |        "      <td>{'userid': '431'}</td>\n",
 79 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
 80 |        "      <td>exposure</td>\n",
 81 |        "      <td>796b9a12</td>\n",
 82 |        "      <td>None</td>\n",
 83 |        "    </tr>\n",
 84 |        "    <tr>\n",
 85 |        "      <th>1</th>\n",
 86 |        "      <td>ModelExperiment</td>\n",
 87 |        "      <td>1602739720</td>\n",
 88 |        "      <td>ModelExperiment</td>\n",
 89 |        "      <td>{'userid': '431'}</td>\n",
 90 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
 91 |        "      <td>exposure</td>\n",
 92 |        "      <td>796b9a12</td>\n",
 93 |        "      <td>None</td>\n",
 94 |        "    </tr>\n",
 95 |        "    <tr>\n",
 96 |        "      <th>2</th>\n",
 97 |        "      <td>ModelExperiment</td>\n",
 98 |        "      <td>1602739722</td>\n",
 99 |        "      <td>ModelExperiment</td>\n",
100 |        "      <td>{'userid': '431'}</td>\n",
101 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
102 |        "      <td>exposure</td>\n",
103 |        "      <td>796b9a12</td>\n",
104 |        "      <td>None</td>\n",
105 |        "    </tr>\n",
106 |        "    <tr>\n",
107 |        "      <th>3</th>\n",
108 |        "      <td>ModelExperiment</td>\n",
109 |        "      <td>1602739722</td>\n",
110 |        "      <td>ModelExperiment</td>\n",
111 |        "      <td>{'userid': '431'}</td>\n",
112 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
113 |        "      <td>exposure</td>\n",
114 |        "      <td>796b9a12</td>\n",
115 |        "      <td>None</td>\n",
116 |        "    </tr>\n",
117 |        "    <tr>\n",
118 |        "      <th>4</th>\n",
119 |        "      <td>ModelExperiment</td>\n",
120 |        "      <td>1602739724</td>\n",
121 |        "      <td>ModelExperiment</td>\n",
122 |        "      <td>{'userid': '637'}</td>\n",
123 |        "      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
124 |        "      <td>exposure</td>\n",
125 |        "      <td>796b9a12</td>\n",
126 |        "      <td>None</td>\n",
127 |        "    </tr>\n",
128 |        "  </tbody>\n",
129 |        "</table>\n",
130 |        "</div>"
131 |       ],
132 |       "text/plain": [
133 |        "              name        time             salt             inputs  \\\n",
134 |        "0  ModelExperiment  1602739669  ModelExperiment  {'userid': '431'}   \n",
135 |        "1  ModelExperiment  1602739720  ModelExperiment  {'userid': '431'}   \n",
136 |        "2  ModelExperiment  1602739722  ModelExperiment  {'userid': '431'}   \n",
137 |        "3  ModelExperiment  1602739722  ModelExperiment  {'userid': '431'}   \n",
138 |        "4  ModelExperiment  1602739724  ModelExperiment  {'userid': '637'}   \n",
139 |        "\n",
140 |        "                                         params     event  checksum extra_data  \n",
141 |        "0   {'use_pytorch': 1, 'model_type': 'pytorch'}  exposure  796b9a12       None  \n",
142 |        "1   {'use_pytorch': 1, 'model_type': 'pytorch'}  exposure  796b9a12       None  \n",
143 |        "2   {'use_pytorch': 1, 'model_type': 'pytorch'}  exposure  796b9a12       None  \n",
144 |        "3   {'use_pytorch': 1, 'model_type': 'pytorch'}  exposure  796b9a12       None  \n",
145 |        "4  {'use_pytorch': 0, 'model_type': 'surprise'}  exposure  796b9a12       None  "
146 |       ]
147 |      },
148 |      "execution_count": 4,
149 |      "metadata": {},
150 |      "output_type": "execute_result"
151 |     }
152 |    ],
153 |    "source": [
154 |     "df = pd.DataFrame.from_dict(data_list) \n",
155 |     "df.head()"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 5,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "exp_df = df.loc[df.event=='rate',['params','extra_data']]\n",
165 |     "exp_df['variant'] = exp_df['params'].apply(lambda x: x['use_pytorch'])\n",
166 |     "exp_df['rating'] = exp_df['extra_data'].apply(lambda x: x['rate_val'])"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 6,
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "data": {
176 |       "text/html": [
177 |        "<div>\n",
178 |        "<style scoped>\n",
179 |        "    .dataframe tbody tr th:only-of-type {\n",
180 |        "        vertical-align: middle;\n",
181 |        "    }\n",
182 |        "\n",
183 |        "    .dataframe tbody tr th {\n",
184 |        "        vertical-align: top;\n",
185 |        "    }\n",
186 |        "\n",
187 |        "    .dataframe thead th {\n",
188 |        "        text-align: right;\n",
189 |        "    }\n",
190 |        "</style>\n",
191 |        "<table border=\"1\" class=\"dataframe\">\n",
192 |        "  <thead>\n",
193 |        "    <tr style=\"text-align: right;\">\n",
194 |        "      <th></th>\n",
195 |        "      <th>params</th>\n",
196 |        "      <th>extra_data</th>\n",
197 |        "      <th>variant</th>\n",
198 |        "      <th>rating</th>\n",
199 |        "    </tr>\n",
200 |        "  </thead>\n",
201 |        "  <tbody>\n",
202 |        "    <tr>\n",
203 |        "      <th>32</th>\n",
204 |        "      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
205 |        "      <td>{'rate_val': 3}</td>\n",
206 |        "      <td>0</td>\n",
207 |        "      <td>3</td>\n",
208 |        "    </tr>\n",
209 |        "    <tr>\n",
210 |        "      <th>36</th>\n",
211 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
212 |        "      <td>{'rate_val': 7}</td>\n",
213 |        "      <td>1</td>\n",
214 |        "      <td>7</td>\n",
215 |        "    </tr>\n",
216 |        "    <tr>\n",
217 |        "      <th>41</th>\n",
218 |        "      <td>{'use_pytorch': 1, 'model_type': 'pytorch'}</td>\n",
219 |        "      <td>{'rate_val': 0}</td>\n",
220 |        "      <td>1</td>\n",
221 |        "      <td>0</td>\n",
222 |        "    </tr>\n",
223 |        "    <tr>\n",
224 |        "      <th>44</th>\n",
225 |        "      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
226 |        "      <td>{'rate_val': 7}</td>\n",
227 |        "      <td>0</td>\n",
228 |        "      <td>7</td>\n",
229 |        "    </tr>\n",
230 |        "    <tr>\n",
231 |        "      <th>48</th>\n",
232 |        "      <td>{'use_pytorch': 0, 'model_type': 'surprise'}</td>\n",
233 |        "      <td>{'rate_val': 3}</td>\n",
234 |        "      <td>0</td>\n",
235 |        "      <td>3</td>\n",
236 |        "    </tr>\n",
237 |        "  </tbody>\n",
238 |        "</table>\n",
239 |        "</div>"
240 |       ],
241 |       "text/plain": [
242 |        "                                          params       extra_data  variant  \\\n",
243 |        "32  {'use_pytorch': 0, 'model_type': 'surprise'}  {'rate_val': 3}        0   \n",
244 |        "36   {'use_pytorch': 1, 'model_type': 'pytorch'}  {'rate_val': 7}        1   \n",
245 |        "41   {'use_pytorch': 1, 'model_type': 'pytorch'}  {'rate_val': 0}        1   \n",
246 |        "44  {'use_pytorch': 0, 'model_type': 'surprise'}  {'rate_val': 7}        0   \n",
247 |        "48  {'use_pytorch': 0, 'model_type': 'surprise'}  {'rate_val': 3}        0   \n",
248 |        "\n",
249 |        "    rating  \n",
250 |        "32       3  \n",
251 |        "36       7  \n",
252 |        "41       0  \n",
253 |        "44       7  \n",
254 |        "48       3  "
255 |       ]
256 |      },
257 |      "execution_count": 6,
258 |      "metadata": {},
259 |      "output_type": "execute_result"
260 |     }
261 |    ],
262 |    "source": [
263 |     "exp_df.head()"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 7,
269 |    "metadata": {},
270 |    "outputs": [],
271 |    "source": [
272 |     "d1 = exp_df.loc[exp_df.variant==0,'rating'].values\n",
273 |     "d2 = exp_df.loc[exp_df.variant==1,'rating'].values"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 8,
279 |    "metadata": {},
280 |    "outputs": [
281 |     {
282 |      "name": "stdout",
283 |      "output_type": "stream",
284 |      "text": [
285 |       "0.9650813700331365 0.37882815062317066\n"
286 |      ]
287 |     }
288 |    ],
289 |    "source": [
290 |     "import scipy.stats\n",
291 |     "statistic, pval = scipy.stats.ttest_ind(d1,d2)\n",
292 |     "print(statistic,pval)"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 9,
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": [
301 |     "import random\n",
302 |     "d1 = random.choices(range(1,6),k=50)\n",
303 |     "d2 = random.choices(range(3,10),k=50)"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": 10,
309 |    "metadata": {},
310 |    "outputs": [
311 |     {
312 |      "data": {
313 |       "text/plain": [
314 |        "Ttest_indResult(statistic=-10.876400571212649, pvalue=1.5325813144977069e-18)"
315 |       ]
316 |      },
317 |      "execution_count": 10,
318 |      "metadata": {},
319 |      "output_type": "execute_result"
320 |     }
321 |    ],
322 |    "source": [
323 |     "scipy.stats.ttest_ind(d1,d2)"
324 |    ]
325 |   }
326 |  ],
327 |  "metadata": {
328 |   "kernelspec": {
329 |    "display_name": "Python 3",
330 |    "language": "python",
331 |    "name": "python3"
332 |   },
333 |   "language_info": {
334 |    "codemirror_mode": {
335 |     "name": "ipython",
336 |     "version": 3
337 |    },
338 |    "file_extension": ".py",
339 |    "mimetype": "text/x-python",
340 |    "name": "python",
341 |    "nbconvert_exporter": "python",
342 |    "pygments_lexer": "ipython3",
343 |    "version": "3.8.2"
344 |   }
345 |  },
346 |  "nbformat": 4,
347 |  "nbformat_minor": 4
348 | }
349 | 


--------------------------------------------------------------------------------
/ab_testing_example_posthog/README.md:
--------------------------------------------------------------------------------
 1 | # My Flask App
 2 | 
 3 | This is a simple Flask application that demonstrates A/B testing with a PostHog feature flag.
 4 | 
 5 | ## Installation
 6 | 
 7 | 1. Clone this repository.
 8 | 2. Install the dependencies by running `pip install -r requirements.txt`, then copy `env.example` to `.env` and set `POSTHOG_API_KEY` to your PostHog API key.
 9 | 
10 | ## Running the Application
11 | 
12 | To run the application, use the command `python app.py`.
13 | 
14 | The application will start a web server on your local machine. You can access it by going to `http://127.0.0.1:5000` in your web browser.
15 | 
16 | ## Structure
17 | 
18 | - `app.py`: This is the main Python file that runs the Flask application.
19 | - `templates/index.html`: This is the main HTML file for the web application.
20 | - `static/css/main.css`: This is the CSS file for styling the HTML file.
21 | - `static/js/main.js`: This is the JavaScript file for adding interactivity to the HTML file.
22 | - `requirements.txt`: This file lists all the Python dependencies for the project.
23 | 
24 | ## Dependencies
25 | 
26 | - Flask: A lightweight WSGI web application framework.
27 | 
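28 | `requirements.txt` also lists `posthog` (feature flags and event capture) and `python-dotenv` (loads the `.env` file). Please ensure all of these dependencies are installed, or install them using the `requirements.txt` file.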


--------------------------------------------------------------------------------
/ab_testing_example_posthog/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template, make_response, request
 2 | from posthog import Posthog
 3 | import uuid
 4 | from dotenv import load_dotenv
 5 | import os
 6 | 
 7 | load_dotenv()
 8 | 
# PostHog client shared by the request handlers in this module. The API key is
# read from the POSTHOG_API_KEY environment variable; os.getenv returns None
# when it is unset, so make sure the .env file (see env.example) is in place.
posthog = Posthog(
  os.getenv('POSTHOG_API_KEY'), 
  host='https://us.i.posthog.com'
)
13 | 
14 | 
15 | app = Flask(__name__)
16 | 
@app.route('/')
def home():
    """Render the `index.html` template for the site root."""
    return render_template('index.html')
20 | 
@app.route("/blog/<string:slug>", methods=["GET", "POST"])
def blog(slug):
  """Render a blog post page (GET/HEAD) or record a "like" for it (POST).

  The visitor is identified by a `user_id` cookie; a fresh UUID is issued
  when the cookie is absent. The PostHog feature flag "mlops-exp" selects
  which page variant is shown, and the same flag value is attached to the
  "liked_post" event under the `$feature/mlops-exp` property.

  Args:
    slug: Post identifier taken from the URL path (untrusted input).

  Returns:
    The response object carrying the page or thank-you HTML and, for
    first-time visitors, the `user_id` cookie.
  """
  # Function-scope imports (standard library only) keep this fix
  # self-contained without touching the module's import block.
  from html import escape
  from urllib.parse import quote

  response = make_response()

  if 'user_id' not in request.cookies:
    user_id = str(uuid.uuid4())
    response.set_cookie('user_id', user_id)
  else:
    user_id = request.cookies.get('user_id')

  flag_key = "mlops-exp"
  flag = posthog.get_feature_flag(flag_key, user_id)

  # `slug` comes straight from the URL, so it must never reach the HTML
  # unescaped: escape it for text nodes and percent-encode it for the
  # form's action URL (then escape that for the attribute context).
  safe_slug = escape(slug)
  post_url = escape(f"/blog/{quote(slug)}")

  if request.method == "POST":
    posthog.capture(
      user_id, 
      "liked_post", 
      {
        'slug': slug,
        f'$feature/{flag_key}': flag
      }
    )
    # Return the shared response (not a bare string) so a cookie set above
    # is actually delivered to the browser.
    response.data = f"<p>Thanks for liking {safe_slug}</p>"
    return response

  # GET, plus the HEAD method Flask registers automatically for GET routes;
  # previously HEAD matched no branch and the view returned None.
  if (flag == 'test'):
    response.data = f"""
        <p>Welcome to the very cool blog: {safe_slug}</p>
        <form method="post" action="{post_url}">
            <input type="submit" value="Like this cool blog" name="like"/>
        </form>
      """
    return response

  response.data = f"""
      <p>Welcome to the blog post: {safe_slug}</p>
      <form method="post" action="{post_url}">
          <input type="submit" value="Like" name="like"/>
      </form>
    """
  return response
65 | 
# Start Flask's built-in server when this file is run directly.
# NOTE(review): debug=True is passed unconditionally; Flask's debug mode is
# meant for local development only, so do not use this entry point in production.
if __name__ == '__main__':
    app.run(debug=True)


--------------------------------------------------------------------------------
/ab_testing_example_posthog/env.example:
--------------------------------------------------------------------------------
1 | POSTHOG_API_KEY="TBD"


--------------------------------------------------------------------------------
/ab_testing_example_posthog/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==1.1.2
2 | posthog
3 | python-dotenv


--------------------------------------------------------------------------------
/ab_testing_example_posthog/static/css/main.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |     font-family: Arial, sans-serif;
 3 |     margin: 0;
 4 |     padding: 0;
 5 |     background-color: #f0f0f0;
 6 | }
 7 | 
 8 | .container {
 9 |     width: 80%;
10 |     margin: 0 auto;
11 | }


--------------------------------------------------------------------------------
/ab_testing_example_posthog/static/js/main.js:
--------------------------------------------------------------------------------
1 | // This file is intentionally left blank


--------------------------------------------------------------------------------
/ab_testing_example_posthog/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <title>My Flask App</title>
 5 |     <link rel="stylesheet" href="{{ url_for('static', filename='css/main.css') }}">
 6 | </head>
 7 | <body>
 8 |     <h1>Welcome to My Flask App!</h1>
 9 |     <script src="{{ url_for('static', filename='js/main.js') }}"></script>
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/airflow_astro_example/.astro/config.yaml:
--------------------------------------------------------------------------------
1 | project:
2 |   name: airflow-astro-example
3 | 


--------------------------------------------------------------------------------
/airflow_astro_example/.astro/test_dag_integrity_default.py:
--------------------------------------------------------------------------------
  1 | """Test the validity of all DAGs. **USED BY DEV PARSE COMMAND DO NOT EDIT**"""
  2 | from contextlib import contextmanager
  3 | import logging
  4 | import os
  5 | 
  6 | import pytest
  7 | 
  8 | from airflow.models import DagBag, Variable, Connection
  9 | from airflow.hooks.base import BaseHook
 10 | from airflow.utils.db import initdb
 11 | 
 12 | # init airflow database
 13 | initdb()
 14 | 
 15 | # The following code patches errors caused by missing OS Variables, Airflow Connections, and Airflow Variables
 16 | 
 17 | 
 18 | # =========== MONKEYPATCH BaseHook.get_connection() ===========
 19 | def basehook_get_connection_monkeypatch(key: str, *args, **kwargs):
 20 |     print(
 21 |         f"Attempted to fetch connection during parse returning an empty Connection object for {key}"
 22 |     )
 23 |     return Connection(key)
 24 | 
 25 | 
 26 | BaseHook.get_connection = basehook_get_connection_monkeypatch
 27 | # # =========== /MONKEYPATCH BASEHOOK.GET_CONNECTION() ===========
 28 | 
 29 | 
 30 | # =========== MONKEYPATCH OS.GETENV() ===========
 31 | def os_getenv_monkeypatch(key: str, *args, **kwargs):
 32 |     default = None
 33 |     if args:
 34 |         default = args[0]  # os.getenv should get at most 1 arg after the key
 35 |     if kwargs:
 36 |         default = kwargs.get(
 37 |             "default", None
 38 |         )  # and sometimes kwarg if people are using the sig
 39 | 
 40 |     env_value = os.environ.get(key, None)
 41 | 
 42 |     if env_value:
 43 |         return env_value  # if the env_value is set, return it
 44 |     if (
 45 |         key == "JENKINS_HOME" and default is None
 46 |     ):  # fix https://github.com/astronomer/astro-cli/issues/601
 47 |         return None
 48 |     if default:
 49 |         return default  # otherwise return whatever default has been passed
 50 |     return f"MOCKED_{key.upper()}_VALUE"  # if absolutely nothing has been passed - return the mocked value
 51 | 
 52 | 
 53 | os.getenv = os_getenv_monkeypatch
 54 | # # =========== /MONKEYPATCH OS.GETENV() ===========
 55 | 
 56 | # =========== MONKEYPATCH VARIABLE.GET() ===========
 57 | 
 58 | 
 59 | class magic_dict(dict):
 60 |     def __init__(self, *args, **kwargs):
 61 |         self.update(*args, **kwargs)
 62 | 
 63 |     def __getitem__(self, key):
 64 |         return {}.get(key, "MOCKED_KEY_VALUE")
 65 | 
 66 | 
 67 | _no_default = object()  # allow falsey defaults
 68 | 
 69 | 
 70 | def variable_get_monkeypatch(key: str, default_var=_no_default, deserialize_json=False):
 71 |     print(
 72 |         f"Attempted to get Variable value during parse, returning a mocked value for {key}"
 73 |     )
 74 | 
 75 |     if default_var is not _no_default:
 76 |         return default_var
 77 |     if deserialize_json:
 78 |         return magic_dict()
 79 |     return "NON_DEFAULT_MOCKED_VARIABLE_VALUE"
 80 | 
 81 | 
 82 | Variable.get = variable_get_monkeypatch
 83 | # # =========== /MONKEYPATCH VARIABLE.GET() ===========
 84 | 
 85 | 
 86 | @contextmanager
 87 | def suppress_logging(namespace):
 88 |     """
 89 |     Suppress logging within a specific namespace to keep tests "clean" during build
 90 |     """
 91 |     logger = logging.getLogger(namespace)
 92 |     old_value = logger.disabled
 93 |     logger.disabled = True
 94 |     try:
 95 |         yield
 96 |     finally:
 97 |         logger.disabled = old_value
 98 | 
 99 | 
def get_import_errors():
    """
    Build the (name, message) pairs consumed by the import test.

    Each entry of the DagBag's `import_errors` yields a pair of its path
    (relative to AIRFLOW_HOME) and the stripped error text, followed by one
    (name, "No import errors") pair for every key of `dags` that is not
    also a key of `import_errors`.
    """
    with suppress_logging("airflow"):
        bag = DagBag(include_examples=False)

        def relative_to_home(path):
            return os.path.relpath(path, os.environ.get("AIRFLOW_HOME"))

        failures = [
            (relative_to_home(path), message.strip())
            for path, message in bag.import_errors.items()
        ]

        # NOTE(review): `bag.dags` looks like it is keyed by dag_id rather than
        # by file path, in which case these entries are dag ids and the
        # membership check below never filters anything - confirm.
        successes = [
            (relative_to_home(entry), "No import errors")
            for entry in bag.dags
            if entry not in bag.import_errors
        ]

        return failures + successes
124 | 
125 | 
# Load the DagBag once and reuse the result for both the parameter values and
# their ids. Calling get_import_errors() twice parsed every DAG file twice and
# could pair ids with values from a different load.
_IMPORT_CHECK_RESULTS = get_import_errors()


@pytest.mark.parametrize(
    "rel_path, rv", _IMPORT_CHECK_RESULTS, ids=[x[0] for x in _IMPORT_CHECK_RESULTS]
)
def test_file_imports(rel_path, rv):
    """Fail when `rv` reports an import error for `rel_path`.

    `rv` is either the sentinel "No import errors" or the error text
    produced while loading the file.
    """
    if rv != "No import errors":
        # If rv is not "No import errors," consider it a failed test
        raise Exception(f"{rel_path} failed to import with message \n {rv}")
    else:
        # If rv is "No import errors," consider it a passed test
        print(f"{rel_path} passed the import test")
137 | 


--------------------------------------------------------------------------------
/airflow_astro_example/.dockerignore:
--------------------------------------------------------------------------------
1 | astro
2 | .git
3 | .env
4 | airflow_settings.yaml
5 | logs/
6 | .venv
7 | airflow.db
8 | airflow.cfg
9 | 


--------------------------------------------------------------------------------
/airflow_astro_example/.gitignore:
--------------------------------------------------------------------------------
 1 | .git
 2 | .env
 3 | .DS_Store
 4 | airflow_settings.yaml
 5 | __pycache__/
 6 | astro
 7 | .venv
 8 | airflow-webserver.pid
 9 | webserver_config.py
10 | airflow.cfg
11 | airflow.db
12 | 


--------------------------------------------------------------------------------
/airflow_astro_example/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/astronomer/astro-runtime:10.3.0
2 | 


--------------------------------------------------------------------------------
/airflow_astro_example/README.md:
--------------------------------------------------------------------------------
 1 | Overview
 2 | ========
 3 | 
 4 | Welcome to Astronomer! This project was generated after you ran 'astro dev init' using the Astronomer CLI. This readme describes the contents of the project, as well as how to run Apache Airflow on your local machine.
 5 | 
 6 | Project Contents
 7 | ================
 8 | 
 9 | Your Astro project contains the following files and folders:
10 | 
11 | - dags: This folder contains the Python files for your Airflow DAGs. By default, this directory includes two example DAGs:
12 |     - `example_dag_basic`: This DAG shows a simple ETL data pipeline example with three TaskFlow API tasks that run daily.
13 |     - `example_dag_advanced`: This advanced DAG showcases a variety of Airflow features like branching, Jinja templates, task groups and several Airflow operators.
14 | - Dockerfile: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. If you want to execute other commands or overrides at runtime, specify them here.
15 | - include: This folder contains any additional files that you want to include as part of your project. It is empty by default.
16 | - packages.txt: Install OS-level packages needed for your project by adding them to this file. It is empty by default.
17 | - requirements.txt: Install Python packages needed for your project by adding them to this file. It is empty by default.
18 | - plugins: Add custom or community plugins for your project to this folder. It is empty by default.
19 | - airflow_settings.yaml: Use this local-only file to specify Airflow Connections, Variables, and Pools instead of entering them in the Airflow UI as you develop DAGs in this project.
20 | 
21 | Deploy Your Project Locally
22 | ===========================
23 | 
24 | 1. Start Airflow on your local machine by running 'astro dev start'.
25 | 
26 | This command will spin up 4 Docker containers on your machine, each for a different Airflow component:
27 | 
28 | - Postgres: Airflow's Metadata Database
29 | - Webserver: The Airflow component responsible for rendering the Airflow UI
30 | - Scheduler: The Airflow component responsible for monitoring and triggering tasks
31 | - Triggerer: The Airflow component responsible for triggering deferred tasks
32 | 
33 | 2. Verify that all 4 Docker containers were created by running 'docker ps'.
34 | 
35 | Note: Running 'astro dev start' will start your project with the Airflow Webserver exposed at port 8080 and Postgres exposed at port 5432. If you already have either of those ports allocated, you can either [stop your existing Docker containers or change the port](https://docs.astronomer.io/astro/test-and-troubleshoot-locally#ports-are-not-available).
36 | 
37 | 3. Access the Airflow UI for your local Airflow project. To do so, go to http://localhost:8080/ and log in with 'admin' for both your Username and Password.
38 | 
39 | You should also be able to access your Postgres Database at 'localhost:5432/postgres'.
40 | 
41 | Deploy Your Project to Astronomer
42 | =================================
43 | 
44 | If you have an Astronomer account, pushing code to a Deployment on Astronomer is simple. For deploying instructions, refer to Astronomer documentation: https://docs.astronomer.io/cloud/deploy-code/
45 | 
46 | Contact
47 | =======
48 | 
49 | The Astronomer CLI is maintained with love by the Astronomer team. To report a bug or suggest a change, reach out to our support.
50 | 


--------------------------------------------------------------------------------
/airflow_astro_example/dags/.airflowignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/airflow_astro_example/dags/.airflowignore


--------------------------------------------------------------------------------
/airflow_astro_example/dags/example_dag_advanced.py:
--------------------------------------------------------------------------------
  1 | from pendulum import datetime, duration
  2 | 
  3 | # Airflow Operators are templates for tasks and encompass the logic that your DAG will actually execute.
  4 | # To use an operator in your DAG, you first have to import it.
  5 | # To learn more about operators, see: https://registry.astronomer.io/.
  6 | 
  7 | # DAG and task decorators for interfacing with the TaskFlow API
  8 | from airflow.decorators import dag, task, task_group
  9 | 
 10 | # A function that sets sequential dependencies between tasks including lists of tasks
 11 | from airflow.models.baseoperator import chain
 12 | 
 13 | from airflow.operators.bash import BashOperator
 14 | from airflow.operators.empty import EmptyOperator
 15 | from airflow.operators.weekday import BranchDayOfWeekOperator
 16 | 
 17 | # Used to label node edges in the Airflow UI
 18 | from airflow.utils.edgemodifier import Label
 19 | 
 20 | # Used to determine the day of the week
 21 | from airflow.utils.weekday import WeekDay
 22 | 
 23 | 
 24 | """
 25 | This DAG is intended to demonstrate a number of core Apache Airflow concepts that are central to the pipeline
 26 | authoring experience, including the TaskFlow API, Edge Labels, Jinja templating, branching,
 27 | generating tasks within a loop, task groups, and trigger rules.
 28 | 
 29 | First, this DAG checks if the current day is a weekday or weekend. Next, the DAG checks which day of the week
 30 | it is. Lastly, the DAG prints out a bash statement based on which day it is. On Tuesday, for example, the DAG
 31 | prints "It's Tuesday and I'm busy with studying".
 32 | 
 33 | This DAG uses the following operators:
 34 | 
 35 | BashOperator -
 36 |     Executes a Bash script, command, or set of commands.
 37 | 
 38 |     See more info about this operator here:
 39 |         https://registry.astronomer.io/providers/apache-airflow/modules/bashoperator
 40 | 
 41 | EmptyOperator -
 42 |     Does nothing but can be used to structure your DAG.
 43 | 
 44 |     See more info about this operator here:
 45 |         https://registry.astronomer.io/providers/apache-airflow/modules/emptyoperator
 46 | 
 47 | BranchDayOfWeekOperator -
 48 |     Branches into one of two lists of tasks depending on the current day.
 49 | 
 50 |     See more info about this operator here:
 51 |         https://registry.astronomer.io/providers/apache-airflow/modules/branchdayofweekoperator
 52 | """
 53 | 
 54 | # Reference data that defines "weekday" as well as the activity assigned to each day of the week
 55 | DAY_ACTIVITY_MAPPING = {
 56 |     "monday": {"is_weekday": True, "activity": "guitar lessons"},
 57 |     "tuesday": {"is_weekday": True, "activity": "studying"},
 58 |     "wednesday": {"is_weekday": True, "activity": "soccer practice"},
 59 |     "thursday": {"is_weekday": True, "activity": "contributing to Airflow"},
 60 |     "friday": {"is_weekday": True, "activity": "family dinner"},
 61 |     "saturday": {"is_weekday": False, "activity": "going to the beach"},
 62 |     "sunday": {"is_weekday": False, "activity": "sleeping in"},
 63 | }
 64 | 
 65 | # The TaskFlow API is also used in a number of tasks within this DAG. Check out the TaskFlow API tutorial
 66 | # to learn more.
 67 | #   https://airflow.apache.org/docs/apache-airflow/stable/tutorial/taskflow.html
 68 | 
 69 | 
 70 | # This is the TaskFlow equivalent of the PythonOperator:
 71 | #   https://registry.astronomer.io/providers/apache-airflow/modules/pythonoperator
 72 | @task(
 73 |     # By default the function name is used as the `task_id`, but it can be overriden if desired.
 74 |     task_id="going_to_the_beach",
 75 |     multiple_outputs=True,  # multiple_outputs=True unrolls dictionaries into separate XCom values
 76 | )
 77 | def _going_to_the_beach() -> dict[str, str]:
 78 |     return {
 79 |         "subject": "Beach day!",
 80 |         "body": "It's Saturday and I'm heading to the beach.<br>Come join me!",
 81 |     }
 82 | 
 83 | 
 84 | # This is the TaskFlow API equivalent to the BranchPythonOperator:
 85 | #   https://registry.astronomer.io/providers/apache-airflow/modules/branchpythonoperator
 86 | # The task retrieves the activity from the "DAY_ACTIVITY_MAPPING" dictionary.
 87 | @task.branch
 88 | def get_activity(day_name: str) -> str:
 89 |     activity_id = DAY_ACTIVITY_MAPPING[day_name]["activity"].replace(" ", "_")
 90 | 
 91 |     if DAY_ACTIVITY_MAPPING[day_name]["is_weekday"]:
 92 |         return f"weekday_activities.{activity_id}"
 93 | 
 94 |     return f"weekend_activities.{activity_id}"
 95 | 
 96 | 
 97 | # This is the TaskFlow API equivalent to the PythonVirtualenvOperator:
 98 | #   https://registry.astronomer.io/providers/apache-airflow/modules/pythonvirtualenvoperator
 99 | @task.virtualenv(requirements=["beautifulsoup4==4.11.2"])
100 | def inviting_friends(subject: str, body: str) -> None:
101 |     from bs4 import BeautifulSoup
102 | 
103 |     print("Inviting friends...")
104 |     html_doc = f"<title>{subject}</title><p>{body}</p>"
105 |     soup = BeautifulSoup(html_doc, "html.parser")
106 |     print(soup.prettify())
107 | 
108 | 
109 | # When using the DAG decorator, the "dag" argument doesn't need to be specified for each task.
110 | # The "dag_id" value defaults to the name of the function it is decorating if not explicitly set.
111 | # In this example, the "dag_id" value would be "example_dag_advanced".
112 | @dag(
113 |     # This DAG is set to run for the first time on January 1, 2023.
114 |     # Best practice is to use a static start_date.
115 |     # Subsequent DAG runs are instantiated based on the "schedule" parameter below.
116 |     start_date=datetime(2023, 1, 1),
117 |     # This defines how many instantiations of this DAG (DAG Runs) can execute concurrently. In this case,
118 |     # we're only allowing 1 DAG run at any given time, as opposed to allowing multiple overlapping DAG runs.
119 |     max_active_runs=1,
120 |     # This defines how often your DAG will run, or the schedule by which DAG runs are created. It can be
121 |     # defined as a cron expression, custom timetable, existing presets or using the Dataset feature.
122 |     # This DAG uses a preset to run daily.
123 |     schedule="@daily",
124 |     # Default settings applied to all tasks within the DAG; can be overwritten at the task level.
125 |     default_args={
126 |         "owner": "community",  # Defines the value of the "owner" column in the DAG view of the Airflow UI
127 |         "retries": 2,  # If a task fails, it will retry 2 times.
128 |         "retry_delay": duration(
129 |             minutes=3
130 |         ),  # A task that fails will wait 3 minutes to retry.
131 |     },
132 |     default_view="graph",  # This defines the default view for this DAG in the Airflow UI
133 |     # When catchup=False, your DAG will only run for the latest schedule interval. In this case, this means
134 |     # that tasks will not be run between January 1st, 2023 and 1 day ago. When turned on, this DAG's first run
135 |     # will be for today, per the @daily schedule
136 |     catchup=False,
137 |     tags=["example"],  # If set, this tag is shown in the DAG view of the Airflow UI
138 | )
139 | def example_dag_advanced():
140 |     # EmptyOperator placeholder for first task
141 |     begin = EmptyOperator(task_id="begin")
142 |     # Last task will only trigger if all upstream tasks have succeeded or been skipped
143 |     end = EmptyOperator(task_id="end", trigger_rule="none_failed")
144 | 
145 |     # This task checks which day of the week it is
146 |     check_day_of_week = BranchDayOfWeekOperator(
147 |         task_id="check_day_of_week",
148 |         week_day={WeekDay.SATURDAY, WeekDay.SUNDAY},  # This checks day of week
149 |         follow_task_ids_if_true="weekend",  # Next task if criteria is met
150 |         follow_task_ids_if_false="weekday",  # Next task if criteria is not met
151 |         use_task_execution_day=True,  # If True, uses task’s execution day to compare with is_today
152 |     )
153 | 
154 |     weekend = EmptyOperator(task_id="weekend")  # "weekend" placeholder task
155 |     weekday = EmptyOperator(task_id="weekday")  # "weekday" placeholder task
156 | 
157 |     # Templated value for determining the name of the day of week based on the start date of the DAG Run
158 |     day_name = "{{ dag_run.start_date.strftime('%A').lower() }}"
159 | 
160 |     # Begin weekday tasks.
161 |     # Tasks within this TaskGroup (weekday tasks) will be grouped together in the Airflow UI
162 |     @task_group
163 |     def weekday_activities():
164 |         # TaskFlow functions can also be reused which is beneficial if you want to use the same callable for
165 |         # multiple tasks and want to use different task attributes.
166 |         # See this tutorial for more information:
167 |         #   https://airflow.apache.org/docs/apache-airflow/stable/tutorial/taskflow.html#reusing-a-decorated-task
168 |         which_weekday_activity_day = get_activity.override(
169 |             task_id="which_weekday_activity_day"
170 |         )(day_name)
171 | 
172 |         for day, day_info in DAY_ACTIVITY_MAPPING.items():
173 |             if day_info["is_weekday"]:
174 |                 day_of_week = Label(label=day)
175 |                 activity = day_info["activity"]
176 | 
177 |                 # This task prints the weekday activity to bash
178 |                 do_activity = BashOperator(
179 |                     task_id=activity.replace(" ", "_"),
180 |                     # This is the Bash command to run
181 |                     bash_command=f"echo It's {day.capitalize()} and I'm busy with {activity}.",
182 |                 )
183 | 
184 |                 # Declaring task dependencies within the "TaskGroup" via the classic bitshift operator.
185 |                 which_weekday_activity_day >> day_of_week >> do_activity
186 | 
187 |     # Begin weekend tasks
188 |     # Tasks within this TaskGroup will be grouped together in the UI
189 |     @task_group
190 |     def weekend_activities():
191 |         which_weekend_activity_day = get_activity.override(
192 |             task_id="which_weekend_activity_day"
193 |         )(day_name)
194 | 
195 |         # Labels that will appear in the Graph view of the Airflow UI
196 |         saturday = Label(label="saturday")
197 |         sunday = Label(label="sunday")
198 | 
199 |         # This task runs the Sunday activity of sleeping for a random interval between 1 and 30 seconds
200 |         sleeping_in = BashOperator(
201 |             task_id="sleeping_in", bash_command="sleep $[ (1 + $RANDOM % 30) ]s"
202 |         )
203 | 
204 |         going_to_the_beach = _going_to_the_beach()  # Calling the TaskFlow task
205 | 
206 |         # Because the "_going_to_the_beach()" function has "multiple_outputs" enabled, each dict key is
207 |         # accessible as their own "XCom" key.
208 |         _inviting_friends = inviting_friends(
209 |             subject=going_to_the_beach["subject"], body=going_to_the_beach["body"]
210 |         )
211 | 
212 |         # Using "chain()" here for list-to-list dependencies which are not supported by the bitshift
213 |         # operator and to simplify the notation for the desired dependency structure.
214 |         chain(
215 |             which_weekend_activity_day,
216 |             [saturday, sunday],
217 |             [going_to_the_beach, sleeping_in],
218 |         )
219 | 
220 |     # Call the @task_group TaskFlow functions to instantiate them in the DAG
221 |     _weekday_activities = weekday_activities()
222 |     _weekend_activities = weekend_activities()
223 | 
224 |     # High-level dependencies between tasks
225 |     chain(
226 |         begin,
227 |         check_day_of_week,
228 |         [weekday, weekend],
229 |         [_weekday_activities, _weekend_activities],
230 |         end,
231 |     )
232 | 
233 |     # Task dependency created by XComArgs:
234 |     # going_to_the_beach >> inviting_friends
235 | 
236 | 
237 | example_dag_advanced()
238 | 


--------------------------------------------------------------------------------
/airflow_astro_example/dags/example_dag_basic.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pendulum import datetime
 3 | 
 4 | from airflow.decorators import (
 5 |     dag,
 6 |     task,
 7 | )  # DAG and task decorators for interfacing with the TaskFlow API
 8 | 
 9 | 
10 | # When using the DAG decorator, The "dag_id" value defaults to the name of the function
11 | # it is decorating if not explicitly set. In this example, the "dag_id" value would be "example_dag_basic".
@dag(
    # This defines how often your DAG will run, or the schedule by which your DAG runs. In this case, this DAG
    # will run daily
    schedule="@daily",
    # This DAG is set to run for the first time on January 1, 2023. Best practice is to use a static
    # start_date. Subsequent DAG runs are instantiated based on the schedule
    start_date=datetime(2023, 1, 1),
    # When catchup=False, your DAG will only run the latest run that would have been scheduled. In this case,
    # this means that no runs are created for the days between January 1, 2023 and the most recent daily
    # interval; the first run is the latest one due per the @daily schedule.
    catchup=False,
    default_args={
        "retries": 2,  # If a task fails, it will retry 2 times.
    },
    tags=["example"],  # If set, this tag is shown in the DAG view of the Airflow UI
)
def example_dag_basic():
    """
    ### Basic ETL Dag
    This is a simple ETL data pipeline example that demonstrates the use of
    the TaskFlow API using three simple tasks for extract, transform, and load.
    For more information on Airflow's TaskFlow API, reference documentation here:
    https://airflow.apache.org/docs/apache-airflow/stable/tutorial_taskflow_api.html
    """

    @task()
    def extract():
        """
        #### Extract task
        A simple "extract" task to get data ready for the rest of the
        pipeline. In this case, getting data is simulated by reading from a
        hardcoded JSON string.
        """
        # Every value must be a JSON number: the payload has to parse with json.loads
        # and the downstream "transform" task sums the values.
        data_string = '{"1001": 301.27, "1002": 433.21, "1003": 502.22}'

        order_data_dict = json.loads(data_string)
        return order_data_dict

    @task(
        multiple_outputs=True
    )  # multiple_outputs=True unrolls dictionaries into separate XCom values
    def transform(order_data_dict: dict):
        """
        #### Transform task
        A simple "transform" task which takes in the collection of order data and
        computes the total order value.
        """
        total_order_value = 0

        for value in order_data_dict.values():
            total_order_value += value

        return {"total_order_value": total_order_value}

    @task()
    def load(total_order_value: float):
        """
        #### Load task
        A simple "load" task that takes in the result of the "transform" task and prints it out,
        instead of saving it for end user review
        """

        print(f"Total order value is: {total_order_value:.2f}")

    # Passing task outputs as arguments wires up extract -> transform -> load.
    order_data = extract()
    order_summary = transform(order_data)
    load(order_summary["total_order_value"])
79 | 
80 | 
81 | example_dag_basic()
82 | 


--------------------------------------------------------------------------------
/airflow_astro_example/packages.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/airflow_astro_example/packages.txt


--------------------------------------------------------------------------------
/airflow_astro_example/requirements.txt:
--------------------------------------------------------------------------------
1 | # Astro Runtime includes the following pre-installed providers packages: https://docs.astronomer.io/astro/runtime-image-architecture#provider-packages
2 | 


--------------------------------------------------------------------------------
/airflow_astro_example/tests/dags/test_dag_example.py:
--------------------------------------------------------------------------------
 1 | """Example DAGs test. This test ensures that all DAGs have tags, have retries set to at least two, and have no import errors. This is an example pytest and may not fit the context of your DAGs. Feel free to add and remove tests."""
 2 | 
 3 | import os
 4 | import logging
 5 | from contextlib import contextmanager
 6 | import pytest
 7 | from airflow.models import DagBag
 8 | 
 9 | 
@contextmanager
def suppress_logging(namespace):
    """Temporarily disable the logger registered under ``namespace``.

    The logger's previous ``disabled`` flag is put back when the ``with`` block
    exits, including when the block raises.
    """
    target = logging.getLogger(namespace)
    # Remember the current flag and switch the logger off in one step.
    was_disabled, target.disabled = target.disabled, True
    try:
        yield
    finally:
        target.disabled = was_disabled
19 | 
20 | 
def get_import_errors():
    """
    Build the parametrization rows for DAG import errors.

    Returns a list of ``(relative_path, message)`` tuples, one per entry in the
    DagBag's ``import_errors``, with paths made relative to ``AIRFLOW_HOME``.
    """
    with suppress_logging("airflow"):
        bag = DagBag(include_examples=False)
        airflow_home = os.environ.get("AIRFLOW_HOME")

        # Seed with "(None, None)" so that a test object is always created even if it's a no op.
        rows = [(None, None)]
        for file_path, message in bag.import_errors.items():
            rows.append((os.path.relpath(file_path, airflow_home), message.strip()))
        return rows
35 | 
36 | 
def get_dags():
    """
    Build the parametrization rows for every DAG in the DagBag.

    Returns a list of ``(dag_id, dag, relative_fileloc)`` tuples, where the file
    location is made relative to ``AIRFLOW_HOME``.
    """
    with suppress_logging("airflow"):
        bag = DagBag(include_examples=False)

    airflow_home = os.environ.get("AIRFLOW_HOME")
    rows = []
    for dag_id, dag_obj in bag.dags.items():
        rows.append((dag_id, dag_obj, os.path.relpath(dag_obj.fileloc, airflow_home)))
    return rows
48 | 
49 | 
@pytest.mark.parametrize(
    "rel_path,rv",
    get_import_errors(),
    ids=[entry[0] for entry in get_import_errors()],
)
def test_file_imports(rel_path, rv):
    """Fail for any DAG file that recorded an import error."""
    # The seeded (None, None) row is a deliberate no-op.
    if not (rel_path and rv):
        return
    raise Exception(f"{rel_path} failed to import with message \n {rv}")
57 | 
58 | 
# Tags a DAG is allowed to carry. Leave empty to accept any tag.
# Kept as a set (``{}`` would be a dict) so the set difference below is well defined.
APPROVED_TAGS = set()


@pytest.mark.parametrize(
    "dag_id,dag,fileloc", get_dags(), ids=[x[2] for x in get_dags()]
)
def test_dag_tags(dag_id, dag, fileloc):
    """
    test if a DAG is tagged and if those TAGs are in the approved list
    """
    assert dag.tags, f"{dag_id} in {fileloc} has no tags"
    if APPROVED_TAGS:
        # set(...) also tolerates APPROVED_TAGS being given as a list or tuple.
        unapproved = set(dag.tags) - set(APPROVED_TAGS)
        assert (
            not unapproved
        ), f"{dag_id} in {fileloc} has unapproved tags: {sorted(unapproved)}"
72 | 
73 | 
@pytest.mark.parametrize(
    "dag_id,dag, fileloc", get_dags(), ids=[x[2] for x in get_dags()]
)
def test_dag_retries(dag_id, dag, fileloc):
    """
    test if a DAG has retries set to at least two in its default_args
    """
    # A DAG without "retries" must fail the assertion with the message below,
    # not crash with "TypeError: '>=' not supported between NoneType and int".
    retries = (dag.default_args or {}).get("retries")
    assert (
        retries is not None and retries >= 2
    ), f"{dag_id} in {fileloc} must have task retries >= 2."
84 | 


--------------------------------------------------------------------------------
/airflow_setup/setup.sh:
--------------------------------------------------------------------------------
 1 | AIRFLOW_VERSION=2.10.5
 2 | 
 3 | # Extract the version of Python you have installed. If you're currently using a Python version that is not supported by Airflow, you may want to set this manually.
 4 | # See above for supported versions.
 5 | PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
 6 | 
 7 | CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt"
 8 | # For example this would install 2.10.5 with python 3.8: https://raw.githubusercontent.com/apache/airflow/constraints-2.10.5/constraints-3.8.txt
 9 | 
10 | pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"


--------------------------------------------------------------------------------
/cloud_function_example/main.py:
--------------------------------------------------------------------------------
 1 | def predict(request):
 2 | 
 3 |     from flask import jsonify
 4 |     import pickle
 5 |     from google.cloud import storage
 6 | 
 7 |     def get_model(b, A):
 8 |         def line(x):
 9 |             return b * x + A
10 |         return line
11 | 
12 |     storage_client = storage.Client()
13 |     bucket = storage_client.get_bucket("simple-regression-bucket")
14 |     blob = bucket.blob("simple_regression.pkl")
15 |     blob.download_to_filename("/tmp/simple_regression.pkl")
16 | 
17 |     model_params = pickle.load(
18 |         open('/tmp/simple_regression.pkl', 'rb'))
19 | 
20 |     model = get_model(model_params[0], model_params[1])
21 | 
22 |     request_json = request.get_json()
23 |     if "x" in request.args:
24 |         try:
25 |             return jsonify({'input': request.args['x'], 'prediction': model(float(request.args['x']))})
26 |         except:
27 |             pass
28 |     elif request_json and 'x' in request_json:
29 |         return jsonify({'input': request_json['x'], 'prediction': model(float(request_json['x']))})
30 |     else:
31 |         return jsonify({'success': 'false', 'message': 'Input x was not passed correctly.'})


--------------------------------------------------------------------------------
/cloud_function_example/requirements.txt:
--------------------------------------------------------------------------------
1 | # Function dependencies, for example:
2 | # package>=version
3 | flask
4 | numpy
5 | google-cloud-storage


--------------------------------------------------------------------------------
/copilot_example/fastapi_example.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI
 2 | 
 3 | app = FastAPI()
 4 | 
 5 | @app.get("/")
 6 | def read_root():
 7 |     return {"Hello": "World"}
 8 | 
 9 | @app.get("/items/{item_id}")
10 | def read_item(item_id: int, q: str = None):
11 |     return {"item_id": item_id, "q": q}
12 | 
13 | # run the script using `uvicorn fastapi_example:app --reload` in the command line


--------------------------------------------------------------------------------
/copilot_example/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi==0.104.1


--------------------------------------------------------------------------------
/cron_example/date_job_every_minute.txt:
--------------------------------------------------------------------------------
 1 | Wed Feb 14 03:44:01 PM CST 2024
 2 | Wed Feb 14 03:45:01 PM CST 2024
 3 | Wed Feb 14 03:46:01 PM CST 2024
 4 | Wed Feb 14 03:47:01 PM CST 2024
 5 | Wed Feb 14 03:48:01 PM CST 2024
 6 | Wed Feb 14 03:49:01 PM CST 2024
 7 | Wed Feb 14 03:50:01 PM CST 2024
 8 | Wed Feb 14 03:51:01 PM CST 2024
 9 | Wed Feb 14 03:52:01 PM CST 2024
10 | Wed Feb 14 03:53:01 PM CST 2024
11 | Wed Feb 14 03:54:01 PM CST 2024
12 | Wed Feb 14 03:55:01 PM CST 2024
13 | Wed Feb 14 03:56:01 PM CST 2024
14 | Wed Feb 14 03:57:01 PM CST 2024
15 | Wed Feb 14 03:58:01 PM CST 2024
16 | Wed Feb 14 03:59:01 PM CST 2024
17 | Wed Feb 14 04:00:01 PM CST 2024
18 | Wed Feb 14 04:01:01 PM CST 2024
19 | Wed Feb 14 04:02:01 PM CST 2024
20 | Wed Feb 14 04:03:01 PM CST 2024
21 | Wed Feb 14 04:04:01 PM CST 2024
22 | Wed Feb 12 22:17:01 UTC 2025
23 | Wed Feb 12 22:18:01 UTC 2025
24 | 


--------------------------------------------------------------------------------
/cron_example/run_this_job.sh:
--------------------------------------------------------------------------------
1 | date >> /home/theja/mlops-code-examples/cron_example/date_job_every_minute.txt
2 | 


--------------------------------------------------------------------------------
/cron_example/run_this_job.txt:
--------------------------------------------------------------------------------
1 | To run the script:
2 | 
3 | Step 1: open cron editor on the command line with `crontab -e`
4 | 
5 | Step 2: insert the following line (replace `theja` in the path with your username)
6 | 
7 | * * * * * /home/theja/mlops-code-examples/cron_example/run_this_job.sh
8 | 
9 | 


--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/README.md:
--------------------------------------------------------------------------------
 1 | # Docker Compose Example
 2 | 
 3 | ## Description
 4 | 
 5 | This project provides examples of using Docker Compose to orchestrate containerized applications.
 6 | 
 7 | ## Table of Contents
 8 | 
 9 | - [Installation](#installation)
10 | - [Usage](#usage)
11 | - [Contributing](#contributing)
12 | - [License](#license)
13 | 
14 | ## Installation
15 | 
16 | To get started, follow these steps:
17 | 
18 | 1. Clone the repository.
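19 | 2. Install Docker and Docker Compose, then copy `env.example` to `.env` and set `POSTGRES_PASSWORD` (the compose file reads its environment from `.env`).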
20 | 3. Run `docker-compose up` to start the application.
21 | 
22 | ## Usage
23 | 
24 | To connect to the Docker container and manage your PostgreSQL database, you can use pgAdmin 4. Follow these steps:
25 | 
26 | 1. Open a web browser and visit `http://localhost:5050`.
27 | 2. Log in to pgAdmin 4 using the default credentials (username: `pgadmin4@pgadmin.org`, password: `admin`).
28 | 3. Click on "Add New Server" in the "Quick Links" section.
29 | 4. Enter a name for the server and switch to the "Connection" tab.
30 | 5. In the "Host name/address" field, enter the name of the Docker container running PostgreSQL (e.g., `postgres`).
31 | 6. Set the "Port" to `5432`.
32 | 7. Enter the username and password for the PostgreSQL database.
33 | 8. Click "Save" to connect to the Docker container.
34 | 
35 | Now you can use pgAdmin 4 to manage your PostgreSQL database running in the Docker container.
36 | 
37 | 
38 | ## Contributing
39 | 
40 | Contributions are welcome! Please fork the repository and submit a pull request.
41 | 
42 | ## License
43 | 
44 | This example is licensed under the [MIT License](LICENSE).


--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   database:
 5 |     image: 'postgres:latest'
 6 |     restart: always
 7 |     ports:
 8 |       - 15432:5432
 9 |     env_file:
10 |       - .env
11 |     volumes:
12 |       - ./db-data/:/var/lib/postgresql/data/
13 |       - ./init.sql:/docker-entrypoint-initdb.d/init.sql


--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/env.example:
--------------------------------------------------------------------------------
1 | POSTGRES_PASSWORD=changethis!


--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/init.sql:
--------------------------------------------------------------------------------
 1 | -- create a table
 2 | CREATE TABLE test(
 3 |   id INT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
 4 |   name TEXT NOT NULL,
 5 |   archived BOOLEAN NOT NULL DEFAULT FALSE
 6 | );
 7 | 
 8 | -- add test data
 9 | INSERT INTO test (name, archived)
10 |   VALUES ('Theja', true),
11 |   ('UIC', false);
12 | 


--------------------------------------------------------------------------------
/docker_examples/docker_example/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:buster-slim
2 | RUN apt-get update \
3 |   && apt-get install -y python3-pip python3-dev \
4 |   && pip3 install flask
5 | WORKDIR /app
6 | COPY flask_simple_regression_service.py .
7 | ENTRYPOINT ["python3","flask_simple_regression_service.py"]


--------------------------------------------------------------------------------
/docker_examples/docker_example/flask_simple_regression_service.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, jsonify, request
 2 | 
 3 | 
 4 | def model(x):
 5 |     return 2*x + 2
 6 | 
 7 | app = Flask(__name__)
 8 | @app.route("/", methods=["GET"])
 9 | def predict():
10 | 
11 |     if "x" in request.args:
12 |         try:
13 |             return jsonify({'input': request.args['x'], 'prediction': model(float(request.args['x']))})
14 |         except:
15 |             pass
16 | 
17 |     return jsonify({'status': 'false', 'message': 'Input x was not passed.'})
18 | 
19 | 
20 | if __name__ == '__main__':
21 |     app.run(host="0.0.0.0",port=5002)
22 | 


--------------------------------------------------------------------------------
/docker_examples/docker_example/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==3.0.0


--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM continuumio/miniconda3:latest
 2 | 
 3 | RUN conda install -y flask pandas \
 4 | && conda install -c conda-forge scikit-surprise scikit-learn \
 5 | && conda install pytorch torchvision cpuonly -c pytorch 
 6 | 
 7 | COPY . .
 8 | RUN mkdir -p /root/.surprise_data/ml-1m
 9 | RUN cp -r ml-1m /root/.surprise_data/ml-1m/
10 | CMD ["python","flask_recommendation_service.py"]
11 | 


--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/flask_recommendation_service.py:
--------------------------------------------------------------------------------
 1 | # load Flask
 2 | import flask
 3 | from recommend_pytorch_train import MF
 4 | from recommend_pytorch_inf import get_top_n, get_previously_seen
 5 | import torch
 6 | import pandas as pd
 7 | import surprise
 8 | import time
 9 | 
10 | 
11 | app = flask.Flask(__name__)
12 | 
start_time = time.time()

# data preload: done once at import time so requests do not pay for it
data = surprise.Dataset.load_builtin('ml-1m')
trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()
movies_df = pd.read_csv('./movies.dat',
                        sep="::", header=None, engine='python', encoding="iso-8859-1")
movies_df.columns = ['iid', 'name', 'genre']
movies_df.set_index('iid', inplace=True)

# model preload (hyper-parameters must match the ones used for training)
k = 100  # latent dimension
c_bias = 1e-6
c_vector = 1e-6
model = MF(trainset.n_users, trainset.n_items,
           k=k, c_bias=c_bias, c_vector=c_vector)
# map_location='cpu': the image installs CPU-only PyTorch, so a checkpoint
# saved from a GPU would otherwise fail to deserialize.
model.load_state_dict(torch.load(
    './recommendation_model_pytorch.pkl',
    map_location='cpu'))  # TODO: prevent overwriting
model.eval()

print('Model and data preloading completed in ', time.time()-start_time)
35 | 
36 | 
@app.route("/", methods=["GET"])
def recommend():
    """Return previously seen and recommended movie titles for ``uid`` as JSON.

    The payload always contains ``success``. When the ``uid`` query parameter is
    present it also contains ``uid``, plus ``seen`` and ``recommended`` for
    whichever of the two lookups completed.
    """
    data = {"success": False}

    if "uid" in flask.request.args:

        data['uid'] = str(flask.request.args['uid'])

        try:
            data['seen'] = get_previously_seen(
                trainset, data['uid'], movies_df)
            recommended = get_top_n(
                model, testset, trainset, data['uid'], movies_df, n=10)
            print(recommended)
            data['recommended'] = [x[1] for x in recommended]
            data["success"] = True
        except Exception:
            # Stay best-effort (the client still gets success=False), but record
            # the failure instead of discarding it silently.
            app.logger.exception(
                "Recommendation failed for uid=%s", data['uid'])

    return flask.jsonify(data)
58 | 
59 | 
60 | # start the flask app, allow remote connections
61 | if __name__ == '__main__':
62 |     app.run(host='0.0.0.0')
63 | 


--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/movies.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/docker_examples/docker_pipeline_example/movies.dat


--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/recommend_pytorch_inf.py:
--------------------------------------------------------------------------------
 1 | from recommend_pytorch_train import MF
 2 | from surprise import Dataset
 3 | import numpy as np
 4 | import torch
 5 | import torch.nn as nn
 6 | import pandas as pd
 7 | import pprint
 8 | 
 9 | 
def get_top_n(model, testset, trainset, uid_input, movies_df, n=10):
    """Return the ``n`` highest-scored unseen items for one user.

    Args:
        model: callable taking a ``[[inner_uid, inner_iid]]`` tensor and
            returning a single predicted rating.
        testset: iterable of ``(raw_uid, raw_iid, _)`` candidate rows.
        trainset: object providing ``to_inner_uid`` and ``to_inner_iid``.
        uid_input: raw user id to recommend for.
        movies_df: DataFrame indexed by integer movie id with a ``name`` column.
        n: maximum number of recommendations to return.

    Returns:
        A list of ``(raw_iid, movie_name, predicted_score)`` tuples sorted by
        predicted score, highest first. Empty when the user is unknown.
    """
    preds = []
    try:
        uid_inner = int(trainset.to_inner_uid(uid_input))
    except (KeyError, ValueError):  # Surprise reports unknown ids with ValueError
        return preds

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for uid, iid, _ in testset:
            # Compare raw ids directly rather than converting every row's uid.
            if uid != uid_input:
                continue
            try:
                iid_inner = int(trainset.to_inner_iid(iid))
                movie_name = movies_df.loc[int(iid), 'name']
            except (KeyError, ValueError):
                # Item unknown to the trainset or missing from the movie table.
                continue
            score = float(model(torch.tensor([[uid_inner, iid_inner]])))
            preds.append((iid, movie_name, score))

    # Rank by predicted score (index 2), not by movie name (index 1).
    preds.sort(key=lambda x: x[2], reverse=True)
    return preds[:n]
38 | 
39 | 
def get_previously_seen(trainset, uid, movies_df):
    """Return titles of movies the user ``uid`` has already rated.

    Args:
        trainset: object providing ``to_inner_uid``, ``to_raw_iid`` and the
            ``ur`` mapping of inner user id -> ``(inner_iid, rating)`` pairs.
        uid: raw user id, in the same form accepted by ``get_top_n``.
        movies_df: DataFrame indexed by integer movie id with a ``name`` column.

    Returns:
        A list of movie names. Collection stops once more than 10 are gathered.

    Raises:
        Whatever ``trainset.to_inner_uid`` raises for an unknown user.
    """
    # ``ur`` is keyed by inner ids, so translate the raw uid first.
    inner_uid = trainset.to_inner_uid(uid)

    seen = []
    for inner_iid, _ in trainset.ur[inner_uid]:
        try:
            # The movie table is indexed by raw movie id, not by inner item id.
            raw_iid = trainset.to_raw_iid(inner_iid)
            seen.append(movies_df.loc[int(raw_iid), 'name'])
        except (KeyError, ValueError):
            pass
        if len(seen) > 10:
            break
    return seen
50 | 
51 | 
def main():
    """Load the trained model and print seen and recommended movies for four sample users."""
    # Data
    # encoding matches the one flask_recommendation_service.py uses for this file
    movies_df = pd.read_csv('../data/ml-1m/movies.dat', sep="::",
                            header=None, engine='python',
                            encoding="iso-8859-1")
    movies_df.columns = ['iid', 'name', 'genre']
    movies_df.set_index('iid', inplace=True)
    data = Dataset.load_builtin('ml-1m')
    trainset = data.build_full_trainset()
    testset = trainset.build_anti_testset()

    # Hyper-parameters must match the ones used in recommend_pytorch_train.py
    k = 100  # latent dimension
    c_bias = 1e-6
    c_vector = 1e-6

    model = MF(trainset.n_users, trainset.n_items,
               k=k, c_bias=c_bias, c_vector=c_vector)
    model.load_state_dict(torch.load('../data/models/recommendation_model_pytorch.pkl'))
    model.eval()

    # Print the recommended items for sample users
    sample_users = list(set([x[0] for x in testset]))[:4]

    for uid in sample_users:

        print('User:', uid)
        print('\n')

        print('\tSeen:')
        seen = get_previously_seen(trainset, uid, movies_df)
        pprint.pprint(seen)
        print('\n')

        print('\tRecommendations:')
        recommended = get_top_n(model, testset, trainset, uid, movies_df, n=10)
        pprint.pprint([x[1] for x in recommended])
        print('\n')
88 | 
89 | 
90 | if __name__ == "__main__":
91 |     main()
92 | 


--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/recommend_pytorch_train.py:
--------------------------------------------------------------------------------
  1 | # https://github.com/NicolasHug/Surprise
  2 | # can be replaced by explicitly importing the movielens data
  3 | from surprise import Dataset
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | from sklearn.utils import shuffle
  9 | 
 10 | class Loader():
 11 |     current = 0
 12 | 
 13 |     def __init__(self, x, y, batchsize=1024, do_shuffle=True):
 14 |         self.shuffle = shuffle
 15 |         self.x = x
 16 |         self.y = y
 17 |         self.batchsize = batchsize
 18 |         self.batches = range(0, len(self.y), batchsize)
 19 |         if do_shuffle:
 20 |             # Every epoch re-shuffle the dataset
 21 |             self.x, self.y = shuffle(self.x, self.y)
 22 | 
 23 |     def __iter__(self):
 24 |         # Reset & return a new iterator
 25 |         self.x, self.y = shuffle(self.x, self.y, random_state=0)
 26 |         self.current = 0
 27 |         return self
 28 | 
 29 |     def __len__(self):
 30 |         # Return the number of batches
 31 |         return int(len(self.x) / self.batchsize)
 32 | 
 33 |     def __next__(self):
 34 |         n = self.batchsize
 35 |         if self.current + n >= len(self.y):
 36 |             raise StopIteration
 37 |         i = self.current
 38 |         xs = torch.from_numpy(self.x[i:i + n])
 39 |         ys = torch.from_numpy(self.y[i:i + n])
 40 |         self.current += n
 41 |         return (xs, ys)
 42 | 
 43 | 
 44 | class MF(nn.Module):
 45 | 
 46 |     def __init__(self, n_user, n_item, k=18, c_vector=1.0, c_bias=1.0):
 47 |         super(MF, self).__init__()
 48 |         self.k = k
 49 |         self.n_user = n_user
 50 |         self.n_item = n_item
 51 |         self.c_bias = c_bias
 52 |         self.c_vector = c_vector
 53 | 
 54 |         self.user = nn.Embedding(n_user, k)
 55 |         self.item = nn.Embedding(n_item, k)
 56 | 
 57 |         # We've added new terms here:
 58 |         self.bias_user = nn.Embedding(n_user, 1)
 59 |         self.bias_item = nn.Embedding(n_item, 1)
 60 |         self.bias = nn.Parameter(torch.ones(1))
 61 | 
 62 |     def forward(self, train_x):
 63 |         user_id = train_x[:, 0]
 64 |         item_id = train_x[:, 1]
 65 |         vector_user = self.user(user_id)
 66 |         vector_item = self.item(item_id)
 67 | 
 68 |         # Pull out biases
 69 |         bias_user = self.bias_user(user_id).squeeze()
 70 |         bias_item = self.bias_item(item_id).squeeze()
 71 |         biases = (self.bias + bias_user + bias_item)
 72 | 
 73 |         ui_interaction = torch.sum(vector_user * vector_item, dim=1)
 74 | 
 75 |         # Add bias prediction to the interaction prediction
 76 |         prediction = ui_interaction + biases
 77 |         return prediction
 78 | 
 79 |     def loss(self, prediction, target):
 80 | 
 81 |         def l2_regularize(array):
 82 |             loss = torch.sum(array**2)
 83 |             return loss
 84 | 
 85 |         loss_mse = F.mse_loss(prediction, target.squeeze())
 86 | 
 87 |         # Add new regularization to the biases
 88 |         prior_bias_user = l2_regularize(self.bias_user.weight) * self.c_bias
 89 |         prior_bias_item = l2_regularize(self.bias_item.weight) * self.c_bias
 90 | 
 91 |         prior_user = l2_regularize(self.user.weight) * self.c_vector
 92 |         prior_item = l2_regularize(self.item.weight) * self.c_vector
 93 |         total = loss_mse + prior_user + prior_item + prior_bias_user + prior_bias_item
 94 |         return total
 95 | 
 96 | 
 97 | def main():
 98 |     # Data
 99 |     data = Dataset.load_builtin('ml-1m')
100 |     trainset = data.build_full_trainset()
101 |     uir = np.array([x for x in trainset.all_ratings()])
102 |     train_x = test_x = uir[:, :2].astype(np.int64)  # for simplicity
103 |     train_y = test_y = uir[:, 2].astype(np.float32)
104 | 
105 |     # Parameters
106 |     lr = 5e-3
107 |     k = 100  # latent dimension
108 |     c_bias = 1e-6
109 |     c_vector = 1e-6
110 |     batchsize = 1024
111 |     num_epochs = 40
112 | 
113 |     model = MF(trainset.n_users, trainset.n_items,
114 |                k=k, c_bias=c_bias, c_vector=c_vector)
115 |     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
116 | 
117 | 
118 |     for epoch in range(num_epochs):
119 |         dataloader = Loader(train_x, train_y, batchsize=batchsize)
120 |         itr = 0
121 |         for batch in dataloader:
122 |             itr += 1
123 |             prediction = model(batch[0])
124 |             loss = model.loss(prediction, batch[1])
125 |             optimizer.zero_grad()
126 |             loss.backward()
127 |             optimizer.step()
128 |             if itr % 100 == 0:
129 |                 print(f"epoch: {epoch}. iteration: {itr}. training loss: {loss}")
130 | 
131 |     torch.save(model.state_dict(),
132 |                "../data/models/recommendation_model_pytorch.pkl")
133 | 
134 | 
# Run training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
137 | 


--------------------------------------------------------------------------------
/dvc_example/data/.gitignore:
--------------------------------------------------------------------------------
1 | /data.xml
2 | 


--------------------------------------------------------------------------------
/dvc_example/data/data.xml.dvc:
--------------------------------------------------------------------------------
1 | outs:
2 | - md5: 22a1a2931c8370d3aeedd7183606fd7f
3 |   size: 14445097
4 |   hash: md5
5 |   path: data.xml
6 | 


--------------------------------------------------------------------------------
/dvc_example/requirements.txt:
--------------------------------------------------------------------------------
1 | dvc==3.43.1
2 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_food_api/flask_food_service.py:
--------------------------------------------------------------------------------
 1 |  # load Flask 
 2 | import flask
 3 | import requests
 4 | 
 5 | app = flask.Flask(__name__)
 6 | 
 7 | @app.route("/", methods=["GET"])
 8 | def food():
 9 | 
10 |     data = {"success": False}
11 | 
12 |     if "msg" in flask.request.args:
13 |         data['foodname'] = str(flask.request.args['msg'])
14 |         try:
15 |             req = requests.get(f"https://foodish-api.herokuapp.com/api/images/{data['foodname']}")
16 |             data["response"] = req.json()
17 |             data["success"] = True
18 |         except:
19 |             pass
20 |     else:
21 |         try:
22 |             req = requests.get("https://foodish-api.herokuapp.com/api/")
23 |             data["response"] = req.json()
24 |             data["success"] = True
25 |         except:
26 |             pass
27 | 
28 |     if data['success']:
29 |         img_str= f"""
30 |                       <img src="{data["response"]['image']}" alt="Random Food" width="500" height="600"> 
31 |                  """
32 |     else:
33 |         img_str= "Food API failed"
34 |         
35 |     return f"""
36 |                 <!doctype html>
37 |                 <html>
38 |                 <head>
39 |                 <title>Our Funky HTML Page</title>
40 |                 <meta name="description" content="Our first page">
41 |                 <meta name="keywords" content="html tutorial template">
42 |                 <link rel="stylesheet" href="https://unpkg.com/flexgrid.io@3.0.4/dist/flexgrid.min.css" />
43 | 
44 |                 </head>
45 |                 <body>
46 |                  <div class="row xs-justify-center xs-items-center">
47 | 
48 |                     {img_str}
49 | 
50 |                  </div>
51 |                 </body>
52 |                 </html>
53 |         """
54 |     
# start the flask app, allow remote connections
# (host='0.0.0.0' listens on all network interfaces; no port is passed here)
if __name__ == '__main__':
    app.run(host='0.0.0.0')


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/flask_imagenet_improved_service.py:
--------------------------------------------------------------------------------
  1 | """
  2 | The MIT License (MIT)
  3 | 
  4 | Copyright (c) 2019 Avinash Sajjanshetty <hi@avi.im>
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  7 | this software and associated documentation files (the "Software"), to deal in
  8 | the Software without restriction, including without limitation the rights to
  9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 10 | the Software, and to permit persons to whom the Software is furnished to do so,
 11 | subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | 
 24 | 
 25 | # PyTorch Flask API
 26 | 
 27 | 
 28 | Check the demo [here](https://pytorch-imagenet.herokuapp.com/).
 29 | 
 30 | If you'd like to check a super simple API server, then check [this repo](https://github.com/avinassh/pytorch-flask-api).
 31 | 
 32 | 
 33 | ## Requirements
 34 | 
 35 | Runs with Python-3.7.3
 36 | 
 37 | Install them from `requirements.txt`:
 38 | 
 39 |     pip install -r requirements.txt
 40 | 
 41 | 
 42 | requirements.txt has:
 43 |     Flask==1.0.3
 44 |     https://download.pytorch.org/whl/cpu/torch-1.0.0-cp37-cp37m-linux_x86_64.whl
 45 |     torchvision==0.2.1
 46 |     numpy==1.16.4
 47 |     Pillow==7.1.0
 48 | 
 49 | ## Local Deployment
 50 | 
 51 | Run the server:
 52 | 
 53 |     python app.py
 54 | 
 55 | 
 56 | ## Heroku Deployment
 57 | 
 58 | [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/avinassh/pytorch-flask-api-heroku)
 59 | 
 60 | 
 61 | ## License
 62 | 
 63 | The mighty MIT license. Please check `LICENSE` for more details.
 64 | 
 65 | 
 66 | 
 67 | """
 68 | 
 69 | 
 70 | import os
 71 | import json
 72 | from flask import Flask, render_template, request, redirect
 73 | import io
 74 | from PIL import Image
 75 | from torchvision import models
 76 | import torchvision.transforms as transforms
 77 | 
 78 | 
 79 | def get_model():
 80 |     model = models.densenet121(pretrained=True)
 81 |     model.eval()
 82 |     return model
 83 | 
 84 | 
 85 | def transform_image(image_bytes):
 86 |     my_transforms = transforms.Compose([transforms.Resize(255),
 87 |                                         transforms.CenterCrop(224),
 88 |                                         transforms.ToTensor(),
 89 |                                         transforms.Normalize(
 90 |                                             [0.485, 0.456, 0.406],
 91 |                                             [0.229, 0.224, 0.225])])
 92 |     image = Image.open(io.BytesIO(image_bytes))
 93 |     return my_transforms(image).unsqueeze(0)
 94 | 
 95 | 
 96 | # ImageNet classes are often of the form `can_opener` or `Egyptian_cat`
 97 | # will use this method to properly format it so that we get
 98 | # `Can Opener` or `Egyptian Cat`
 99 | def format_class_name(class_name):
100 |     class_name = class_name.replace('_', ' ')
101 |     class_name = class_name.title()
102 |     return class_name
103 | 
def get_prediction(image_bytes):
    """Classify an image and look up its entry in ``imagenet_class_index``.

    Args:
        image_bytes: encoded image file contents (bytes).

    Returns:
        The ``imagenet_class_index`` entry for the highest-scoring output
        index, or ``(0, 'error')`` if preprocessing or the forward pass
        raises.
    """
    import torch  # local import: only needed for the no_grad context

    try:
        tensor = transform_image(image_bytes=image_bytes)
        # Inference only: skip autograd bookkeeping, and call the module
        # itself rather than .forward() so registered hooks still run.
        with torch.no_grad():
            outputs = model(tensor)
    except Exception:
        return 0, 'error'
    _, y_hat = outputs.max(1)
    predicted_idx = str(y_hat.item())  # the class index is keyed by strings
    return imagenet_class_index[predicted_idx]
113 | 
114 | 
# Module-level state shared by all requests.
model = get_model()
# Use a context manager so the file handle is closed once the JSON is parsed.
with open('../imagenet_class_index.json') as class_index_file:
    imagenet_class_index = json.load(class_index_file)

app = Flask(__name__)
119 | 
120 | 
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Serve the upload form (GET) and the prediction page (POST).

    On POST the uploaded ``file`` field is classified and ``result.html`` is
    rendered with ``class_id`` and ``class_name``. If the field is missing or
    empty the client is redirected back to the same URL.
    """
    if request.method == 'POST':
        file = request.files.get('file')
        # Covers both a missing form field and a submission with no file
        # selected. A view must not return None (Flask raises an error for
        # that), so redirect back to the form in either case.
        if not file:
            return redirect(request.url)
        img_bytes = file.read()
        class_id, class_name = get_prediction(image_bytes=img_bytes)
        class_name = format_class_name(class_name)
        return render_template('result.html', class_id=class_id,
                               class_name=class_name)
    return render_template('index.html')
135 | 
136 | 
# Start the development server; the port comes from the PORT environment
# variable and falls back to 5000.
# NOTE(review): debug=True turns on Flask's debug mode (interactive debugger
# and reloader); confirm this entry point is never used for a public deployment.
if __name__ == '__main__':
    app.run(debug=True, port=int(os.environ.get('PORT', 5000)))
139 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/pytorch.png


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/style.css:
--------------------------------------------------------------------------------
 1 | html,
 2 | body {
 3 |   height: 100%;
 4 | }
 5 | 
 6 | body {
 7 |   display: -ms-flexbox;
 8 |   display: flex;
 9 |   -ms-flex-align: center;
10 |   align-items: center;
11 |   padding-top: 40px;
12 |   padding-bottom: 40px;
13 |   background-color: #f5f5f5;
14 | }
15 | 
16 | .form-signin {
17 |   width: 100%;
18 |   max-width: 330px;
19 |   padding: 15px;
20 |   margin: auto;
21 | }
22 | 
23 | .form-signin .form-control {
24 |   position: relative;
25 |   box-sizing: border-box;
26 |   height: auto;
27 |   padding: 10px;
28 |   font-size: 16px;
29 | }
30 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 6 |     <link rel="stylesheet" href="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/css/bootstrap.min.css" integrity="sha384-GJzZqFGwb1QTTN6wy59ffF1BuGJpLSa9DkKMp0DgiMDm4iYMj70gZWKYbI706tWS" crossorigin="anonymous">
 7 |     <style>
 8 |       .bd-placeholder-img {
 9 |         font-size: 1.125rem;
10 |         text-anchor: middle;
11 |       }
12 | 
13 |       @media (min-width: 768px) {
14 |         .bd-placeholder-img-lg {
15 |           font-size: 3.5rem;
16 |         }
17 |       }
18 |     </style>
19 |     <link rel="stylesheet" href="/static/style.css">
20 | 
21 |     <title>Image Prediction using PyTorch</title>
22 |   </head>
23 |   <body class="text-center">
24 |     <form class="form-signin" method=post enctype=multipart/form-data>
25 |         <img class="mb-4" src="/static/pytorch.png" alt="" width="72">
26 |         <h1 class="h3 mb-3 font-weight-normal">Upload any image</h1>
27 |         <input type="file" name="file" class="form-control-file" id="inputfile">
28 |         <br/>
29 |         <button class="btn btn-lg btn-primary btn-block" type="submit">Upload</button>
30 |         <p class="mt-5 mb-3 text-muted">Built using Pytorch, Flask and Love</p>
31 |     </form>
32 |     <script src="//code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
33 |     <script src="//cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.6/umd/popper.min.js" integrity="sha384-wHAiFfRlMFy6i5SRaxvfOCifBUQy1xHdJ/yoi7FRNXMRBu5WHdZYu1hA6ZOblgut" crossorigin="anonymous"></script>
34 |     <script src="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/js/bootstrap.min.js" integrity="sha384-B0UglyR+jN6CkvvICOB2joaf5I4l3gm9GU6Hc1og6Ls7i6U/mkkaduKaBhlAXv9k" crossorigin="anonymous"></script>
35 |     <script type="text/javascript">
36 |       $('#inputfile').bind('change', function() {
37 |           let fileSize = this.files[0].size/1024/1024; // this gives in MB
38 |           if (fileSize > 1) {
39 |             $("#inputfile").val(null);
40 |             alert('file is too big. images more than 1MB are not allowed')
41 |             return
42 |           }
43 | 
44 |           let ext = $('#inputfile').val().split('.').pop().toLowerCase();
45 |           if($.inArray(ext, ['jpg','jpeg']) == -1) {
46 |             $("#inputfile").val(null);
47 |             alert('only jpeg/jpg files are allowed!');
48 |           }
49 |       });
50 |     </script>
51 |     <!-- Github Ribbon Start-->
52 |     <a href="https://github.com/avinassh/pytorch-flask-api-heroku" class="github-corner"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#0E2E3B; color:#FFFFFF; position: absolute; top: 0; border: 0; right: 0;"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
53 |     <!-- Github Ribbon End-->
54 |   </body>
55 | </html>


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/templates/result.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 6 |     <link rel="stylesheet" href="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/css/bootstrap.min.css" integrity="sha384-GJzZqFGwb1QTTN6wy59ffF1BuGJpLSa9DkKMp0DgiMDm4iYMj70gZWKYbI706tWS" crossorigin="anonymous">
 7 |     <style>
 8 |       .bd-placeholder-img {
 9 |         font-size: 1.125rem;
10 |         text-anchor: middle;
11 |       }
12 | 
13 |       @media (min-width: 768px) {
14 |         .bd-placeholder-img-lg {
15 |           font-size: 3.5rem;
16 |         }
17 |       }
18 |     </style>
19 |     <link rel="stylesheet" href="/static/style.css">
20 | 
21 |     <title>Image Prediction using PyTorch</title>
22 |   </head>
23 |   <body class="text-center">
24 |     <form class="form-signin" method=post enctype=multipart/form-data>
25 |         <img class="mb-4" src="/static/pytorch.png" alt="" width="72">
26 |         <h1 class="h3 mb-3 font-weight-normal">Prediction</h1>
27 |         <h5 class="h5 mb-3 font-weight-normal">Detected Image: {{ class_name }}</h5>
28 |         <h5 class="h6 mb-3 font-weight-normal">ImageNet Class ID: {{ class_id }}</h5>
29 |         <p class="mt-5 mb-3 text-muted">Built using Pytorch, Flask and Love</p>
30 |     </form>
31 |     <script src="//code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
32 |     <script src="//cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.6/umd/popper.min.js" integrity="sha384-wHAiFfRlMFy6i5SRaxvfOCifBUQy1xHdJ/yoi7FRNXMRBu5WHdZYu1hA6ZOblgut" crossorigin="anonymous"></script>
33 |     <script src="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/js/bootstrap.min.js" integrity="sha384-B0UglyR+jN6CkvvICOB2joaf5I4l3gm9GU6Hc1og6Ls7i6U/mkkaduKaBhlAXv9k" crossorigin="anonymous"></script>
34 |     <!-- Github Ribbon Start-->
35 |     <a href="https://github.com/avinassh/pytorch-flask-api-heroku" class="github-corner"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#0E2E3B; color:#FFFFFF; position: absolute; top: 0; border: 0; right: 0;"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
36 |     <!-- Github Ribbon End-->
37 |   </body>
38 | </html>


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_service.py:
--------------------------------------------------------------------------------
  1 | """
  2 | The MIT License (MIT)
  3 | 
  4 | Copyright (c) 2019 Avinash Sajjanshetty <hi@avi.im>
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  7 | this software and associated documentation files (the "Software"), to deal in
  8 | the Software without restriction, including without limitation the rights to
  9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 10 | the Software, and to permit persons to whom the Software is furnished to do so,
 11 | subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | 
 24 | # PyTorch Flask API
 25 | 
 26 | This repo contains a sample code to show how to create a Flask API server by deploying our PyTorch model. This is a sample code which goes with [tutorial](https://pytorch.org/tutorials/intermediate/flask_rest_api_tutorial.html).
 27 | 
 28 | If you'd like to learn how to deploy to Heroku, then check [this repo](https://github.com/avinassh/pytorch-flask-api-heroku).
 29 | 
 30 | 
 31 | ## How to 
 32 | 
 33 | Install the dependencies:
 34 | 
 35 |     pip install -r requirements.txt
 36 | 
 37 | requirements.txt should have:
 38 |     Flask==1.0.3
 39 |     torchvision==0.3.0
 40 | 
 41 | Run the Flask server:
 42 | 
 43 |     FLASK_ENV=development FLASK_APP=[YOUR-FILE-NAME].py flask run
 44 | 
 45 | 
 46 | From another tab, send the image file in a request:
 47 | 
 48 |     curl -X POST -F file=@cat_pic.jpeg http://localhost:5000/predict
 49 | 
 50 | 
 51 | ## License
 52 | 
 53 | The mighty MIT license. Please check `LICENSE` for more details.
 54 | 
 55 | """
 56 | 
 57 | import io
 58 | import json
 59 | 
 60 | from torchvision import models
 61 | import torchvision.transforms as transforms
 62 | from PIL import Image
 63 | from flask import Flask, jsonify, request
 64 | 
 65 | 
 66 | app = Flask(__name__)
 67 | imagenet_class_index = json.load(open('./imagenet_class_index.json'))
 68 | model = models.densenet121(pretrained=True)
 69 | model.eval()
 70 | 
 71 | 
 72 | def transform_image(image_bytes):
 73 |     my_transforms = transforms.Compose([transforms.Resize(255),
 74 |                                         transforms.CenterCrop(224),
 75 |                                         transforms.ToTensor(),
 76 |                                         transforms.Normalize(
 77 |                                             [0.485, 0.456, 0.406],
 78 |                                             [0.229, 0.224, 0.225])])
 79 |     image = Image.open(io.BytesIO(image_bytes))
 80 |     return my_transforms(image).unsqueeze(0)
 81 | 
 82 | 
 83 | def get_prediction(image_bytes):
 84 |     tensor = transform_image(image_bytes=image_bytes)
 85 |     outputs = model.forward(tensor)
 86 |     _, y_hat = outputs.max(1)
 87 |     predicted_idx = str(y_hat.item())
 88 |     return imagenet_class_index[predicted_idx]
 89 | 
 90 | 
 91 | @app.route('/predict', methods=['POST'])
 92 | def predict():
 93 |     if request.method == 'POST':
 94 |         file = request.files['file']
 95 |         img_bytes = file.read()
 96 |         class_id, class_name = get_prediction(image_bytes=img_bytes)
 97 |         return jsonify({'class_id': class_id, 'class_name': class_name})
 98 | 
 99 | 
# Start Flask's built-in server with its default settings when run as a script.
if __name__ == '__main__':
    app.run()
102 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/imagenet_class_index.json:
--------------------------------------------------------------------------------
1 | {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": 
["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], 
"113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": 
["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": 
["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], 
"268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": 
["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", 
"Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": 
["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", 
"cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", 
"fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", 
"jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", 
"mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": 
["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], 
"785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", 
"sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", 
"water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], 
"957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}


--------------------------------------------------------------------------------
/flask_examples/flask_example_recommendation/flask_recommendation_service.py:
--------------------------------------------------------------------------------
 1 | # load Flask
 2 | import flask
 3 | from recommend_pytorch_train import MF
 4 | from recommend_pytorch_inf import get_top_n, get_previously_seen
 5 | import torch
 6 | import pandas as pd
 7 | import surprise
 8 | import time
 9 | 
10 | 
11 | app = flask.Flask(__name__)
12 | 
13 | start_time = time.time()
14 | 
15 | # data preload
16 | data = surprise.Dataset.load_builtin('ml-1m')
17 | trainset = data.build_full_trainset()
18 | testset = trainset.build_anti_testset()
19 | movies_df = pd.read_csv('../data/ml-1m/movies.dat',
20 |                         sep="::", header=None, engine='python')
21 | movies_df.columns = ['iid', 'name', 'genre']
22 | movies_df.set_index('iid', inplace=True)
23 | 
24 | # model preload
25 | k = 30  # latent dimension
26 | c_bias = 1e-6
27 | c_vector = 1e-6
28 | model = MF(trainset.n_users, trainset.n_items,
29 |            k=k, c_bias=c_bias, c_vector=c_vector)
30 | model.load_state_dict(torch.load(
31 |     '../data/models/recommendation_model_pytorch.pkl'))  # TODO: prevent overwriting
32 | model.eval()
33 | 
34 | print('Model and data preloading completed in ', time.time()-start_time)
35 | 
36 | 
37 | @app.route("/", methods=["GET"])
38 | def recommend():
39 | 
40 |     data = {"success": False}
41 | 
42 |     if "uid" in flask.request.args:
43 | 
44 |         data['uid'] = str(flask.request.args['uid'])
45 | 
46 |         try:
47 |             data['seen'] = get_previously_seen(
48 |                 trainset, data['uid'], movies_df)
49 |             recommended = get_top_n(
50 |                 model, testset, trainset, data['uid'], movies_df, n=10)
51 |             print(recommended)
52 |             data['recommended'] = [x[1] for x in recommended]
53 |             data["success"] = True
54 |         except:
55 |             pass
56 | 
57 |     return flask.jsonify(data)
58 | 
59 | 
60 | # start the flask app, allow remote connections
61 | if __name__ == '__main__':
62 |     app.run(host='0.0.0.0')
63 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/flask_simple_regression_improved_service.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | from flask import Flask, jsonify, request, render_template
 3 | 
 4 | 
 5 | def get_model(b, A):
 6 |     def line(x):
 7 |         return b * x + A
 8 |     return line
 9 | 
10 | 
11 | model_params = pickle.load(
12 |     open('/home/theja/teach/mlops-data/models/simple_regression.pkl', 'rb'))
13 | model = get_model(model_params[0], model_params[1])
14 | 
15 | 
16 | app = Flask(__name__)
17 | 
18 | 
19 | @app.route("/", methods=["GET", "POST"])
20 | def predict():
21 | 
22 |     if "x" in request.args:
23 |         try:
24 |             return jsonify({'input': request.args['x'], 'prediction': model(float(request.args['x']))})
25 |         except:
26 |             return jsonify({'success': 'false', 'message': 'Input x was not passed correctly.'})
27 |     elif request.method == 'POST':
28 |         result = {'x': request.form.get('x'), 'prediction': None}
29 |         try:
30 |             x = float(request.form['x'])
31 |             result['prediction'] = model(float(request.form['x']))
32 |         except:
33 |             pass
34 |         return render_template('result.html', result=result)
35 | 
36 |     return render_template('index.html')
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     app.run()
41 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/static/pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/flask_examples/flask_example_regression/flask_simple_regression_improved/static/pytorch.png


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/static/style.css:
--------------------------------------------------------------------------------
 1 | html,
 2 | body {
 3 |   height: 100%; /* both elements take the full height of their container */
 4 | }
 5 | /* body is a flex container whose items are centered on the cross axis */
 6 | body {
 7 |   display: -ms-flexbox; /* -ms- prefixed form, declared first so the standard value below wins where supported */
 8 |   display: flex;
 9 |   -ms-flex-align: center; /* -ms- prefixed counterpart of align-items */
10 |   align-items: center;
11 |   padding-top: 40px;
12 |   padding-bottom: 40px;
13 |   background-color: #f5f5f5;
14 | }
15 | /* .form-signin: full width capped at 330px; auto margins absorb the free space around it */
16 | .form-signin {
17 |   width: 100%;
18 |   max-width: 330px;
19 |   padding: 15px;
20 |   margin: auto;
21 | }
22 | /* Elements with class form-control nested inside .form-signin */
23 | .form-signin .form-control {
24 |   position: relative;
25 |   box-sizing: border-box; /* padding counts toward the declared width/height */
26 |   height: auto;
27 |   padding: 10px;
28 |   font-size: 16px;
29 | }
30 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |   <meta charset="utf-8">
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  7 |   <link rel="stylesheet" href="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/css/bootstrap.min.css"
  8 |     integrity="sha384-GJzZqFGwb1QTTN6wy59ffF1BuGJpLSa9DkKMp0DgiMDm4iYMj70gZWKYbI706tWS" crossorigin="anonymous">
  9 |   <style>
 10 |     .bd-placeholder-img {
 11 |       font-size: 1.125rem;
 12 |       text-anchor: middle;
 13 |     }
 14 | 
 15 |     @media (min-width: 768px) {
 16 |       .bd-placeholder-img-lg {
 17 |         font-size: 3.5rem;
 18 |       }
 19 |     }
 20 |   </style>
 21 |   <link rel="stylesheet" href="/static/style.css">
 22 | 
 23 |   <title>Simple Model Serving</title>
 24 | </head>
 25 | 
 26 | <body class="text-center">
 27 |   <form class="form-signin" method="post" action="/"> <!-- relative action: posts field "x" back to whichever host/port served this page -->
 28 |     <h1 class="h3 mb-3 font-weight-normal">Insert the feature value</h1>
 29 |     <p>Feature val <input type="text" name="x" /></p>
 30 |     <br />
 31 |     <button class="btn btn-lg btn-primary btn-block" type="submit">Submit</button>
 32 |     <p class="mt-5 mb-3 text-muted">Simple Model Prediction Service</p>
 33 |   </form>
 34 |   <script src="//code.jquery.com/jquery-3.3.1.slim.min.js"
 35 |     integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"
 36 |     crossorigin="anonymous"></script>
 37 |   <script src="//cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.6/umd/popper.min.js"
 38 |     integrity="sha384-wHAiFfRlMFy6i5SRaxvfOCifBUQy1xHdJ/yoi7FRNXMRBu5WHdZYu1hA6ZOblgut"
 39 |     crossorigin="anonymous"></script>
 40 |   <script src="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/js/bootstrap.min.js"
 41 |     integrity="sha384-B0UglyR+jN6CkvvICOB2joaf5I4l3gm9GU6Hc1og6Ls7i6U/mkkaduKaBhlAXv9k"
 42 |     crossorigin="anonymous"></script>
 43 |   <script type="text/javascript">
 44 |     $('#inputfile').bind('change', function () {
 45 |       let fileSize = this.files[0].size / 1024 / 1024; // this gives in MB
 46 |       if (fileSize > 1) {
 47 |         $("#inputfile").val(null);
 48 |         alert('file is too big. images more than 1MB are not allowed')
 49 |         return
 50 |       }
 51 | 
 52 |       let ext = $('#inputfile').val().split('.').pop().toLowerCase();
 53 |       if ($.inArray(ext, ['jpg', 'jpeg']) == -1) {
 54 |         $("#inputfile").val(null);
 55 |         alert('only jpeg/jpg files are allowed!');
 56 |       }
 57 |     });
 58 |   </script>
 59 |   <!-- Github Ribbon Start-->
 60 |   <a href="https://github.com/avinassh/pytorch-flask-api-heroku" class="github-corner"><svg width="80" height="80"
 61 |       viewBox="0 0 250 250" style="fill:#0E2E3B; color:#FFFFFF; position: absolute; top: 0; border: 0; right: 0;">
 62 |       <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
 63 |       <path
 64 |         d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
 65 |         fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
 66 |       <path
 67 |         d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
 68 |         fill="currentColor" class="octo-body"></path>
 69 |     </svg></a>
 70 |   <style>
 71 |     .github-corner:hover .octo-arm {
 72 |       animation: octocat-wave 560ms ease-in-out
 73 |     }
 74 | 
 75 |     @keyframes octocat-wave {
 76 | 
 77 |       0%,
 78 |       100% {
 79 |         transform: rotate(0)
 80 |       }
 81 | 
 82 |       20%,
 83 |       60% {
 84 |         transform: rotate(-25deg)
 85 |       }
 86 | 
 87 |       40%,
 88 |       80% {
 89 |         transform: rotate(10deg)
 90 |       }
 91 |     }
 92 | 
 93 |     @media (max-width:500px) {
 94 |       .github-corner:hover .octo-arm {
 95 |         animation: none
 96 |       }
 97 | 
 98 |       .github-corner .octo-arm {
 99 |         animation: octocat-wave 560ms ease-in-out
100 |       }
101 |     }
102 |   </style>
103 |   <!-- Github Ribbon End-->
104 | </body>
105 | 
106 | </html>


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/templates/result.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |   <meta charset="utf-8">
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 7 |   <link rel="stylesheet" href="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/css/bootstrap.min.css"
 8 |     integrity="sha384-GJzZqFGwb1QTTN6wy59ffF1BuGJpLSa9DkKMp0DgiMDm4iYMj70gZWKYbI706tWS" crossorigin="anonymous">
 9 |   <style>
10 |     .bd-placeholder-img {
11 |       font-size: 1.125rem;
12 |       text-anchor: middle;
13 |     }
14 | 
15 |     @media (min-width: 768px) {
16 |       .bd-placeholder-img-lg {
17 |         font-size: 3.5rem;
18 |       }
19 |     }
20 |   </style>
21 |   <link rel="stylesheet" href="/static/style.css">
22 | 
23 |   <title>Prediction</title>
24 | </head>
25 | 
26 | <body class="text-center">
27 | 
28 |   <form class="form-signin" method=post enctype=multipart/form-data>
29 |     <h1 class="h3 mb-3 font-weight-normal">Prediction</h1>
30 |     <h5 class="h5 mb-3 font-weight-normal">Input: {{ result['x']}}</h5>
31 |     <h5 class="h6 mb-3 font-weight-normal">Prediction: {{ result['prediction'] }}</h5>
32 |     <p class="mt-5 mb-3 text-muted">Simple Prediction Service</p>
33 |   </form>
34 |   <script src="//code.jquery.com/jquery-3.3.1.slim.min.js"
35 |     integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"
36 |     crossorigin="anonymous"></script>
37 |   <script src="//cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.6/umd/popper.min.js"
38 |     integrity="sha384-wHAiFfRlMFy6i5SRaxvfOCifBUQy1xHdJ/yoi7FRNXMRBu5WHdZYu1hA6ZOblgut"
39 |     crossorigin="anonymous"></script>
40 |   <script src="//stackpath.bootstrapcdn.com/bootstrap/4.2.1/js/bootstrap.min.js"
41 |     integrity="sha384-B0UglyR+jN6CkvvICOB2joaf5I4l3gm9GU6Hc1og6Ls7i6U/mkkaduKaBhlAXv9k"
42 |     crossorigin="anonymous"></script>
43 |   <!-- Github Ribbon Start-->
44 |   <a href="https://github.com/avinassh/pytorch-flask-api-heroku" class="github-corner"><svg width="80" height="80"
45 |       viewBox="0 0 250 250" style="fill:#0E2E3B; color:#FFFFFF; position: absolute; top: 0; border: 0; right: 0;">
46 |       <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
47 |       <path
48 |         d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
49 |         fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
50 |       <path
51 |         d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
52 |         fill="currentColor" class="octo-body"></path>
53 |     </svg></a>
54 |   <style>
55 |     .github-corner:hover .octo-arm {
56 |       animation: octocat-wave 560ms ease-in-out
57 |     }
58 | 
59 |     @keyframes octocat-wave {
60 | 
61 |       0%,
62 |       100% {
63 |         transform: rotate(0)
64 |       }
65 | 
66 |       20%,
67 |       60% {
68 |         transform: rotate(-25deg)
69 |       }
70 | 
71 |       40%,
72 |       80% {
73 |         transform: rotate(10deg)
74 |       }
75 |     }
76 | 
77 |     @media (max-width:500px) {
78 |       .github-corner:hover .octo-arm {
79 |         animation: none
80 |       }
81 | 
82 |       .github-corner .octo-arm {
83 |         animation: octocat-wave 560ms ease-in-out
84 |       }
85 |     }
86 |   </style>
87 |   <!-- Github Ribbon End-->
88 | </body>
89 | 
90 | </html>


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_service.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, jsonify, request
 2 | import flask
 3 | import os
 4 | 
 5 | def model(x):
 6 |     return 2*x+1
 7 | 
 8 | app = Flask(__name__)
 9 | 
@app.route("/", methods=["GET"])
def hello():
    """Root endpoint: report the installed Flask version as plain text.

    Returns:
        str: ``"flask version: <version>"``.
    """
    # flask.__version__ is deprecated in Flask 3.x; the installed package
    # metadata is the supported way to read the version.
    from importlib.metadata import version

    return f"flask version: {version('flask')}"
13 | 
@app.route("/mypredict", methods=["GET"])
def predict():
    """Return the model's prediction for the ``x`` query parameter.

    Handles GET requests to ``/mypredict``. When ``x`` is present and parses
    as a float, the response is JSON with the raw ``input`` string and the
    ``prediction`` computed by ``model``. Otherwise a JSON error payload is
    returned (``success`` set to ``'false'`` plus a ``message``); the HTTP
    status is left at the default so existing clients keep working.

    Returns:
        Response: JSON with ``input`` and ``prediction`` on success, or
        ``success``/``message`` when ``x`` is missing or not a number.
    """
    raw_x = request.args.get("x")
    if raw_x is None:
        return jsonify({'success': 'false', 'message': 'Input x was not passed.'})

    try:
        x = float(raw_x)
    except ValueError:
        # Previously swallowed by a bare except and misreported as "not passed".
        return jsonify({'success': 'false',
                        'message': f'Input x={raw_x!r} is not a valid number.'})

    return jsonify({'input': raw_x, 'prediction': model(x)})
38 | 
39 | 
if __name__ == '__main__':
    # Bind address and port come from the FLASK_RUN_* environment variables,
    # falling back to a local-only default.
    app.run(
        host=os.getenv('FLASK_RUN_HOST', '127.0.0.1'),
        port=int(os.getenv('FLASK_RUN_PORT', 5000)),
    )
45 | 


--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/requirements.txt:
--------------------------------------------------------------------------------
1 | flask==3.1.0


--------------------------------------------------------------------------------
/flask_examples/flask_example_weather/flask_weather_service.py:
--------------------------------------------------------------------------------
 1 | # load Flask
 2 | import flask
 3 | import requests
 4 | from flask import jsonify
 5 | from geopy.geocoders import Nominatim
 6 | 
 7 | app = flask.Flask(__name__)
 8 | 
 9 | # define a predict function as an endpoint
10 | @app.route("/", methods=["GET", "POST"])
11 | def weather():
12 | 
13 |     data = {"success": False}
14 |     # https://pypi.org/project/geopy/
15 |     geolocator = Nominatim(user_agent="cloud_function_weather_app")
16 | 
17 |     # Works with post req:
18 |     # curl -i -H "Content-Type: application/json" -X POST -d "{\"msg\":\"Chicago\"}" localhost:5000
19 |     # params = flask.request.json
20 |     # if params is None:
21 |     #     params = flask.request.args
22 | 
23 |     if flask.request.is_json:
24 |         params = flask.request.json
25 |     else:
26 |         params = flask.request.args
27 | 
28 |     # params = request.get_json()
29 |     if "msg" in params:
30 |         location = geolocator.geocode(str(params["msg"]))
31 |         data["location"] = [
32 |             location.address,
33 |             location.latitude,
34 |             location.longitude,
35 |             location.altitude,
36 |         ]
37 |         # https://www.weather.gov/documentation/services-web-api
38 |         try:
39 |             result1 = requests.get(
40 |                 f"https://api.weather.gov/points/{location.latitude},{location.longitude}"
41 |             )
42 |             result2 = requests.get(f"{result1.json()['properties']['forecast']}")
43 |             data["response"] = result2.json()
44 |             data["success"] = True
45 |         except:
46 |             pass
47 |     return jsonify(data)
48 | 
49 | 
50 | # start the flask app, allow remote connections
51 | if __name__ == "__main__":
52 |     app.run(host="0.0.0.0")
53 | 


--------------------------------------------------------------------------------
/github_actions_example/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 | 
jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # The v2 releases of these actions are deprecated; v4 is the maintained line.
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          # Node 14 is end-of-life; use a supported LTS release.
          node-version: '20'

      - name: Install dependencies
        run: npm install

      - name: Run tests
        run: npm test

      - name: Build
        run: npm run build


--------------------------------------------------------------------------------
/github_actions_example/README.md:
--------------------------------------------------------------------------------
 1 | # My Project
 2 | 
 3 | This project is a simple JavaScript application that serves as an entry point for demonstrating a GitHub Actions workflow.
 4 | 
 5 | ## Purpose
 6 | 
 7 | The purpose of this project is to showcase how to set up a continuous integration pipeline using GitHub Actions.
 8 | 
 9 | ## Setup
10 | 
11 | To set up this project locally, follow these steps:
12 | 
13 | 1. Clone the repository:
14 |    ```
15 |    git clone https://github.com/yourusername/my-project.git
16 |    ```
17 | 
18 | 2. Navigate into the project directory:
19 |    ```
20 |    cd my-project
21 |    ```
22 | 
23 | 3. Install the necessary dependencies (if any):
24 |    ```
25 |    npm install
26 |    ```
27 | 
28 | 4. Run the application:
29 |    ```
30 |    node src/index.js
31 |    ```
32 | 
33 | ## GitHub Actions
34 | 
35 | This project includes a GitHub Actions workflow defined in `.github/workflows/ci.yml` that runs on specified events to ensure code quality and functionality.


--------------------------------------------------------------------------------
/github_actions_example/src/index.js:
--------------------------------------------------------------------------------
1 | console.log("Hello, World! This is the entry point of my project.");


--------------------------------------------------------------------------------
/kafka_example/client_notebooks/consumer_local_example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "id": "b152a484-14df-4835-a31c-36e806ed917f",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "from confluent_kafka import Consumer\n",
 11 |     "\n",
 12 |     "conf = {'bootstrap.servers': \"localhost:9092\",\n",
 13 |     "        'group.id': \"foo\",\n",
 14 |     "        'enable.auto.commit': False,\n",
 15 |     "        'auto.offset.reset': 'earliest'}\n",
 16 |     "\n",
 17 |     "consumer = Consumer(conf)"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 7,
 23 |    "id": "fe58d7c8-586f-4437-b4a7-6dbc40be9798",
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "consumer.subscribe([\"mlops-topic\"])"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 14,
 33 |    "id": "8fb97686-4c56-4c73-a359-bc344576ea2e",
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "msg = consumer.poll(timeout=1.0)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 15,
 43 |    "id": "1327eec3-e411-4749-b666-698381a3f3f8",
 44 |    "metadata": {},
 45 |    "outputs": [
 46 |     {
 47 |      "data": {
 48 |       "text/plain": [
 49 |        "b'this is mlops course 3'"
 50 |       ]
 51 |      },
 52 |      "execution_count": 15,
 53 |      "metadata": {},
 54 |      "output_type": "execute_result"
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "msg.value()"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 5,
 64 |    "id": "f9008409-40f4-4466-8462-6de38feb36e7",
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "consumer.close()"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "id": "357d6527-f084-4823-9962-77c3a5965785",
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": []
 78 |   }
 79 |  ],
 80 |  "metadata": {
 81 |   "kernelspec": {
 82 |    "display_name": "Python 3",
 83 |    "language": "python",
 84 |    "name": "python3"
 85 |   },
 86 |   "language_info": {
 87 |    "codemirror_mode": {
 88 |     "name": "ipython",
 89 |     "version": 3
 90 |    },
 91 |    "file_extension": ".py",
 92 |    "mimetype": "text/x-python",
 93 |    "name": "python",
 94 |    "nbconvert_exporter": "python",
 95 |    "pygments_lexer": "ipython3",
 96 |    "version": "3.8.2"
 97 |   }
 98 |  },
 99 |  "nbformat": 4,
100 |  "nbformat_minor": 5
101 | }
102 | 


--------------------------------------------------------------------------------
/kafka_example/client_notebooks/producer_local_example.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 4,
 6 |    "id": "18669f83-1d05-4af8-b2c0-12ed1d0920d6",
 7 |    "metadata": {},
 8 |    "outputs": [],
 9 |    "source": [
10 |     "from confluent_kafka import Producer\n",
11 |     "import socket\n",
12 |     "\n",
13 |     "conf = {'bootstrap.servers': \"localhost:9092\",\n",
14 |     "        'client.id': socket.gethostname()}\n",
15 |     "\n",
16 |     "producer = Producer(conf)"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": 5,
22 |    "id": "c4fd9b3b-1bf4-42c8-93aa-282a750262f7",
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course\")"
27 |    ]
28 |   },
29 |   {
30 |    "cell_type": "code",
31 |    "execution_count": 6,
32 |    "id": "ed58b93e-628d-4101-99c0-12f38d491c24",
33 |    "metadata": {},
34 |    "outputs": [],
35 |    "source": [
36 |     "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course 2\")"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "code",
41 |    "execution_count": 7,
42 |    "id": "f499c9a1-b1c5-41d6-b90d-0114383ef86d",
43 |    "metadata": {},
44 |    "outputs": [],
45 |    "source": [
46 |     "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course 3\")"
47 |    ]
48 |   },
49 |   {
50 |    "cell_type": "code",
51 |    "execution_count": null,
52 |    "id": "27022fab-30a8-47f7-b78d-f8b43cb9533a",
53 |    "metadata": {},
54 |    "outputs": [],
55 |    "source": []
56 |   }
57 |  ],
58 |  "metadata": {
59 |   "kernelspec": {
60 |    "display_name": "Python 3",
61 |    "language": "python",
62 |    "name": "python3"
63 |   },
64 |   "language_info": {
65 |    "codemirror_mode": {
66 |     "name": "ipython",
67 |     "version": 3
68 |    },
69 |    "file_extension": ".py",
70 |    "mimetype": "text/x-python",
71 |    "name": "python",
72 |    "nbconvert_exporter": "python",
73 |    "pygments_lexer": "ipython3",
74 |    "version": "3.8.2"
75 |   }
76 |  },
77 |  "nbformat": 4,
78 |  "nbformat_minor": 5
79 | }
80 | 


--------------------------------------------------------------------------------
/kafka_example/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   zookeeper:
 5 |     image: 'confluentinc/cp-zookeeper:latest'
 6 |     environment:
 7 |       ZOOKEEPER_CLIENT_PORT: 2181
 8 |       ZOOKEEPER_TICK_TIME: 2000
 9 |     ports:
10 |       - "2181:2181"
11 |     networks:
12 |       - kafka-net
13 |     volumes:
14 |       - zookeeper_data:/var/lib/zookeeper/data
15 |       - zookeeper_log:/var/lib/zookeeper/log
16 | 
17 |   kafka:
18 |     image: 'confluentinc/cp-kafka:latest'
19 |     depends_on:
20 |       - zookeeper
21 |     environment:
22 |       KAFKA_BROKER_ID: 1
23 |       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
24 |       KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
25 |       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
26 |       KAFKA_LOG_DIRS: /var/lib/kafka/data
27 |     ports:
28 |       - "9092:9092"
29 |     networks:
30 |       - kafka-net
31 |     volumes:
32 |       - kafka_data:/var/lib/kafka/data
33 | 
34 | networks:
35 |   kafka-net:
36 |     driver: bridge
37 | 
38 | volumes:
39 |   zookeeper_data:
40 |   zookeeper_log:
41 |   kafka_data:


--------------------------------------------------------------------------------
/kafka_example/readme.md:
--------------------------------------------------------------------------------
 1 | # Kafka with Docker and Zookeeper
 2 | 
 3 | This repository demonstrates how to set up Apache Kafka with Zookeeper using Docker and Docker Compose.
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | Ensure you have the following installed on your machine:
 8 | - [Docker](https://www.docker.com/products/docker-desktop)
 9 | - [Docker Compose](https://docs.docker.com/compose/install/)
10 | 
11 | ## Setup Instructions
12 | 
13 | 1. **Clone the repository** with `git clone` and navigate to the `kafka_example` directory.
14 | 
15 | 2. **Start the Services**:
16 | 
17 |     Run the following command to bring up Kafka and Zookeeper:
18 | 
19 |     ```bash
20 |     docker-compose up -d
21 |     ```
22 | 
23 |     This will start the Kafka broker on port `9092` and Zookeeper on port `2181`.
24 | 
25 | 3. **Verify Kafka Setup**:
26 | 
27 |     - To **list the topics**, use:
28 | 
29 |       ```bash
30 |       docker exec -it <kafka_container_name> kafka-topics --bootstrap-server localhost:9092 --list
31 |       ```
32 | 
33 |     - To **create a new topic**, use:
34 | 
35 |       ```bash
36 |       docker exec -it <kafka_container_name> kafka-topics --bootstrap-server localhost:9092 --create --topic test-topic --partitions 1 --replication-factor 1
37 |       ```
38 | 
39 |       Replace `<kafka_container_name>` with the actual name of the Kafka container. You can find the container name by running:
40 | 
41 |       ```bash
42 |       docker ps
43 |       ```
44 | 
45 | 4. **Kafka CLI Examples**:
46 | 
47 |     - **Producing messages** to a topic:
48 | 
49 |       ```bash
50 |       docker exec -it <kafka_container_name> kafka-console-producer --bootstrap-server localhost:9092 --topic test-topic
51 |       ```
52 | 
53 |       Type your message and hit Enter to send it.
54 | 
55 |     - **Consuming messages** from a topic:
56 | 
57 |       ```bash
58 |       docker exec -it <kafka_container_name> kafka-console-consumer --bootstrap-server localhost:9092 --topic test-topic --from-beginning
59 |       ```
60 | 
61 | ## Useful Commands
62 | 
63 | - **Stop services**:
64 | 
65 |     ```bash
66 |     docker-compose down
67 |     ```
68 | 
69 | - **Restart services**:
70 | 
71 |     ```bash
72 |     docker-compose restart
73 |     ```
74 | 
75 | ## Troubleshooting
76 | 
77 | - If Kafka fails to start, ensure no other service is using port `9092` or `2181`.
78 | - You can check logs using:
79 | 
80 |     ```bash
81 |     docker-compose logs
82 |     ```
83 | 
84 | ## License
85 | 
86 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
87 | 


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_imperative/.weather_pod_additional.txt.swp


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/echo_server_imperative_example.txt:
--------------------------------------------------------------------------------
 1 | # Step 1: Deploy an echo server, which simply responds with whatever was sent to it
 2 | 
 3 | kubectl create deployment hello-minikube --image=k8s.gcr.io/echoserver:1.4
 4 | 
 5 | # Step 2: Expose the port via a NodePort service
 6 | 
 7 | kubectl expose deployment hello-minikube --type=NodePort --port=8080
 8 | 
 9 | # Step 3: Pick the port from the result returned from running the above command (it is the latter higher number)
10 | 
11 | kubectl get services hello-minikube
12 | 
13 | #Step 4: Get the IP of the cluster
14 | 
15 | minikube ip
16 | 
17 | 
18 | # Step 5: Open a browser at ip:port or use curl (with an example IP and port as shown below)
19 | 
20 | curl -X POST http://192.168.99.101:31313 \
21 |    -H "Content-Type: application/json" \
22 |    -d '{"productId": 123456, "quantity": 100}'  


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/minikube.sh:
--------------------------------------------------------------------------------
1 | minikube start \
2 |     --addons="dashboard" \
3 |     --addons="metrics-server"
4 | 


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_nginx_replica/.weather_pod_additional.txt.swp


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/nginx_example.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   creationTimestamp: null
 5 |   labels:
 6 |     app: nginx-replica-example
 7 |   name: nginx-replica-example
 8 | spec:
 9 |   replicas: 3
10 |   selector:
11 |     matchLabels:
12 |       app: nginx-replica-example
13 |   strategy: {}
14 |   template:
15 |     metadata:
16 |       creationTimestamp: null
17 |       labels:
18 |         app: nginx-replica-example
19 |     spec:
20 |       containers:
21 |       - image: nginx:1.18.0
22 |         name: nginx-replica-example-k8s
23 |         resources: {}
24 | status: {}


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/nginx_example_additional.md:
--------------------------------------------------------------------------------
 1 | ## Before running kubectl apply:
 2 | 
 3 |  - Switch your shell to the Docker daemon that runs inside minikube using
 4 |         minikube docker-env
 5 |         eval $(minikube -p minikube docker-env)
 6 |  - Build the image for this docker runtime (go into the docker_example folder and then) using
 7 |         docker build -t minikube_weather .
 8 | 
 9 | ## Running kubectl apply:
10 | 	
11 | 	kubectl apply -f weather_pod_example.yaml
12 | 
13 | ## After running kubectl apply:
14 | 
15 |  - Expose the container to the world
16 | 		kubectl expose pod test --type=NodePort --port=5000
17 | 
18 |  - Make an example request (find the cluster's IP and port as in the imperative example)
19 |  		curl http://192.168.99.101:30325?msg=Chicago


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_recommendations_pod/recommendation_pod_example.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Pod
 3 | metadata:
 4 |   name: recommendations-pod
 5 |   labels:
 6 |     ml: pytorch
 7 | spec:
 8 |   containers:
 9 |     - name: recommendation-service
10 |       image: recommendations:latest
11 |       imagePullPolicy: Never
12 | 


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_weather_deployment/.weather_pod_additional.txt.swp


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/weather_deployment_example.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   creationTimestamp: null
 5 |   labels:
 6 |     app: weather-minikube
 7 |   name: weather-deployment
 8 | spec:
 9 |   replicas: 1
10 |   selector:
11 |     matchLabels:
12 |       app: weather-minikube
13 |   strategy: {}
14 |   template:
15 |     metadata:
16 |       creationTimestamp: null
17 |       labels:
18 |         app: weather-minikube
19 |     spec:
20 |       containers:
21 |       - image: minikube_weather:latest
22 |         name: weather-service-k8s
23 |         resources: {}
24 |         imagePullPolicy: Never
25 | status: {}


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/weather_deployment_example_additional.md:
--------------------------------------------------------------------------------
 1 | ## Before running kubectl apply:
 2 | 
 3 |  - Switch your shell to the Docker daemon that runs inside minikube using
 4 |         minikube docker-env
 5 |         eval $(minikube -p minikube docker-env)
 6 |  - Build the image for this docker runtime (go into the docker_example folder and then) using
 7 |         docker build -t minikube_weather .
 8 | 
 9 | ## Running kubectl apply:
10 | 	
11 | 	kubectl apply -f weather_deployment_example.yaml
12 | 
13 | ## After running kubectl apply:
14 | 
15 |  - Expose the container to the world
16 | 		kubectl expose deployment weather-deployment --type=NodePort --port=5000
17 | 
18 |  - Make an example request (find the cluster's IP and port as in the imperative example)
19 |  		curl http://192.168.99.101:30325?msg=Chicago


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_weather_pod/.weather_pod_additional.txt.swp


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/weather_pod_example.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Pod
 3 | metadata:
 4 |   name: weather-pod
 5 |   labels:
 6 |     site: myhomepage
 7 | spec:
 8 |   containers:
 9 |     - name: test
10 |       image: minikube_weather:latest
11 |       imagePullPolicy: Never


--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/weather_pod_example_additional.md:
--------------------------------------------------------------------------------
 1 | ## Before running kubectl apply:
 2 | 
 3 |  - Switch your shell to the Docker daemon that runs inside minikube using
 4 |         minikube docker-env
 5 |         eval $(minikube -p minikube docker-env)
 6 |  - Build the image for this docker runtime (go into the docker_example folder and then) using
 7 |         docker build -t minikube_weather .
 8 | 
 9 | ## Running kubectl apply:
10 | 	
11 | 	kubectl apply -f weather_pod_example.yaml
12 | 
13 | ## After running kubectl apply:
14 | 
15 |  - Expose the container to the world
16 | 		kubectl expose pod weather-pod --type=NodePort --port=5000
17 | 
18 |  - Make an example request (find the cluster's IP and port as in the imperative example)
19 |  		curl "http://192.168.99.101:30325?msg=Chicago"


--------------------------------------------------------------------------------
/lambda_function_example/lambda_function.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | # top_n = {'196':[(1,3),(2,4)]}
 4 | # movie_dict = {1:{'name':'a'},2:{'name':'b'}}
 5 | 
 6 | def lambda_handler(event,context):
 7 |     data = {"success": False}
 8 | 
 9 | 
10 |     with open("top_n.json", "r") as read_file:
11 |         top_n = json.load(read_file)
12 |     with open("movie_dict.json", "r") as read_file:
13 |         movie_dict = json.load(read_file)
14 | 
15 | 
16 |     print(event) #debug
17 |     if "body" in event:
18 |         event = event["body"]
19 |         if event is not None:
20 |             event = json.loads(event)
21 |         else:
22 |             event = {}
23 | 
24 |     if "uid" in event: 
25 |         data["response"] = str([movie_dict.get(iid,{'name':None})['name'] for (iid, _) in top_n[event.get("uid")]])
26 |         data["success"] = True
27 | 
28 |     return {
29 |         'statusCode': 200,
30 |         'headers':{'Content-Type':'application/json'},
31 |         'body': json.dumps(data)
32 |     } 


--------------------------------------------------------------------------------
/mlflow_example/mlflow_example.py:
--------------------------------------------------------------------------------
 1 | import mlflow
 2 | from mlflow.models import infer_signature
 3 | from sklearn import datasets
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.linear_model import LogisticRegression
 6 | from sklearn.metrics import accuracy_score
 7 | 
 8 | 
 9 | # Load the Iris dataset
10 | X, y = datasets.load_iris(return_X_y=True)
11 | 
12 | # Split the data into training and test sets
13 | X_train, X_test, y_train, y_test = train_test_split(
14 |     X, y, test_size=0.2, random_state=42
15 | )
16 | 
17 | # Define the model hyperparameters
18 | params = {
19 |     "solver": "lbfgs",
20 |     "max_iter": 10,
21 |     "multi_class": "auto",
22 |     "random_state": 8888,
23 | }
24 | 
25 | # Train the model
26 | lr = LogisticRegression(**params)
27 | lr.fit(X_train, y_train)
28 | 
29 | # Predict on the test set
30 | y_pred = lr.predict(X_test)
31 | 
32 | # Calculate metrics
33 | accuracy = accuracy_score(y_test, y_pred)
34 | 
35 | 
36 | # Set our tracking server uri for logging
37 | mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")
38 | 
39 | # Create a new MLflow Experiment
40 | mlflow.set_experiment("MLops Course")
41 | 
42 | # Start an MLflow run
43 | with mlflow.start_run():
44 |     # Log the hyperparameters
45 |     mlflow.log_params(params)
46 | 
47 |     # Log the loss metric
48 |     mlflow.log_metric("accuracy", accuracy)
49 | 
50 |     # Set a tag that we can use to remind ourselves what this run was for
51 |     mlflow.set_tag("Training Info", "Basic LR model for iris data")
52 | 
53 |     # Infer the model signature
54 |     signature = infer_signature(X_train, lr.predict(X_train))
55 | 
56 |     # Log the model
57 |     model_info = mlflow.sklearn.log_model(
58 |         sk_model=lr,
59 |         artifact_path="iris_model",
60 |         signature=signature,
61 |         input_example=X_train,
62 |         registered_model_name="tracking-quickstart",
63 |     )
64 | 


--------------------------------------------------------------------------------
/mlflow_example/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow==2.20.1
2 | 


--------------------------------------------------------------------------------
/model_example_regression/simple_regression_inf.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pickle
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | 
 6 | def get_model(b, A):
 7 |     def line(x):
 8 |         return b * x + A
 9 |     return line
10 | 
11 | 
if __name__ == "__main__":

    # Load the [slope, intercept] pair stored at this path and make sure the
    # file handle is closed afterwards.
    # NOTE: pickle can execute arbitrary code on load; only open trusted files.
    with open('../data/models/simple_regression.pkl', 'rb') as model_file:
        model_params = pickle.load(model_file)
    model = get_model(model_params[0], model_params[1])

    # Evaluate the fitted line on an evenly spaced grid and plot it.
    X = np.linspace(start=-1, stop=1, num=50)
    Ypred = [model(x) for x in X]
    plt.plot(X, Ypred)
    plt.title('Simple regression.')
    plt.ylabel('y predicted values')
    plt.xlabel('x values')
    plt.show()
25 | 


--------------------------------------------------------------------------------
/model_example_regression/simple_regression_train.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | import pickle
 4 | 
 5 | 
 6 | def fit(X, Y):
 7 |     """
 8 |     From https://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/
 9 |     """
10 | 
11 |     def mean(Xs):
12 |         return sum(Xs) / len(Xs)
13 | 
14 |     m_X = mean(X)
15 |     m_Y = mean(Y)
16 | 
17 |     def std(Xs, m):
18 |         normalizer = len(Xs) - 1
19 |         return math.sqrt(sum((pow(x - m, 2) for x in Xs)) / normalizer)
20 | 
21 |     def pearson_r(Xs, Ys):
22 | 
23 |         sum_xy = 0
24 |         sum_sq_v_x = 0
25 |         sum_sq_v_y = 0
26 | 
27 |         for (x, y) in zip(Xs, Ys):
28 |             var_x = x - m_X
29 |             var_y = y - m_Y
30 |             sum_xy += var_x * var_y
31 |             sum_sq_v_x += pow(var_x, 2)
32 |             sum_sq_v_y += pow(var_y, 2)
33 |         return sum_xy / math.sqrt(sum_sq_v_x * sum_sq_v_y)
34 | 
35 |     r = pearson_r(X, Y)
36 | 
37 |     b = r * (std(Y, m_Y) / std(X, m_X))
38 |     A = m_Y - b * m_X
39 | 
40 |     def line(x):
41 |         return b * x + A
42 | 
43 |     return line, [b, A]
44 | 
45 | 
if __name__ == "__main__":

    # Synthetic, noise-free training data.
    X = np.array([1, 2, 3, 5, 22, -10])
    Y = 2.5*X + 3  # ground truth: y = 2.5 * x + 3

    model, model_params = fit(X, Y)
    print('2', model(2))
    print('-1', model(-1))
    print('0', model(0))

    # Persist [slope, intercept]; the context manager guarantees the file is
    # flushed and closed.
    with open('../data/models/simple_regression.pkl', 'wb') as model_file:
        pickle.dump(model_params, model_file)
58 | 


--------------------------------------------------------------------------------
/pyspark_example/app/example.py:
--------------------------------------------------------------------------------
 1 | # your_script.py
 2 | from pyspark.sql import SparkSession
 3 | 
 4 | # Create a Spark session
 5 | spark = SparkSession.builder.appName("PySpark Example").getOrCreate()
 6 | 
 7 | # Create a simple DataFrame
 8 | data = [("John", 30), ("Jane", 25), ("Sam", 35)]
 9 | df = spark.createDataFrame(data, ["Name", "Age"])
10 | 
11 | # Show the DataFrame
12 | df.show()
13 | 
14 | # Stop the Spark session
15 | spark.stop()


--------------------------------------------------------------------------------
/pyspark_example/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | services:
 3 |   spark-master:
 4 |     image: bitnami/spark:latest
 5 |     container_name: spark-master
 6 |     environment:
 7 |       - SPARK_MODE=master
 8 |       - SPARK_RPC_AUTHENTICATION_ENABLED=no
 9 |       - SPARK_RPC_ENCRYPTION_ENABLED=no
10 |       - SPARK_SSL_ENABLED=no
11 |     ports:
12 |       - "8086:8080"
13 |       - "7077:7077"
14 | 
15 |   spark-worker-1:
16 |     image: bitnami/spark:latest
17 |     container_name: spark-worker-1
18 |     environment:
19 |       - SPARK_MODE=worker
20 |       - SPARK_MASTER_URL=spark://spark-master:7077
21 |       - SPARK_WORKER_MEMORY=1G
22 |       - SPARK_WORKER_CORES=1
23 |     depends_on:
24 |       - spark-master
25 |     ports:
26 |       - "8081:8081"
27 | 
28 |   spark-worker-2:
29 |     image: bitnami/spark:latest
30 |     container_name: spark-worker-2
31 |     environment:
32 |       - SPARK_MODE=worker
33 |       - SPARK_MASTER_URL=spark://spark-master:7077
34 |       - SPARK_WORKER_MEMORY=1G
35 |       - SPARK_WORKER_CORES=1
36 |     depends_on:
37 |       - spark-master
38 |     ports:
39 |       - "8082:8082"
40 | 
41 |   spark-pyspark:
42 |     image: bitnami/spark:latest
43 |     container_name: spark-pyspark
44 |     environment:
45 |       - SPARK_MODE=client
46 |     depends_on:
47 |       - spark-master
48 |     volumes:
49 |       - ./app:/app
50 |     command: "spark-submit --master spark://spark-master:7077 /app/example.py"
51 | 


--------------------------------------------------------------------------------
/pyspark_example/readme.md:
--------------------------------------------------------------------------------
  1 | # Apache Spark Cluster with PySpark using Docker
  2 | 
  3 | This project demonstrates how to set up an Apache Spark cluster with PySpark using Docker and Docker Compose. The setup includes a Spark master node, two Spark worker nodes, and a PySpark client that can submit jobs to the cluster.
  4 | 
  5 | ## Prerequisites
  6 | 
  7 | Ensure you have the following installed on your machine:
  8 | - [Docker](https://www.docker.com/products/docker-desktop)
  9 | - [Docker Compose](https://docs.docker.com/compose/install/)
 10 | 
 11 | ## Setup Instructions
 12 | 
 13 | 1. **Clone this repository** or create the following `docker-compose.yml` file in your project directory:
 14 | 
 15 |     ```yaml
 16 |     version: '3'
 17 |     services:
 18 |       spark-master:
 19 |         image: bitnami/spark:latest
 20 |         container_name: spark-master
 21 |         environment:
 22 |           - SPARK_MODE=master
 23 |           - SPARK_RPC_AUTHENTICATION_ENABLED=no
 24 |           - SPARK_RPC_ENCRYPTION_ENABLED=no
 25 |           - SPARK_SSL_ENABLED=no
 26 |         ports:
 27 |           - "8080:8080"
 28 |           - "7077:7077"
 29 | 
 30 |       spark-worker-1:
 31 |         image: bitnami/spark:latest
 32 |         container_name: spark-worker-1
 33 |         environment:
 34 |           - SPARK_MODE=worker
 35 |           - SPARK_MASTER_URL=spark://spark-master:7077
 36 |           - SPARK_WORKER_MEMORY=1G
 37 |           - SPARK_WORKER_CORES=1
 38 |         depends_on:
 39 |           - spark-master
 40 |         ports:
 41 |           - "8081:8081"
 42 | 
 43 |       spark-worker-2:
 44 |         image: bitnami/spark:latest
 45 |         container_name: spark-worker-2
 46 |         environment:
 47 |           - SPARK_MODE=worker
 48 |           - SPARK_MASTER_URL=spark://spark-master:7077
 49 |           - SPARK_WORKER_MEMORY=1G
 50 |           - SPARK_WORKER_CORES=1
 51 |         depends_on:
 52 |           - spark-master
 53 |         ports:
 54 |           - "8082:8081"
 55 | 
 56 |       spark-pyspark:
 57 |         image: bitnami/spark:latest
 58 |         container_name: spark-pyspark
 59 |         environment:
 60 |           - SPARK_MODE=client
 61 |         depends_on:
 62 |           - spark-master
 63 |         volumes:
 64 |           - ./app:/app
 65 |         command: "spark-submit --master spark://spark-master:7077 /app/example.py"
 66 |     ```
 67 | 
 68 | 2. **Create the PySpark Script**:
 69 | 
 70 |     Create a directory called `app`, and inside it, create a PySpark script named `example.py` with the following content:
 71 | 
 72 |     ```python
 73 |     # your_script.py
 74 |     from pyspark.sql import SparkSession
 75 | 
 76 |     # Create a Spark session
 77 |     spark = SparkSession.builder.appName("PySpark Example").getOrCreate()
 78 | 
 79 |     # Create a simple DataFrame
 80 |     data = [("John", 30), ("Jane", 25), ("Sam", 35)]
 81 |     df = spark.createDataFrame(data, ["Name", "Age"])
 82 | 
 83 |     # Show the DataFrame
 84 |     df.show()
 85 | 
 86 |     # Stop the Spark session
 87 |     spark.stop()
 88 |     ```
 89 | 
 90 | 3. **Start the Spark Cluster**:
 91 | 
 92 |     Run the following command to start the Spark master and worker nodes, as well as the PySpark client:
 93 | 
 94 |     ```bash
 95 |     docker-compose up -d
 96 |     ```
 97 | 
 98 |     This will launch:
 99 |     - Spark Master on `http://localhost:8080`
100 |     - Two Spark Workers on `http://localhost:8081` and `http://localhost:8082`
101 |     - PySpark client that runs the job defined in `app/example.py`.
102 | 
103 | 4. **Check the Spark UI**:
104 | 
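105 |     - **Spark Master UI**: Visit `http://localhost:8080` to monitor the Spark master. (The `docker-compose.yml` checked into this repository publishes the master UI on host port `8086`, so use `http://localhost:8086` when running that file unchanged.)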
106 |     - **Worker UIs**: Workers are available at `http://localhost:8081` and `http://localhost:8082`.
107 | 
108 | 5. **Submit the PySpark Job**:
109 | 
110 |     The PySpark job (`example.py`) is automatically submitted when you start the containers. To check the logs of the PySpark job, run:
111 | 
112 |     ```bash
113 |     docker logs spark-pyspark
114 |     ```
115 | 
116 | 6. **Stop the Cluster**:
117 | 
118 |     To stop the Spark cluster, use:
119 | 
120 |     ```bash
121 |     docker-compose down
122 |     ```
123 | 
124 | ## Useful Commands
125 | 
126 | - **Start the cluster**: `docker-compose up -d`
127 | - **Stop the cluster**: `docker-compose down`
128 | - **Check PySpark job logs**: `docker logs spark-pyspark`
129 | - **Check running containers**: `docker ps`
130 | - **Restart the cluster**: `docker-compose restart`
131 | 
132 | ## Directory Structure
133 | 
134 | `docker-compose.yml` sits at the project root, next to an `app/` directory that contains `example.py`.
135 | 
136 | 
137 | ## Troubleshooting
138 | 
139 | - **Ports Conflict**: Ensure that ports `8080`, `7077`, `8081`, and `8082` are not being used by other services on your machine.
140 | - **Logs**: Check logs for more detailed error messages using:
141 | 
142 |     ```bash
143 |     docker-compose logs
144 |     ```
145 | 
146 | ## License
147 | 
148 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
149 | 


--------------------------------------------------------------------------------
/pytest_example/test_function_example.py:
--------------------------------------------------------------------------------
 1 | # content of test_sample.py
 2 | def inc(x):
 3 |     return x + 1
 4 | 
 5 | 
def test_answer():
    # Passing case: inc(3) is 4.
    assert inc(3) == 4

def test_answer2():
    # Same assertion as test_answer; gives the test run more than one passing test.
    assert inc(3) == 4

def test_answer3():
    # Same assertion as test_answer.
    assert inc(3) == 4

def test_answer4():
    # Expects 5 where inc(3) returns 4, so the test run reports one failure.
    assert inc(3) == 5  # This test will fail


--------------------------------------------------------------------------------
/ray_cluster_example/Dockerfile:
--------------------------------------------------------------------------------
# Use the official Ray Docker image
FROM rayproject/ray:latest

# Install additional dependencies (if necessary)
# RUN pip install <your-dependencies>

# Disable Python's stdout/stderr buffering so output shows up in the container logs immediately
ENV PYTHONUNBUFFERED=1
9 | 


--------------------------------------------------------------------------------
/ray_cluster_example/app/example.py:
--------------------------------------------------------------------------------
 1 | import ray
 2 | 
# Connect to an already-running Ray cluster; address='auto' means no new local
# cluster is started. Note this runs at import time, not only under __main__.
ray.init(address='auto')

@ray.remote
def square(x):
    """Remote task: return the square of ``x``."""
    return x * x

if __name__ == "__main__":
    # Submit 100 remote tasks; each .remote() call returns a future immediately
    futures = [square.remote(i) for i in range(100)]
    # Block until every task has finished and collect the results in order
    results = ray.get(futures)
    
    print(results)


--------------------------------------------------------------------------------
/ray_cluster_example/docker-compose.yaml:
--------------------------------------------------------------------------------
# Ray cluster: one head node plus replicated workers on a shared bridge network.
services:
  ray-head:
    build: .
    # Start the head node, then keep the container alive with `tail -f /dev/null`.
    command: >
      bash -c "ray start --head --port=6379 --dashboard-host 0.0.0.0 && tail -f /dev/null"
    ports:
      - "8265:8265"   # Ray Dashboard
      - "6380:6379"   # Change external port to 6380, while internal remains 6379 for Ray
    volumes:
      - ./app:/app    # Optional: Mount your app directory
    environment:
      - PYTHONUNBUFFERED=1
    networks:
      - ray-network

  ray-worker:
    build: .
    # Join the head node via its service name on the shared network, then keep the container alive.
    command: >
      bash -c "ray start --address='ray-head:6379' && tail -f /dev/null"
    # depends_on only orders container start-up; it does not wait for Ray on the head to be ready.
    depends_on:
      - ray-head
    volumes:
      - ./app:/app    # Optional: Mount your app directory
    environment:
      - PYTHONUNBUFFERED=1
    networks:
      - ray-network
    deploy:
      replicas: 2  # Number of worker nodes

networks:
  ray-network:
    driver: bridge
34 | 


--------------------------------------------------------------------------------
/ray_cluster_example/readme.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # Ray Cluster Example using Docker Compose
  4 | 
  5 | This project sets up a **Ray cluster** using Docker Compose. The cluster consists of a head node and multiple worker nodes, allowing for distributed computation and parallel task execution with Ray.
  6 | 
  7 | ## Table of Contents
  8 | 
  9 | - [Prerequisites](#prerequisites)
 10 | - [Getting Started](#getting-started)
 11 |   - [1. Clone the Repository](#1-clone-the-repository)
 12 |   - [2. Build the Docker Images](#2-build-the-docker-images)
 13 |   - [3. Start the Ray Cluster](#3-start-the-ray-cluster)
 14 |   - [4. Access the Ray Dashboard](#4-access-the-ray-dashboard)
 15 |   - [5. Running a Distributed Task](#5-running-a-distributed-task)
 16 | - [Scaling the Cluster](#scaling-the-cluster)
 17 | - [Stopping the Cluster](#stopping-the-cluster)
 18 | - [Troubleshooting](#troubleshooting)
 19 | 
 20 | ## Prerequisites
 21 | 
 22 | Make sure you have the following installed on your system:
 23 | 
 24 | - [Docker](https://www.docker.com/get-started)
 25 | - [Docker Compose](https://docs.docker.com/compose/install/)
 26 | 
 27 | ## Getting Started
 28 | 
 29 | ### 1. Clone the Repository
 30 | 
 31 | ```bash
 32 | git clone https://github.com/thejat/mlops-code-examples.git
 33 | cd mlops-code-examples
 34 | cd ray_cluster_example
 35 | ```
 36 | 
 37 | ### 2. Build the Docker Images
 38 | 
 39 | To build the Ray head and worker Docker images:
 40 | 
 41 | ```bash
 42 | docker-compose build
 43 | ```
 44 | 
 45 | ### 3. Start the Ray Cluster
 46 | 
 47 | To start the Ray cluster, including one head node and two worker nodes:
 48 | 
 49 | ```bash
 50 | docker-compose up -d
 51 | ```
 52 | 
 53 | ### 4. Access the Ray Dashboard
 54 | 
 55 | You can monitor the Ray cluster through the **Ray Dashboard**. The dashboard will be available at [http://localhost:8265](http://localhost:8265).
 56 | 
 57 | ### 5. Running a Distributed Task
 58 | 
 59 | Once the Ray cluster is running, you can submit jobs or run distributed tasks. For example, you can create a simple script like `example.py` in the `app` directory:
 60 | 
 61 | ```python
 62 | import ray
 63 | 
 64 | # Connect to the Ray cluster
 65 | ray.init(address='auto')
 66 | 
 67 | @ray.remote
 68 | def square(x):
 69 |     return x * x
 70 | 
 71 | if __name__ == "__main__":
 72 |     # Distribute tasks across the Ray cluster
 73 |     results = ray.get([square.remote(i) for i in range(100)])
 74 |     print(results)
 75 | ```
 76 | 
 77 | To run the script inside the head node container:
 78 | 
 79 | ```bash
 80 | docker exec -it ray_cluster_example-ray-head-1 python /app/example.py
 81 | ```
 82 | 
 83 | This will distribute the computation across the Ray cluster and return the results.
 84 | 
 85 | ## Scaling the Cluster
 86 | 
 87 | To scale the number of worker nodes up or down, adjust the `deploy.replicas` value in the `docker-compose.yaml` file under the `ray-worker` service:
 88 | 
 89 | ```yaml
 90 | deploy:
 91 |   replicas: 4  # Number of worker nodes
 92 | ```
 93 | 
 94 | Then apply the changes:
 95 | 
 96 | ```bash
 97 | docker-compose up -d --scale ray-worker=4
 98 | ```
 99 | 
100 | ## Stopping the Cluster
101 | 
102 | To stop the cluster and remove the containers:
103 | 
104 | ```bash
105 | docker-compose down
106 | ```
107 | 
108 | ## Troubleshooting
109 | 
110 | - **Port Conflict**: The compose file already publishes the Ray head port `6379` on host port `6380` (`6380:6379`) to avoid clashing with a local Redis. If host port `6380` or the dashboard port `8265` is also in use, edit `docker-compose.yaml` and change the host side of the mapping for the Ray head service.
111 |   
112 | - **Containers Exiting Immediately**: Ensure that the containers remain running by using `tail -f /dev/null` in the `docker-compose.yaml` to keep the head and worker nodes alive after starting Ray.
113 | 


--------------------------------------------------------------------------------