9 |
10 |
11 |
--------------------------------------------------------------------------------
/airflow_astro_example/.astro/config.yaml:
--------------------------------------------------------------------------------
1 | project:
2 | name: airflow-astro-example
3 |
--------------------------------------------------------------------------------
/airflow_astro_example/.astro/test_dag_integrity_default.py:
--------------------------------------------------------------------------------
1 | """Test the validity of all DAGs. **USED BY DEV PARSE COMMAND DO NOT EDIT**"""
2 | from contextlib import contextmanager
3 | import logging
4 | import os
5 |
6 | import pytest
7 |
8 | from airflow.models import DagBag, Variable, Connection
9 | from airflow.hooks.base import BaseHook
10 | from airflow.utils.db import initdb
11 |
12 | # init airflow database
13 | initdb()
14 |
15 | # The following code patches errors caused by missing OS Variables, Airflow Connections, and Airflow Variables
16 |
17 |
18 | # =========== MONKEYPATCH BaseHook.get_connection() ===========
def basehook_get_connection_monkeypatch(key: str, *args, **kwargs):
    """Stand-in for ``BaseHook.get_connection`` while DAG files are parsed.

    Prints a notice naming ``key`` and returns a ``Connection`` constructed with
    ``key`` as its only argument. Extra positional and keyword arguments are
    accepted and ignored.
    """
    notice = f"Attempted to fetch connection during parse returning an empty Connection object for {key}"
    print(notice)
    placeholder = Connection(key)
    return placeholder
24 |
25 |
26 | BaseHook.get_connection = basehook_get_connection_monkeypatch
27 | # # =========== /MONKEYPATCH BASEHOOK.GET_CONNECTION() ===========
28 |
29 |
30 | # =========== MONKEYPATCH OS.GETENV() ===========
def os_getenv_monkeypatch(key: str, *args, **kwargs):
    """Replacement for ``os.getenv`` that never returns an empty result.

    Resolution order:
      1. a non-empty value from ``os.environ``;
      2. ``None`` for ``JENKINS_HOME`` when no default was supplied
         (see https://github.com/astronomer/astro-cli/issues/601);
      3. a truthy caller-supplied default;
      4. the placeholder ``MOCKED_<KEY>_VALUE``.

    The default may be passed positionally or as ``default=``. When any keyword
    argument is present, only the ``default`` keyword is considered.
    """
    if kwargs:
        # Keyword form wins over the positional one, even if "default" is absent.
        fallback = kwargs.get("default")
    elif args:
        fallback = args[0]  # os.getenv takes at most one argument after the key
    else:
        fallback = None

    current = os.environ.get(key)
    if current:
        return current

    if fallback is None and key == "JENKINS_HOME":
        return None

    # A falsy default is treated the same as no default at all.
    return fallback or f"MOCKED_{key.upper()}_VALUE"
51 |
52 |
53 | os.getenv = os_getenv_monkeypatch
54 | # # =========== /MONKEYPATCH OS.GETENV() ===========
55 |
56 | # =========== MONKEYPATCH VARIABLE.GET() ===========
57 |
58 |
class magic_dict(dict):
    """A dict whose subscript lookups always yield a mocked value.

    Constructor arguments are stored like a normal dict, but ``obj[key]``
    ignores the stored contents and returns ``"MOCKED_KEY_VALUE"``.
    """

    def __init__(self, *args, **kwargs):
        # Contents are loaded through update() rather than dict.__init__.
        self.update(*args, **kwargs)

    def __getitem__(self, key):
        # A lookup in an empty dict always falls through to the default.
        nothing = dict()
        return nothing.get(key, "MOCKED_KEY_VALUE")
65 |
66 |
# Unique sentinel so that falsy defaults (None, 0, "") can be passed explicitly.
_no_default = object()


def variable_get_monkeypatch(key: str, default_var=_no_default, deserialize_json=False):
    """Stand-in for ``Variable.get`` while DAG files are parsed.

    Prints a notice naming ``key`` and then returns, in order of preference:
    the caller's ``default_var`` if one was passed (even a falsy one), a
    ``magic_dict`` when ``deserialize_json`` is true, or a fixed placeholder
    string.
    """
    print(
        f"Attempted to get Variable value during parse, returning a mocked value for {key}"
    )

    default_supplied = default_var is not _no_default
    if default_supplied:
        return default_var
    return magic_dict() if deserialize_json else "NON_DEFAULT_MOCKED_VARIABLE_VALUE"
80 |
81 |
82 | Variable.get = variable_get_monkeypatch
83 | # # =========== /MONKEYPATCH VARIABLE.GET() ===========
84 |
85 |
@contextmanager
def suppress_logging(namespace):
    """
    Temporarily disable the logger named ``namespace`` to keep test output "clean".

    The logger's previous ``disabled`` flag is restored on exit, including when
    the managed block raises.
    """
    target = logging.getLogger(namespace)
    was_disabled = target.disabled
    target.disabled = True
    try:
        yield
    finally:
        target.disabled = was_disabled
98 |
99 |
def get_import_errors():
    """
    Build a list of ``(relative_path, message)`` tuples from a freshly loaded DagBag.

    One tuple is produced per import error (message stripped of surrounding
    whitespace), followed by one ``"No import errors"`` tuple for every key of
    ``dag_bag.dags`` that is not also a key of ``dag_bag.import_errors``.
    Paths are made relative to the ``AIRFLOW_HOME`` environment variable.
    """
    with suppress_logging("airflow"):
        dag_bag = DagBag(include_examples=False)

    def relative_to_airflow_home(path):
        return os.path.relpath(path, os.environ.get("AIRFLOW_HOME"))

    failures = [
        (relative_to_airflow_home(location), message.strip())
        for location, message in dag_bag.import_errors.items()
    ]

    # NOTE(review): iterating ``dag_bag.dags`` yields its keys; these look like
    # DAG ids rather than file paths, so the membership check below may always
    # pass -- confirm against DagBag before relying on it.
    successes = [
        (relative_to_airflow_home(entry), "No import errors")
        for entry in dag_bag.dags
        if entry not in dag_bag.import_errors
    ]

    return failures + successes
124 |
125 |
@pytest.mark.parametrize(
    "rel_path, rv", get_import_errors(), ids=[entry[0] for entry in get_import_errors()]
)
def test_file_imports(rel_path, rv):
    """Fail for any file whose result is not the "No import errors" marker."""
    if rv == "No import errors":
        # Clean import: report it so the file shows up in the captured output.
        print(f"{rel_path} passed the import test")
        return
    # Anything else is an import error message and fails the test.
    raise Exception(f"{rel_path} failed to import with message \n {rv}")
137 |
--------------------------------------------------------------------------------
/airflow_astro_example/.dockerignore:
--------------------------------------------------------------------------------
1 | astro
2 | .git
3 | .env
4 | airflow_settings.yaml
5 | logs/
6 | .venv
7 | airflow.db
8 | airflow.cfg
9 |
--------------------------------------------------------------------------------
/airflow_astro_example/.gitignore:
--------------------------------------------------------------------------------
1 | .git
2 | .env
3 | .DS_Store # macOS specific ignore
4 | airflow_settings.yaml
5 | __pycache__/
6 | astro
7 | .venv
8 | airflow-webserver.pid
9 | webserver_config.py
10 | airflow.cfg
11 | airflow.db
12 |
--------------------------------------------------------------------------------
/airflow_astro_example/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/astronomer/astro-runtime:10.3.0
2 |
--------------------------------------------------------------------------------
/airflow_astro_example/README.md:
--------------------------------------------------------------------------------
1 | Overview
2 | ========
3 |
4 | Welcome to Astronomer! This project was generated after you ran 'astro dev init' using the Astronomer CLI. This readme describes the contents of the project, as well as how to run Apache Airflow on your local machine.
5 |
6 | Project Contents
7 | ================
8 |
9 | Your Astro project contains the following files and folders:
10 |
11 | - dags: This folder contains the Python files for your Airflow DAGs. By default, this directory includes two example DAGs:
12 | - `example_dag_basic`: This DAG shows a simple ETL data pipeline example with three TaskFlow API tasks that run daily.
13 | - `example_dag_advanced`: This advanced DAG showcases a variety of Airflow features like branching, Jinja templates, task groups and several Airflow operators.
14 | - Dockerfile: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. If you want to execute other commands or overrides at runtime, specify them here.
15 | - include: This folder contains any additional files that you want to include as part of your project. It is empty by default.
16 | - packages.txt: Install OS-level packages needed for your project by adding them to this file. It is empty by default.
17 | - requirements.txt: Install Python packages needed for your project by adding them to this file. It is empty by default.
18 | - plugins: Add custom or community plugins for your project to this folder. It is empty by default.
19 | - airflow_settings.yaml: Use this local-only file to specify Airflow Connections, Variables, and Pools instead of entering them in the Airflow UI as you develop DAGs in this project.
20 |
21 | Deploy Your Project Locally
22 | ===========================
23 |
24 | 1. Start Airflow on your local machine by running 'astro dev start'.
25 |
26 | This command will spin up 4 Docker containers on your machine, each for a different Airflow component:
27 |
28 | - Postgres: Airflow's Metadata Database
29 | - Webserver: The Airflow component responsible for rendering the Airflow UI
30 | - Scheduler: The Airflow component responsible for monitoring and triggering tasks
31 | - Triggerer: The Airflow component responsible for triggering deferred tasks
32 |
33 | 2. Verify that all 4 Docker containers were created by running 'docker ps'.
34 |
35 | Note: Running 'astro dev start' will start your project with the Airflow Webserver exposed at port 8080 and Postgres exposed at port 5432. If you already have either of those ports allocated, you can either [stop your existing Docker containers or change the port](https://docs.astronomer.io/astro/test-and-troubleshoot-locally#ports-are-not-available).
36 |
37 | 3. Access the Airflow UI for your local Airflow project. To do so, go to http://localhost:8080/ and log in with 'admin' for both your Username and Password.
38 |
39 | You should also be able to access your Postgres Database at 'localhost:5432/postgres'.
40 |
41 | Deploy Your Project to Astronomer
42 | =================================
43 |
44 | If you have an Astronomer account, pushing code to a Deployment on Astronomer is simple. For deploying instructions, refer to Astronomer documentation: https://docs.astronomer.io/cloud/deploy-code/
45 |
46 | Contact
47 | =======
48 |
49 | The Astronomer CLI is maintained with love by the Astronomer team. To report a bug or suggest a change, reach out to our support.
50 |
--------------------------------------------------------------------------------
/airflow_astro_example/dags/.airflowignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/airflow_astro_example/dags/.airflowignore
--------------------------------------------------------------------------------
/airflow_astro_example/dags/example_dag_advanced.py:
--------------------------------------------------------------------------------
1 | from pendulum import datetime, duration
2 |
3 | # Airflow Operators are templates for tasks and encompass the logic that your DAG will actually execute.
4 | # To use an operator in your DAG, you first have to import it.
5 | # To learn more about operators, see: https://registry.astronomer.io/.
6 |
7 | # DAG and task decorators for interfacing with the TaskFlow API
8 | from airflow.decorators import dag, task, task_group
9 |
10 | # A function that sets sequential dependencies between tasks including lists of tasks
11 | from airflow.models.baseoperator import chain
12 |
13 | from airflow.operators.bash import BashOperator
14 | from airflow.operators.empty import EmptyOperator
15 | from airflow.operators.weekday import BranchDayOfWeekOperator
16 |
17 | # Used to label node edges in the Airflow UI
18 | from airflow.utils.edgemodifier import Label
19 |
20 | # Used to determine the day of the week
21 | from airflow.utils.weekday import WeekDay
22 |
23 |
24 | """
25 | This DAG is intended to demonstrate a number of core Apache Airflow concepts that are central to the pipeline
26 | authoring experience, including the TaskFlow API, Edge Labels, Jinja templating, branching,
27 | generating tasks within a loop, task groups, and trigger rules.
28 |
29 | First, this DAG checks if the current day is a weekday or weekend. Next, the DAG checks which day of the week
30 | it is. Lastly, the DAG prints out a bash statement based on which day it is. On Tuesday, for example, the DAG
31 | prints "It's Tuesday and I'm busy with studying".
32 |
33 | This DAG uses the following operators:
34 |
35 | BashOperator -
36 | Executes a Bash script, command, or set of commands.
37 |
38 | See more info about this operator here:
39 | https://registry.astronomer.io/providers/apache-airflow/modules/bashoperator
40 |
41 | EmptyOperator -
42 | Does nothing but can be used to structure your DAG.
43 |
44 | See more info about this operator here:
45 | https://registry.astronomer.io/providers/apache-airflow/modules/emptyoperator
46 |
47 | BranchDayOfWeekOperator -
48 | Branches into one of two lists of tasks depending on the current day.
49 |
50 | See more info about this operator here:
51 | https://registry.astronomer.io/providers/apache-airflow/modules/branchdayofweekoperator
52 | """
53 |
54 | # Reference data that defines "weekday" as well as the activity assigned to each day of the week
# Built from (day, is_weekday, activity) rows; insertion order is Monday through Sunday.
DAY_ACTIVITY_MAPPING = {
    day: {"is_weekday": is_weekday, "activity": activity}
    for day, is_weekday, activity in (
        ("monday", True, "guitar lessons"),
        ("tuesday", True, "studying"),
        ("wednesday", True, "soccer practice"),
        ("thursday", True, "contributing to Airflow"),
        ("friday", True, "family dinner"),
        ("saturday", False, "going to the beach"),
        ("sunday", False, "sleeping in"),
    )
}
64 |
65 | # The TaskFlow API is also used in a number of tasks within this DAG. Check out the TaskFlow API tutorial
66 | # to learn more.
67 | # https://airflow.apache.org/docs/apache-airflow/stable/tutorial/taskflow.html
68 |
69 |
70 | # This is the TaskFlow equivalent of the PythonOperator:
71 | # https://registry.astronomer.io/providers/apache-airflow/modules/pythonoperator
@task(
    # The function name is the default `task_id`; it is overridden here.
    task_id="going_to_the_beach",
    multiple_outputs=True,  # each key of the returned dict becomes its own XCom value
)
def _going_to_the_beach() -> dict[str, str]:
    # Subject and body of the beach-day invitation.
    invitation = dict(
        subject="Beach day!",
        body="It's Saturday and I'm heading to the beach. Come join me!",
    )
    return invitation
82 |
83 |
84 | # This is the TaskFlow API equivalent to the BranchPythonOperator:
85 | # https://registry.astronomer.io/providers/apache-airflow/modules/branchpythonoperator
86 | # The task retrieves the activity from the "DAY_ACTIVITY_MAPPING" dictionary.
@task.branch
def get_activity(day_name: str) -> str:
    # Look the day up in DAY_ACTIVITY_MAPPING and return "<group>.<activity>",
    # where spaces in the activity are replaced by underscores and the group is
    # "weekday_activities" or "weekend_activities".
    activity_id = DAY_ACTIVITY_MAPPING[day_name]["activity"].replace(" ", "_")
    is_weekday = DAY_ACTIVITY_MAPPING[day_name]["is_weekday"]
    group_id = "weekday_activities" if is_weekday else "weekend_activities"
    return f"{group_id}.{activity_id}"
95 |
96 |
97 | # This is the TaskFlow API equivalent to the PythonVirtualEnvOperator:
98 | # https://registry.astronomer.io/providers/apache-airflow/modules/pythonvirtualenvoperator
@task.virtualenv(requirements=["beautifulsoup4==4.11.2"])
def inviting_friends(subject: str, body: str) -> None:
    # Builds a small HTML invitation from `subject` and `body`, parses it with
    # BeautifulSoup and prints the prettified markup.
    # bs4 is imported inside the function because it is listed in this task's
    # `requirements` rather than imported at module level.
    from bs4 import BeautifulSoup

    print("Inviting friends...")
    # NOTE(review): the markup of this literal was lost (it had been split over
    # several lines with the tags stripped, which is a syntax error). A heading
    # plus a paragraph is a reconstruction -- confirm against the upstream example.
    html_doc = f"<h1>{subject}</h1><p>{body}</p>"
    soup = BeautifulSoup(html_doc, "html.parser")
    print(soup.prettify())
107 |
108 |
109 | # When using the DAG decorator, the "dag" argument doesn't need to be specified for each task.
110 | # The "dag_id" value defaults to the name of the function it is decorating if not explicitly set.
111 | # In this example, the "dag_id" value would be "example_dag_advanced".
@dag(
    # This DAG is set to run for the first time on January 1, 2023.
    # Best practice is to use a static start_date.
    # Subsequent DAG runs are instantiated based on the "schedule" parameter below.
    start_date=datetime(2023, 1, 1),
    # This defines how many instantiations of this DAG (DAG Runs) can execute concurrently. In this case,
    # we're only allowing 1 DAG run at any given time, as opposed to allowing multiple overlapping DAG runs.
    max_active_runs=1,
    # This defines how often your DAG will run, or the schedule by which DAG runs are created. It can be
    # defined as a cron expression, custom timetable, existing presets or using the Dataset feature.
    # This DAG uses a preset to run daily.
    schedule="@daily",
    # Default settings applied to all tasks within the DAG; can be overwritten at the task level.
    default_args={
        "owner": "community",  # Defines the value of the "owner" column in the DAG view of the Airflow UI
        "retries": 2,  # If a task fails, it will retry 2 times.
        "retry_delay": duration(
            minutes=3
        ),  # A task that fails will wait 3 minutes to retry.
    },
    default_view="graph",  # This defines the default view for this DAG in the Airflow UI
    # With catchup=False, runs for the schedule intervals between the start_date and now are not
    # back-filled; only the most recent interval is scheduled once the DAG is turned on.
    catchup=False,
    tags=["example"],  # If set, this tag is shown in the DAG view of the Airflow UI
)
def example_dag_advanced():
    # EmptyOperator placeholder for first task
    begin = EmptyOperator(task_id="begin")
    # Last task will only trigger if all upstream tasks have succeeded or been skipped
    end = EmptyOperator(task_id="end", trigger_rule="none_failed")

    # This task checks which day of the week it is
    check_day_of_week = BranchDayOfWeekOperator(
        task_id="check_day_of_week",
        week_day={WeekDay.SATURDAY, WeekDay.SUNDAY},  # This checks day of week
        follow_task_ids_if_true="weekend",  # Next task if criteria is met
        follow_task_ids_if_false="weekday",  # Next task if criteria is not met
        # Compare against the run's logical date instead of the wall clock.
        # `use_task_logical_date` replaces the deprecated `use_task_execution_day`.
        use_task_logical_date=True,
    )

    weekend = EmptyOperator(task_id="weekend")  # "weekend" placeholder task
    weekday = EmptyOperator(task_id="weekday")  # "weekday" placeholder task

    # Templated value for the name of the day of week. It is derived from the run's logical date so
    # that it always agrees with "check_day_of_week" above; using the DAG run's start date instead
    # can name a different day for scheduled runs and send the inner branch to the wrong task group.
    day_name = "{{ logical_date.strftime('%A').lower() }}"

    # Begin weekday tasks.
    # Tasks within this TaskGroup (weekday tasks) will be grouped together in the Airflow UI
    @task_group
    def weekday_activities():
        # TaskFlow functions can also be reused which is beneficial if you want to use the same callable for
        # multiple tasks and want to use different task attributes.
        # See this tutorial for more information:
        #   https://airflow.apache.org/docs/apache-airflow/stable/tutorial/taskflow.html#reusing-a-decorated-task
        which_weekday_activity_day = get_activity.override(
            task_id="which_weekday_activity_day"
        )(day_name)

        for day, day_info in DAY_ACTIVITY_MAPPING.items():
            if day_info["is_weekday"]:
                day_of_week = Label(label=day)
                activity = day_info["activity"]

                # This task prints the weekday activity to bash
                do_activity = BashOperator(
                    task_id=activity.replace(" ", "_"),
                    # This is the Bash command to run
                    bash_command=f"echo It's {day.capitalize()} and I'm busy with {activity}.",
                )

                # Declaring task dependencies within the "TaskGroup" via the classic bitshift operator.
                which_weekday_activity_day >> day_of_week >> do_activity

    # Begin weekend tasks
    # Tasks within this TaskGroup will be grouped together in the UI
    @task_group
    def weekend_activities():
        which_weekend_activity_day = get_activity.override(
            task_id="which_weekend_activity_day"
        )(day_name)

        # Labels that will appear in the Graph view of the Airflow UI
        saturday = Label(label="saturday")
        sunday = Label(label="sunday")

        # This task runs the Sunday activity of sleeping for a random interval between 1 and 30 seconds
        sleeping_in = BashOperator(
            task_id="sleeping_in", bash_command="sleep $[ (1 + $RANDOM % 30) ]s"
        )

        going_to_the_beach = _going_to_the_beach()  # Calling the TaskFlow task

        # Because the "_going_to_the_beach()" function has "multiple_outputs" enabled, each dict key is
        # accessible as their own "XCom" key. Passing those values in is what links the two tasks,
        # so the returned task reference does not need to be kept.
        inviting_friends(
            subject=going_to_the_beach["subject"], body=going_to_the_beach["body"]
        )

        # Using "chain()" here for list-to-list dependencies which are not supported by the bitshift
        # operator and to simplify the notation for the desired dependency structure.
        chain(
            which_weekend_activity_day,
            [saturday, sunday],
            [going_to_the_beach, sleeping_in],
        )

    # Call the @task_group TaskFlow functions to instantiate them in the DAG
    _weekday_activities = weekday_activities()
    _weekend_activities = weekend_activities()

    # High-level dependencies between tasks
    chain(
        begin,
        check_day_of_week,
        [weekday, weekend],
        [_weekday_activities, _weekend_activities],
        end,
    )

    # Task dependency created by XComArgs:
    #   going_to_the_beach >> inviting_friends


example_dag_advanced()
238 |
--------------------------------------------------------------------------------
/airflow_astro_example/dags/example_dag_basic.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pendulum import datetime
3 |
4 | from airflow.decorators import (
5 | dag,
6 | task,
7 | ) # DAG and task decorators for interfacing with the TaskFlow API
8 |
9 |
10 | # When using the DAG decorator, The "dag_id" value defaults to the name of the function
11 | # it is decorating if not explicitly set. In this example, the "dag_id" value would be "example_dag_basic".
@dag(
    # This defines how often your DAG will run, or the schedule by which your DAG runs. In this case, this DAG
    # will run daily
    schedule="@daily",
    # This DAG is set to run for the first time on January 1, 2023. Best practice is to use a static
    # start_date. Subsequent DAG runs are instantiated based on the schedule
    start_date=datetime(2023, 1, 1),
    # When catchup=False, your DAG will only run the latest run that would have been scheduled. In this case,
    # this means that no runs are created for the days between January 1, 2023 and the most recent
    # daily interval.
    catchup=False,
    default_args={
        "retries": 2,  # If a task fails, it will retry 2 times.
    },
    tags=["example"],
)  # If set, this tag is shown in the DAG view of the Airflow UI
def example_dag_basic():
    """
    ### Basic ETL Dag
    This is a simple ETL data pipeline example that demonstrates the use of
    the TaskFlow API using three simple tasks for extract, transform, and load.
    For more information on Airflow's TaskFlow API, reference documentation here:
    https://airflow.apache.org/docs/apache-airflow/stable/tutorial_taskflow_api.html
    """

    @task()
    def extract():
        """
        #### Extract task
        A simple "extract" task to get data ready for the rest of the
        pipeline. In this case, getting data is simulated by reading from a
        hardcoded JSON string.
        """
        # Every value must be a JSON number: a bare word such as `Hello` is not valid
        # JSON (json.loads raises), and "transform" sums the values numerically.
        data_string = '{"1001": 301.27, "1002": 433.21, "1003": 502.22}'

        order_data_dict = json.loads(data_string)
        return order_data_dict

    @task(
        multiple_outputs=True
    )  # multiple_outputs=True unrolls dictionaries into separate XCom values
    def transform(order_data_dict: dict):
        """
        #### Transform task
        A simple "transform" task which takes in the collection of order data and
        computes the total order value.
        """
        total_order_value = 0

        for value in order_data_dict.values():
            total_order_value += value

        return {"total_order_value": total_order_value}

    @task()
    def load(total_order_value: float):
        """
        #### Load task
        A simple "load" task that takes in the result of the "transform" task and prints it out,
        instead of saving it to end user review
        """

        print(f"Total order value is: {total_order_value:.2f}")

    # Passing one task's output into the next is what defines the
    # extract -> transform -> load ordering.
    order_data = extract()
    order_summary = transform(order_data)
    load(order_summary["total_order_value"])


example_dag_basic()
82 |
--------------------------------------------------------------------------------
/airflow_astro_example/packages.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/airflow_astro_example/packages.txt
--------------------------------------------------------------------------------
/airflow_astro_example/requirements.txt:
--------------------------------------------------------------------------------
1 | # Astro Runtime includes the following pre-installed providers packages: https://docs.astronomer.io/astro/runtime-image-architecture#provider-packages
2 |
--------------------------------------------------------------------------------
/airflow_astro_example/tests/dags/test_dag_example.py:
--------------------------------------------------------------------------------
1 | """Example DAGs test. This test ensures that all DAGs have tags, retries set to at least two, and no import errors. This is an example pytest and may not fit the context of your DAGs. Feel free to add and remove tests."""
2 |
3 | import os
4 | import logging
5 | from contextlib import contextmanager
6 | import pytest
7 | from airflow.models import DagBag
8 |
9 |
@contextmanager
def suppress_logging(namespace):
    """Disable the logger named ``namespace`` for the duration of the ``with`` block.

    The logger's prior ``disabled`` state is put back afterwards, whether or
    not the block raised.
    """
    ns_logger = logging.getLogger(namespace)
    prior_state = ns_logger.disabled
    ns_logger.disabled = True
    try:
        yield
    finally:
        ns_logger.disabled = prior_state
19 |
20 |
def get_import_errors():
    """
    Return ``(relative_path, message)`` tuples for the import errors of a fresh DagBag.

    The list always starts with ``(None, None)`` so that a test case is created
    even when there are no import errors. Paths are made relative to the
    ``AIRFLOW_HOME`` environment variable and messages are stripped.
    """
    with suppress_logging("airflow"):
        dag_bag = DagBag(include_examples=False)

    rows = [(None, None)]
    for location, message in dag_bag.import_errors.items():
        relative = os.path.relpath(location, os.environ.get("AIRFLOW_HOME"))
        rows.append((relative, message.strip()))
    return rows
35 |
36 |
def get_dags():
    """
    Return ``(dag_id, dag, relative_fileloc)`` tuples for every DAG in a fresh DagBag.

    ``relative_fileloc`` is the DAG's ``fileloc`` made relative to the
    ``AIRFLOW_HOME`` environment variable.
    """
    with suppress_logging("airflow"):
        dag_bag = DagBag(include_examples=False)

    entries = []
    for dag_id, dag in dag_bag.dags.items():
        relative = os.path.relpath(dag.fileloc, os.environ.get("AIRFLOW_HOME"))
        entries.append((dag_id, dag, relative))
    return entries
48 |
49 |
# Load the DagBag once: the same rows supply both the parameters and their ids,
# instead of parsing every DAG file twice during test collection.
_IMPORT_ERROR_ROWS = get_import_errors()


@pytest.mark.parametrize(
    "rel_path,rv", _IMPORT_ERROR_ROWS, ids=[row[0] for row in _IMPORT_ERROR_ROWS]
)
def test_file_imports(rel_path, rv):
    """Fail when a DAG file produced an import error.

    The leading ``(None, None)`` placeholder row is a deliberate no-op.
    """
    if rel_path and rv:
        raise Exception(f"{rel_path} failed to import with message \n {rv}")
57 |
58 |
# Tags that DAGs are allowed to use. Leave empty to accept any tag.
# This is a set (not ``{}``, which is an empty dict) so the set difference
# in the test below is always valid.
APPROVED_TAGS = set()

# Load the DagBag once and reuse it for both the parameters and their ids.
_TAG_TEST_DAGS = get_dags()


@pytest.mark.parametrize(
    "dag_id,dag,fileloc", _TAG_TEST_DAGS, ids=[row[2] for row in _TAG_TEST_DAGS]
)
def test_dag_tags(dag_id, dag, fileloc):
    """
    test if a DAG is tagged and if those TAGs are in the approved list
    """
    assert dag.tags, f"{dag_id} in {fileloc} has no tags"
    if APPROVED_TAGS:
        unapproved = set(dag.tags) - set(APPROVED_TAGS)
        assert (
            not unapproved
        ), f"{dag_id} in {fileloc} uses unapproved tags: {sorted(unapproved)}"
72 |
73 |
# Load the DagBag once and reuse it for both the parameters and their ids.
_RETRY_TEST_DAGS = get_dags()


@pytest.mark.parametrize(
    "dag_id,dag,fileloc", _RETRY_TEST_DAGS, ids=[row[2] for row in _RETRY_TEST_DAGS]
)
def test_dag_retries(dag_id, dag, fileloc):
    """
    test if a DAG has retries set to at least two in its default_args
    """
    retries = dag.default_args.get("retries")
    # Check for None first: comparing None >= 2 raises TypeError, which would
    # replace the explanatory assertion message with an unrelated traceback.
    assert (
        retries is not None and retries >= 2
    ), f"{dag_id} in {fileloc} must have task retries >= 2."
84 |
--------------------------------------------------------------------------------
/airflow_setup/setup.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Install Apache Airflow from PyPI, pinned with the constraints file published
# for the chosen Airflow version and the active Python version.
AIRFLOW_VERSION=2.10.5

# Extract the version of Python you have installed. If you're currently using a Python version that is not supported by Airflow, you may want to set this manually.
# See the Airflow installation documentation for the supported Python versions.
PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"

CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt"
# For example this would install 2.10.5 with python 3.8: https://raw.githubusercontent.com/apache/airflow/constraints-2.10.5/constraints-3.8.txt

# Run pip through the same `python` that was used to pick the constraints file,
# so the packages are installed for the interpreter the constraints belong to.
python -m pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
--------------------------------------------------------------------------------
/cloud_function_example/main.py:
--------------------------------------------------------------------------------
def predict(request):
    """HTTP entry point: evaluate the stored linear model at the requested ``x``.

    ``x`` is read from the query string first and from the JSON body otherwise.
    The model parameters are downloaded from the "simple-regression-bucket"
    bucket on every call.

    Args:
        request: the incoming request object; ``request.args`` and
            ``request.get_json`` are used.

    Returns:
        A JSON response ``{"input": ..., "prediction": ...}`` on success, or
        ``{"success": "false", "message": ...}`` when ``x`` is missing or is
        not a number.
    """
    from flask import jsonify
    import pickle
    from google.cloud import storage

    def get_model(b, A):
        # Returns the line x -> b * x + A.
        def line(x):
            return b * x + A
        return line

    def failure(message):
        return jsonify({'success': 'false', 'message': message})

    storage_client = storage.Client()
    bucket = storage_client.get_bucket("simple-regression-bucket")
    blob = bucket.blob("simple_regression.pkl")
    blob.download_to_filename("/tmp/simple_regression.pkl")

    # NOTE(review): pickle can execute arbitrary code while loading; this is
    # only safe as long as the bucket contents are fully trusted.
    with open('/tmp/simple_regression.pkl', 'rb') as model_file:
        model_params = pickle.load(model_file)

    model = get_model(model_params[0], model_params[1])

    # silent=True: a request without a JSON body (e.g. a plain GET with ?x=...)
    # yields None here instead of an error before the query string is checked.
    request_json = request.get_json(silent=True)
    if "x" in request.args:
        raw_x = request.args['x']
    elif isinstance(request_json, dict) and 'x' in request_json:
        raw_x = request_json['x']
    else:
        return failure('Input x was not passed correctly.')

    try:
        x_value = float(raw_x)
    except (TypeError, ValueError):
        # Previously a bad query value was swallowed and nothing was returned,
        # and a bad JSON value raised; both now produce an explicit response.
        return failure('Input x must be a number.')

    return jsonify({'input': raw_x, 'prediction': model(x_value)})
--------------------------------------------------------------------------------
/cloud_function_example/requirements.txt:
--------------------------------------------------------------------------------
1 | # Function dependencies, for example:
2 | # package>=version
3 | flask
4 | numpy
5 | google-cloud-storage
--------------------------------------------------------------------------------
/copilot_example/fastapi_example.py:
--------------------------------------------------------------------------------
from typing import Optional

from fastapi import FastAPI
2 |
3 | app = FastAPI()
4 |
@app.get("/")
def read_root():
    # Root endpoint: respond with a fixed greeting payload.
    greeting = {"Hello": "World"}
    return greeting
8 |
@app.get("/items/{item_id}")
def read_item(item_id: int, q: Optional[str] = None):
    # `item_id` comes from the URL path; `q` is an optional query parameter.
    # Its annotation is Optional[str] because the default is None.
    return {"item_id": item_id, "q": q}
12 |
13 | # run the script using `uvicorn fastapi_example:app --reload` in the command line
--------------------------------------------------------------------------------
/copilot_example/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi==0.104.1
--------------------------------------------------------------------------------
/cron_example/date_job_every_minute.txt:
--------------------------------------------------------------------------------
1 | Wed Feb 14 03:44:01 PM CST 2024
2 | Wed Feb 14 03:45:01 PM CST 2024
3 | Wed Feb 14 03:46:01 PM CST 2024
4 | Wed Feb 14 03:47:01 PM CST 2024
5 | Wed Feb 14 03:48:01 PM CST 2024
6 | Wed Feb 14 03:49:01 PM CST 2024
7 | Wed Feb 14 03:50:01 PM CST 2024
8 | Wed Feb 14 03:51:01 PM CST 2024
9 | Wed Feb 14 03:52:01 PM CST 2024
10 | Wed Feb 14 03:53:01 PM CST 2024
11 | Wed Feb 14 03:54:01 PM CST 2024
12 | Wed Feb 14 03:55:01 PM CST 2024
13 | Wed Feb 14 03:56:01 PM CST 2024
14 | Wed Feb 14 03:57:01 PM CST 2024
15 | Wed Feb 14 03:58:01 PM CST 2024
16 | Wed Feb 14 03:59:01 PM CST 2024
17 | Wed Feb 14 04:00:01 PM CST 2024
18 | Wed Feb 14 04:01:01 PM CST 2024
19 | Wed Feb 14 04:02:01 PM CST 2024
20 | Wed Feb 14 04:03:01 PM CST 2024
21 | Wed Feb 14 04:04:01 PM CST 2024
22 | Wed Feb 12 22:17:01 UTC 2025
23 | Wed Feb 12 22:18:01 UTC 2025
24 |
--------------------------------------------------------------------------------
/cron_example/run_this_job.sh:
--------------------------------------------------------------------------------
1 | date >> /home/theja/mlops-code-examples/cron_example/date_job_every_minute.txt
2 |
--------------------------------------------------------------------------------
/cron_example/run_this_job.txt:
--------------------------------------------------------------------------------
1 | To run the script:
2 |
3 | Step 1: open cron editor on the command line with `crontab -e`
4 |
5 | Step 2: insert the following line (replace `theja` with your username and adjust the path to where you cloned the repository)
6 |
7 | * * * * * /home/theja/mlops-code-examples/cron_example/run_this_job.sh
8 |
9 |
--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/README.md:
--------------------------------------------------------------------------------
1 | # Docker Compose Example
2 |
3 | ## Description
4 |
5 | This project provides examples of using Docker Compose to orchestrate containerized applications.
6 |
7 | ## Table of Contents
8 |
9 | - [Installation](#installation)
10 | - [Usage](#usage)
11 | - [Contributing](#contributing)
12 | - [License](#license)
13 |
14 | ## Installation
15 |
16 | To get started, follow these steps:
17 |
18 | 1. Clone the repository.
19 | 2. Install Docker and Docker Compose.
20 | 3. Run `docker-compose up` to start the application.
21 |
22 | ## Usage
23 |
24 | To connect to the Docker container and manage your PostgreSQL database, you can use pgAdmin 4. Follow these steps:
25 |
26 | 1. Open a web browser and visit `http://localhost:5050`.
27 | 2. Log in to pgAdmin 4 using the default credentials (username: `pgadmin4@pgadmin.org`, password: `admin`).
28 | 3. Click on "Add New Server" in the "Quick Links" section.
29 | 4. Enter a name for the server and switch to the "Connection" tab.
30 | 5. In the "Host name/address" field, enter the name of the Docker container running PostgreSQL (e.g., `postgres`).
31 | 6. Set the "Port" to `5432`.
32 | 7. Enter the username and password for the PostgreSQL database.
33 | 8. Click "Save" to connect to the Docker container.
34 |
35 | Now you can use pgAdmin 4 to manage your PostgreSQL database running in the Docker container.
36 |
37 |
38 | ## Contributing
39 |
40 | Contributions are welcome! Please fork the repository and submit a pull request.
41 |
42 | ## License
43 |
44 | This example is licensed under the [MIT License](LICENSE).
--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 | database:
5 | image: 'postgres:latest'
6 | restart: always
7 | ports:
8 | - 15432:5432
9 | env_file:
10 | - .env
11 | volumes:
12 | - ./db-data/:/var/lib/postgresql/data/
13 | - ./init.sql:/docker-entrypoint-initdb.d/init.sql
--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/env.example:
--------------------------------------------------------------------------------
1 | POSTGRES_PASSWORD=changethis!
--------------------------------------------------------------------------------
/docker_examples/docker_compose_example/init.sql:
--------------------------------------------------------------------------------
1 | -- create a table
2 | CREATE TABLE test(
3 | id INT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
4 | name TEXT NOT NULL,
5 | archived BOOLEAN NOT NULL DEFAULT FALSE
6 | );
7 |
8 | -- add test data
9 | INSERT INTO test (name, archived)
10 | VALUES ('Theja', true),
11 | ('UIC', false);
12 |
--------------------------------------------------------------------------------
/docker_examples/docker_example/Dockerfile:
--------------------------------------------------------------------------------
# Debian buster is end-of-life and its apt repositories were moved to the
# archive, so `apt-get update` on buster-slim no longer resolves. The official
# slim Python image already ships python3 and pip.
FROM python:3.11-slim
# Pin Flask to the version listed in this example's requirements.txt and skip
# the pip cache to keep the image small.
RUN pip install --no-cache-dir Flask==3.0.0
WORKDIR /app
COPY flask_simple_regression_service.py .
ENTRYPOINT ["python3","flask_simple_regression_service.py"]
--------------------------------------------------------------------------------
/docker_examples/docker_example/flask_simple_regression_service.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request
2 |
3 |
def model(x):
    """Evaluate the fixed linear model ``2*x + 2`` at ``x``."""
    slope = 2
    intercept = 2
    return slope * x + intercept
6 |
7 | app = Flask(__name__)
@app.route("/", methods=["GET"])
def predict():
    """Return the model prediction for the ``x`` query parameter as JSON.

    Responds with ``{'input': <raw x>, 'prediction': <model output>}`` when
    ``x`` is present and parses as a float. Otherwise responds with a payload
    whose ``status`` is ``'false'`` and whose ``message`` says whether ``x``
    was missing or not numeric.
    """
    raw_x = request.args.get("x")
    if raw_x is None:
        return jsonify({'status': 'false', 'message': 'Input x was not passed.'})

    try:
        x = float(raw_x)
    except ValueError:
        # Only a malformed number is an expected failure here; anything else
        # should surface instead of being silently swallowed.
        return jsonify({'status': 'false', 'message': 'Input x must be a number.'})

    return jsonify({'input': raw_x, 'prediction': model(x)})
18 |
19 |
20 | if __name__ == '__main__':
21 | app.run(host="0.0.0.0",port=5002)
22 |
--------------------------------------------------------------------------------
/docker_examples/docker_example/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==3.0.0
--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/Dockerfile:
--------------------------------------------------------------------------------
FROM continuumio/miniconda3:latest

# Every install needs -y: `docker build` has no terminal to answer conda's
# "Proceed ([y]/n)?" prompt, so an install without it aborts the build.
RUN conda install -y flask pandas \
 && conda install -y -c conda-forge scikit-surprise scikit-learn \
 && conda install -y pytorch torchvision cpuonly -c pytorch

COPY . .
# Place the MovieLens files under /root/.surprise_data/ml-1m/ml-1m.
RUN mkdir -p /root/.surprise_data/ml-1m
RUN cp -r ml-1m /root/.surprise_data/ml-1m/
CMD ["python","flask_recommendation_service.py"]
11 |
--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/flask_recommendation_service.py:
--------------------------------------------------------------------------------
1 | # load Flask
2 | import flask
3 | from recommend_pytorch_train import MF
4 | from recommend_pytorch_inf import get_top_n, get_previously_seen
5 | import torch
6 | import pandas as pd
7 | import surprise
8 | import time
9 |
10 |
11 | app = flask.Flask(__name__)
12 |
13 | start_time = time.time()
14 |
15 | # data preload
16 | data = surprise.Dataset.load_builtin('ml-1m')
17 | trainset = data.build_full_trainset()
18 | testset = trainset.build_anti_testset()
19 | movies_df = pd.read_csv('./movies.dat',
20 | sep="::", header=None, engine='python', encoding="iso-8859-1")
21 | movies_df.columns = ['iid', 'name', 'genre']
22 | movies_df.set_index('iid', inplace=True)
23 |
24 | # model preload
25 | k = 100 # latent dimension
26 | c_bias = 1e-6
27 | c_vector = 1e-6
28 | model = MF(trainset.n_users, trainset.n_items,
29 | k=k, c_bias=c_bias, c_vector=c_vector)
30 | model.load_state_dict(torch.load(
31 | './recommendation_model_pytorch.pkl')) # TODO: prevent overwriting
32 | model.eval()
33 |
34 | print('Model and data preloading completed in ', time.time()-start_time)
35 |
36 |
@app.route("/", methods=["GET"])
def recommend():
    """Return previously seen and recommended movies for the ``uid`` query parameter.

    The JSON response always carries ``success``. When ``uid`` is supplied it
    also carries ``uid`` and, if the lookups succeed, ``seen`` and
    ``recommended`` (lists of movie names). Failures are logged and reported
    as ``success: False`` rather than raised to the client.
    """
    # Named `response` so it does not shadow the module-level `data` dataset.
    response = {"success": False}

    if "uid" not in flask.request.args:
        return flask.jsonify(response)

    response['uid'] = str(flask.request.args['uid'])

    try:
        response['seen'] = get_previously_seen(
            trainset, response['uid'], movies_df)
        recommended = get_top_n(
            model, testset, trainset, response['uid'], movies_df, n=10)
        print(recommended)
        response['recommended'] = [x[1] for x in recommended]
        response["success"] = True
    except Exception:
        # Keep the best-effort contract (never a 500 for a bad uid) but leave
        # a traceback in the log instead of discarding the error.
        app.logger.exception(
            "Recommendation failed for uid=%r", response['uid'])

    return flask.jsonify(response)
58 |
59 |
60 | # start the flask app, allow remote connections
61 | if __name__ == '__main__':
62 | app.run(host='0.0.0.0')
63 |
--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/movies.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/docker_examples/docker_pipeline_example/movies.dat
--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/recommend_pytorch_inf.py:
--------------------------------------------------------------------------------
1 | from recommend_pytorch_train import MF
2 | from surprise import Dataset
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import pandas as pd
7 | import pprint
8 |
9 |
def get_top_n(model, testset, trainset, uid_input, movies_df, n=10):
    """Return the ``n`` highest-scored candidate movies for one user.

    Args:
        model: Callable that is given a ``[[inner_uid, inner_iid]]`` tensor
            and whose result is converted with ``float()``.
        testset: Iterable of ``(raw_uid, raw_iid, _)`` tuples to score.
        trainset: Object providing ``to_inner_uid`` and ``to_inner_iid``.
        uid_input: Raw user id to recommend for.
        movies_df: DataFrame indexed by integer movie id with a ``'name'``
            column.
        n: Maximum number of predictions to return.

    Returns:
        A list of ``(raw_iid, movie_name, score)`` tuples ordered by
        descending score, at most ``n`` long. Empty when the user is unknown.
    """
    preds = []
    try:
        inner_uid = int(trainset.to_inner_uid(uid_input))
    except (KeyError, ValueError):
        # Surprise reports an unknown raw id with ValueError, not KeyError.
        return preds

    # Scoring only: no gradients are needed.
    with torch.no_grad():
        for uid, iid, _ in testset:
            # Comparing raw ids is equivalent to comparing their inner ids
            # and avoids an id lookup for every row of every other user.
            if uid != uid_input:
                continue
            try:
                inner_iid = int(trainset.to_inner_iid(iid))
                movie_name = movies_df.loc[int(iid), 'name']
            except (KeyError, ValueError):
                # Item unknown to the trainset or missing from the movie table.
                continue
            score = float(model(torch.tensor([[inner_uid, inner_iid]])))
            preds.append((iid, movie_name, score))

    # Rank by predicted score (tuple index 2), not by movie name (index 1).
    preds.sort(key=lambda pred: pred[2], reverse=True)
    return preds[:n]
38 |
39 |
def get_previously_seen(trainset, uid, movies_df, limit=10):
    """Return names of movies the user has already rated.

    Args:
        trainset: Object providing ``to_inner_uid``, ``to_raw_iid`` and the
            ``ur`` mapping of inner user id to ``(inner_iid, rating)`` pairs.
        uid: Raw user id (the same kind of id ``get_top_n`` receives).
        movies_df: DataFrame indexed by integer movie id with a ``'name'``
            column.
        limit: Maximum number of names to return.

    Returns:
        Up to ``limit`` movie names. Empty when the user is unknown.
    """
    seen = []
    try:
        # `ur` is keyed by inner user id, so translate the raw id first.
        inner_uid = trainset.to_inner_uid(uid)
    except (KeyError, ValueError):
        return seen

    for inner_iid, _ in trainset.ur[inner_uid]:
        if len(seen) >= limit:
            break
        try:
            # `ur` stores inner item ids; the movie table is indexed by raw id.
            raw_iid = trainset.to_raw_iid(inner_iid)
            seen.append(movies_df.loc[int(raw_iid), 'name'])
        except (KeyError, ValueError):
            # Item missing from the movie table: skip it.
            pass
    return seen
50 |
51 |
def main():
    """Load the data and trained model, then print sample recommendations.

    For four users taken from the anti-testset, prints the movies they have
    already rated followed by their top-10 recommendations.
    """
    # Data
    # Read with the same iso-8859-1 encoding the Flask service uses for this
    # file, instead of relying on the default decoder.
    movies_df = pd.read_csv('../data/ml-1m/movies.dat', sep="::",
                            header=None, engine='python',
                            encoding="iso-8859-1")
    movies_df.columns = ['iid', 'name', 'genre']
    movies_df.set_index('iid', inplace=True)
    data = Dataset.load_builtin('ml-1m')
    trainset = data.build_full_trainset()
    testset = trainset.build_anti_testset()

    k = 100  # latent dimension
    c_bias = 1e-6
    c_vector = 1e-6

    model = MF(trainset.n_users, trainset.n_items,
               k=k, c_bias=c_bias, c_vector=c_vector)
    model.load_state_dict(torch.load('../data/models/recommendation_model_pytorch.pkl'))
    model.eval()

    # Print the recommended items for sample users
    sample_users = list({x[0] for x in testset})[:4]

    for uid in sample_users:

        print('User:', uid)
        print('\n')

        print('\tSeen:')
        seen = get_previously_seen(trainset, uid, movies_df)
        pprint.pprint(seen)
        print('\n')

        print('\tRecommendations:')
        recommended = get_top_n(model, testset, trainset, uid, movies_df, n=10)
        pprint.pprint([x[1] for x in recommended])
        print('\n')
88 |
89 |
90 | if __name__ == "__main__":
91 | main()
92 |
--------------------------------------------------------------------------------
/docker_examples/docker_pipeline_example/recommend_pytorch_train.py:
--------------------------------------------------------------------------------
1 | # https://github.com/NicolasHug/Surprise
2 | # can be replaced by explicitly importing the movielens data
3 | from surprise import Dataset
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from sklearn.utils import shuffle
9 |
class Loader():
    """Mini-batch iterator over two aligned NumPy arrays.

    Yields ``(xs, ys)`` tensor pairs of exactly ``batchsize`` rows. A trailing
    partial batch is dropped, so the number of yielded batches always equals
    ``len(loader)``.

    Args:
        x: NumPy array of inputs, one row per sample.
        y: NumPy array of targets, aligned with ``x``.
        batchsize: Number of samples per batch.
        do_shuffle: When true, shuffle once on construction and again at the
            start of every iteration; when false, keep the given order.
    """
    current = 0

    def __init__(self, x, y, batchsize=1024, do_shuffle=True):
        # Remember the flag itself so __iter__ can honour it.
        self.shuffle = do_shuffle
        self.x = x
        self.y = y
        self.batchsize = batchsize
        self.batches = range(0, len(self.y), batchsize)
        if do_shuffle:
            self._reshuffle()

    def _reshuffle(self, seed=None):
        # Apply one permutation to both arrays so the rows stay aligned.
        if seed is None:
            order = np.random.permutation(len(self.y))
        else:
            order = np.random.RandomState(seed).permutation(len(self.y))
        self.x, self.y = self.x[order], self.y[order]

    def __iter__(self):
        # Reset & return a new iterator
        if self.shuffle:
            self._reshuffle(seed=0)
        self.current = 0
        return self

    def __len__(self):
        # Return the number of (full) batches
        return len(self.y) // self.batchsize

    def __next__(self):
        start = self.current
        stop = start + self.batchsize
        # `>` rather than `>=`: a final batch that ends exactly at the end of
        # the data is a full batch and must still be yielded.
        if stop > len(self.y):
            raise StopIteration
        xs = torch.from_numpy(self.x[start:stop])
        ys = torch.from_numpy(self.y[start:stop])
        self.current = stop
        return (xs, ys)
42 |
43 |
class MF(nn.Module):
    """Matrix-factorisation model with user, item and global bias terms.

    A prediction is the dot product of a user vector and an item vector plus
    the global bias and the per-user and per-item biases. ``c_vector`` and
    ``c_bias`` weight the L2 penalties that ``loss`` adds to the MSE.
    """

    def __init__(self, n_user, n_item, k=18, c_vector=1.0, c_bias=1.0):
        super().__init__()
        self.k, self.n_user, self.n_item = k, n_user, n_item
        self.c_bias, self.c_vector = c_bias, c_vector

        # Latent factor tables, one k-dimensional row per user / item.
        self.user = nn.Embedding(n_user, k)
        self.item = nn.Embedding(n_item, k)

        # Scalar bias per user / item, plus one global bias starting at 1.
        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        self.bias = nn.Parameter(torch.ones(1))

    def forward(self, train_x):
        """Score each row of ``train_x``; column 0 is the user id, column 1 the item id."""
        users, items = train_x[:, 0], train_x[:, 1]

        # Global + per-user + per-item bias for every row.
        offsets = (self.bias
                   + self.bias_user(users).squeeze()
                   + self.bias_item(items).squeeze())

        # Row-wise dot product of the user and item vectors.
        dot = torch.sum(self.user(users) * self.item(items), dim=1)
        return dot + offsets

    def loss(self, prediction, target):
        """Return the MSE against ``target`` plus the weighted L2 penalties."""

        def penalty(weights, coefficient):
            return torch.sum(weights**2) * coefficient

        mse = F.mse_loss(prediction, target.squeeze())

        reg_bias_user = penalty(self.bias_user.weight, self.c_bias)
        reg_bias_item = penalty(self.bias_item.weight, self.c_bias)
        reg_user = penalty(self.user.weight, self.c_vector)
        reg_item = penalty(self.item.weight, self.c_vector)

        return mse + reg_user + reg_item + reg_bias_user + reg_bias_item
95 |
96 |
def main():
    """Train the MF model on the built-in ml-1m ratings and save its weights."""
    # Data
    trainset = Dataset.load_builtin('ml-1m').build_full_trainset()
    ratings = np.array(list(trainset.all_ratings()))
    train_x = ratings[:, :2].astype(np.int64)  # first two columns, as integer ids
    train_y = ratings[:, 2].astype(np.float32)  # third column, the rating

    # Parameters
    lr = 5e-3
    k = 100  # latent dimension
    c_bias = 1e-6
    c_vector = 1e-6
    batchsize = 1024
    num_epochs = 40

    model = MF(trainset.n_users, trainset.n_items,
               k=k, c_bias=c_bias, c_vector=c_vector)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # A fresh loader per epoch
        dataloader = Loader(train_x, train_y, batchsize=batchsize)
        for itr, (batch_x, batch_y) in enumerate(dataloader, start=1):
            prediction = model(batch_x)
            loss = model.loss(prediction, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if itr % 100 == 0:
                print(f"epoch: {epoch}. iteration: {itr}. training loss: {loss}")

    torch.save(model.state_dict(),
               "../data/models/recommendation_model_pytorch.pkl")
133 |
134 |
135 | if __name__ == '__main__':
136 | main()
137 |
--------------------------------------------------------------------------------
/dvc_example/data/.gitignore:
--------------------------------------------------------------------------------
1 | /data.xml
2 |
--------------------------------------------------------------------------------
/dvc_example/data/data.xml.dvc:
--------------------------------------------------------------------------------
1 | outs:
2 | - md5: 22a1a2931c8370d3aeedd7183606fd7f
3 | size: 14445097
4 | hash: md5
5 | path: data.xml
6 |
--------------------------------------------------------------------------------
/dvc_example/requirements.txt:
--------------------------------------------------------------------------------
1 | dvc==3.43.1
2 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_food_api/flask_food_service.py:
--------------------------------------------------------------------------------
1 | # load Flask
2 | import flask
3 | import requests
4 |
5 | app = flask.Flask(__name__)
6 |
7 | @app.route("/", methods=["GET"])
8 | def food():
9 |
10 | data = {"success": False}
11 |
12 | if "msg" in flask.request.args:
13 | data['foodname'] = str(flask.request.args['msg'])
14 | try:
15 | req = requests.get(f"https://foodish-api.herokuapp.com/api/images/{data['foodname']}")
16 | data["response"] = req.json()
17 | data["success"] = True
18 | except:
19 | pass
20 | else:
21 | try:
22 | req = requests.get("https://foodish-api.herokuapp.com/api/")
23 | data["response"] = req.json()
24 | data["success"] = True
25 | except:
26 | pass
27 |
28 | if data['success']:
29 | img_str= f"""
30 |
31 | """
32 | else:
33 | img_str= "Food API failed"
34 |
35 | return f"""
36 |
37 |
38 |
39 | Our Funky HTML Page
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | {img_str}
49 |
50 |
51 |
52 |
53 | """
54 |
55 | # start the flask app, allow remote connections
56 | if __name__ == '__main__':
57 | app.run(host='0.0.0.0')
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/flask_imagenet_improved_service.py:
--------------------------------------------------------------------------------
1 | """
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2019 Avinash Sajjanshetty
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | this software and associated documentation files (the "Software"), to deal in
8 | the Software without restriction, including without limitation the rights to
9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
10 | the Software, and to permit persons to whom the Software is furnished to do so,
11 | subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
23 |
24 |
25 | # PyTorch Flask API
26 |
27 |
28 | Check the demo [here](https://pytorch-imagenet.herokuapp.com/).
29 |
30 | If you'd like to check a super simple API server, then check [this repo](https://github.com/avinassh/pytorch-flask-api).
31 |
32 |
33 | ## Requirements
34 |
35 | Runs with Python-3.7.3
36 |
37 | Install them from `requirements.txt`:
38 |
39 | pip install -r requirements.txt
40 |
41 |
42 | requirements.txt has:
43 | Flask==1.0.3
44 | https://download.pytorch.org/whl/cpu/torch-1.0.0-cp37-cp37m-linux_x86_64.whl
45 | torchvision==0.2.1
46 | numpy==1.16.4
47 | Pillow==7.1.0
48 |
49 | ## Local Deployment
50 |
51 | Run the server:
52 |
53 | python app.py
54 |
55 |
56 | ## Heroku Deployment
57 |
58 | [](https://heroku.com/deploy?template=https://github.com/avinassh/pytorch-flask-api-heroku)
59 |
60 |
61 | ## License
62 |
63 | The mighty MIT license. Please check `LICENSE` for more details.
64 |
65 |
66 |
67 | """
68 |
69 |
70 | import os
71 | import json
72 | from flask import Flask, render_template, request, redirect
73 | import io
74 | from PIL import Image
75 | from torchvision import models
76 | import torchvision.transforms as transforms
77 |
78 |
def get_model():
    """Build the pretrained DenseNet-121 and put it in evaluation mode."""
    network = models.densenet121(pretrained=True)
    network.eval()
    return network
83 |
84 |
def transform_image(image_bytes):
    """Decode raw image bytes into a normalised single-image batch tensor.

    The image is converted to RGB, resized to 255, centre-cropped to 224,
    turned into a tensor and normalised with the three-channel mean/std
    below; ``unsqueeze(0)`` then adds the leading batch dimension.
    """
    my_transforms = transforms.Compose([transforms.Resize(255),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize(
                                            [0.485, 0.456, 0.406],
                                            [0.229, 0.224, 0.225])])
    # Force three channels: the Normalize statistics above are per-RGB-channel,
    # so RGBA, palette or grayscale uploads would otherwise fail.
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    return my_transforms(image).unsqueeze(0)
94 |
95 |
96 | # ImageNet classes are often of the form `can_opener` or `Egyptian_cat`
97 | # will use this method to properly format it so that we get
98 | # `Can Opener` or `Egyptian Cat`
def format_class_name(class_name):
    """Turn an underscore-separated class name into a title-cased phrase."""
    words = class_name.split('_')
    return ' '.join(words).title()
103 |
def get_prediction(image_bytes):
    """Classify raw image bytes and return the matching class-index entry.

    Returns ``(0, 'error')`` when transforming the image or running the model
    raises; otherwise returns the ``imagenet_class_index`` entry for the
    highest-scoring class.
    """
    try:
        scores = model.forward(transform_image(image_bytes=image_bytes))
    except Exception:
        return 0, 'error'
    # max over dim 1 returns (values, indices); the index picks the class.
    best_index = scores.max(1)[1]
    return imagenet_class_index[str(best_index.item())]
113 |
114 |
model = get_model()
# Read the class index inside a context manager so the file handle is closed
# as soon as the JSON is parsed.
with open('../imagenet_class_index.json') as class_index_file:
    imagenet_class_index = json.load(class_index_file)
117 |
118 | app = Flask(__name__)
119 |
120 |
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form on GET; classify the uploaded image on POST.

    A POST without a usable ``file`` upload is redirected back to the form.
    Otherwise the image is classified and ``result.html`` is rendered with
    ``class_id`` and the formatted ``class_name``.
    """
    if request.method != 'POST':
        return render_template('index.html')

    file = request.files.get('file')
    if not file:
        # Covers both a missing form field and an empty file selection.
        # Returning None here would make Flask answer with a 500.
        return redirect(request.url)

    img_bytes = file.read()
    class_id, class_name = get_prediction(image_bytes=img_bytes)
    class_name = format_class_name(class_name)
    return render_template('result.html', class_id=class_id,
                           class_name=class_name)
135 |
136 |
137 | if __name__ == '__main__':
138 | app.run(debug=True, port=int(os.environ.get('PORT', 5000)))
139 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/pytorch.png
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/static/style.css:
--------------------------------------------------------------------------------
1 | html,
2 | body {
3 | height: 100%;
4 | }
5 |
6 | body {
7 | display: -ms-flexbox;
8 | display: flex;
9 | -ms-flex-align: center;
10 | align-items: center;
11 | padding-top: 40px;
12 | padding-bottom: 40px;
13 | background-color: #f5f5f5;
14 | }
15 |
16 | .form-signin {
17 | width: 100%;
18 | max-width: 330px;
19 | padding: 15px;
20 | margin: auto;
21 | }
22 |
23 | .form-signin .form-control {
24 | position: relative;
25 | box-sizing: border-box;
26 | height: auto;
27 | padding: 10px;
28 | font-size: 16px;
29 | }
30 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
19 |
20 |
21 | Image Prediction using PyTorch
22 |
23 |
24 |
32 |
33 |
34 |
35 |
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_improved/templates/result.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
19 |
20 |
21 | Image Prediction using PyTorch
22 |
23 |
24 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/flask_imagenet_service.py:
--------------------------------------------------------------------------------
1 | """
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2019 Avinash Sajjanshetty
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | this software and associated documentation files (the "Software"), to deal in
8 | the Software without restriction, including without limitation the rights to
9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
10 | the Software, and to permit persons to whom the Software is furnished to do so,
11 | subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
23 |
24 | # PyTorch Flask API
25 |
26 | This repo contains a sample code to show how to create a Flask API server by deploying our PyTorch model. This is a sample code which goes with [tutorial](https://pytorch.org/tutorials/intermediate/flask_rest_api_tutorial.html).
27 |
28 | If you'd like to learn how to deploy to Heroku, then check [this repo](https://github.com/avinassh/pytorch-flask-api-heroku).
29 |
30 |
31 | ## How to
32 |
33 | Install the dependencies:
34 |
35 | pip install -r requirements.txt
36 |
37 | requirements.txt should have:
38 | Flask==1.0.3
39 | torchvision==0.3.0
40 |
41 | Run the Flask server:
42 |
43 | FLASK_ENV=development FLASK_APP=[YOUR-FILE-NAME].py flask run
44 |
45 |
46 | From another tab, send the image file in a request:
47 |
48 | curl -X POST -F file=@cat_pic.jpeg http://localhost:5000/predict
49 |
50 |
51 | ## License
52 |
53 | The mighty MIT license. Please check `LICENSE` for more details.
54 |
55 | """
56 |
57 | import io
58 | import json
59 |
60 | from torchvision import models
61 | import torchvision.transforms as transforms
62 | from PIL import Image
63 | from flask import Flask, jsonify, request
64 |
65 |
app = Flask(__name__)
# Read the class index inside a context manager so the file handle is closed
# as soon as the JSON is parsed.
with open('./imagenet_class_index.json') as class_index_file:
    imagenet_class_index = json.load(class_index_file)
model = models.densenet121(pretrained=True)
model.eval()
70 |
71 |
def transform_image(image_bytes):
    """Decode raw image bytes into a normalised single-image batch tensor.

    The image is converted to RGB, resized to 255, centre-cropped to 224,
    turned into a tensor and normalised with the three-channel mean/std
    below; ``unsqueeze(0)`` then adds the leading batch dimension.
    """
    my_transforms = transforms.Compose([transforms.Resize(255),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize(
                                            [0.485, 0.456, 0.406],
                                            [0.229, 0.224, 0.225])])
    # Force three channels: the Normalize statistics above are per-RGB-channel,
    # so RGBA, palette or grayscale uploads would otherwise fail.
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    return my_transforms(image).unsqueeze(0)
81 |
82 |
def get_prediction(image_bytes):
    """Classify raw image bytes and return the matching class-index entry."""
    batch = transform_image(image_bytes=image_bytes)
    scores = model.forward(batch)
    # max over dim 1 returns (values, indices); the index picks the class.
    best_index = scores.max(1)[1]
    return imagenet_class_index[str(best_index.item())]
89 |
90 |
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the image uploaded in the ``file`` form field.

    Returns JSON ``{'class_id': ..., 'class_name': ...}`` on success, or a
    JSON error with HTTP status 400 when no usable file was uploaded.
    """
    # The route only accepts POST, so no method check is needed here.
    file = request.files.get('file')
    if not file:
        # Reject a missing or empty upload up front instead of passing empty
        # bytes to the image decoder.
        return jsonify({'error': "missing 'file' upload"}), 400

    img_bytes = file.read()
    class_id, class_name = get_prediction(image_bytes=img_bytes)
    return jsonify({'class_id': class_id, 'class_name': class_name})
98 |
99 |
100 | if __name__ == '__main__':
101 | app.run()
102 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_imagenet/imagenet_class_index.json:
--------------------------------------------------------------------------------
1 | {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": 
["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], 
"113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": 
["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": 
["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], 
"268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": 
["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", 
"Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": 
["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", 
"cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", 
"fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", 
"jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", 
"mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": 
["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], 
"785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", 
"sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", 
"water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], 
"957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}
--------------------------------------------------------------------------------
/flask_examples/flask_example_recommendation/flask_recommendation_service.py:
--------------------------------------------------------------------------------
1 | # load Flask
2 | import flask
3 | from recommend_pytorch_train import MF
4 | from recommend_pytorch_inf import get_top_n, get_previously_seen
5 | import torch
6 | import pandas as pd
7 | import surprise
8 | import time
9 |
10 |
# Flask application object; the route defined further down is registered on it.
app = flask.Flask(__name__)

# Wall-clock reference used to report how long the preloading below takes.
start_time = time.time()

# ---- data preload ----
# Built-in 'ml-1m' dataset from surprise: the full trainset and the
# anti-testset derived from it are kept in memory for every request.
data = surprise.Dataset.load_builtin('ml-1m')
trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()

# Movie metadata: a '::'-separated file with no header row, indexed by 'iid'.
movies_df = pd.read_csv(
    '../data/ml-1m/movies.dat',
    sep="::",
    header=None,
    engine='python',
)
movies_df.columns = ['iid', 'name', 'genre']
movies_df = movies_df.set_index('iid')

# ---- model preload ----
k = 30  # latent dimension
# NOTE(review): presumably regularisation weights for MF -- confirm against
# recommend_pytorch_train.
c_bias = 1e-6
c_vector = 1e-6
model = MF(
    trainset.n_users,
    trainset.n_items,
    k=k,
    c_bias=c_bias,
    c_vector=c_vector,
)
# TODO: prevent overwriting
model.load_state_dict(
    torch.load('../data/models/recommendation_model_pytorch.pkl')
)
# Switch the module to evaluation mode; it is only used for inference here.
model.eval()

print('Model and data preloading completed in ', time.time()-start_time)
35 |
36 |
@app.route("/", methods=["GET"])
def recommend():
    """Serve movie recommendations for the user named by the ``uid`` query arg.

    Returns:
        A JSON response that always contains ``success``. When ``uid`` is
        present it also contains ``uid`` and, as far as the lookups got,
        ``seen`` (result of ``get_previously_seen``) and ``recommended``
        (second element of each entry returned by ``get_top_n`` with n=10).
        ``success`` is True only if every step completed.
    """
    data = {"success": False}

    if "uid" in flask.request.args:
        data['uid'] = str(flask.request.args['uid'])

        try:
            data['seen'] = get_previously_seen(
                trainset, data['uid'], movies_df)
            recommended = get_top_n(
                model, testset, trainset, data['uid'], movies_df, n=10)
            print(recommended)
            data['recommended'] = [x[1] for x in recommended]
            data["success"] = True
        except Exception:
            # Best effort: the client still receives ``success: False``, but
            # the failure is logged with its traceback instead of vanishing.
            # ``Exception`` (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            app.logger.exception(
                "Recommendation failed for uid=%r", data['uid'])

    return flask.jsonify(data)
58 |
59 |
# Start the Flask app only when this file is executed as a script.
# host='0.0.0.0' binds to all network interfaces, which allows remote
# connections.
if __name__ == '__main__':
    app.run(host='0.0.0.0')
63 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/flask_simple_regression_improved_service.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from flask import Flask, jsonify, request, render_template
3 |
4 |
def get_model(b, A):
    """Build a one-argument callable that evaluates ``b * x + A``.

    Args:
        b: Multiplier applied to the input.
        A: Offset added after the multiplication.

    Returns:
        A function ``line(x)`` closed over ``b`` and ``A``.
    """
    def line(x):
        """Evaluate the line at ``x``."""
        scaled = b * x
        return scaled + A

    return line
9 |
10 |
# Load the fitted parameters once at import time and build the predictor.
# The ``with`` block closes the file handle as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at a model file you produced and trust.
with open('/home/theja/teach/mlops-data/models/simple_regression.pkl',
          'rb') as _model_file:
    model_params = pickle.load(_model_file)
# The first two stored values are passed as ``b`` and ``A`` of get_model.
model = get_model(model_params[0], model_params[1])


app = Flask(__name__)
17 |
18 |
@app.route("/", methods=["GET", "POST"])
def predict():
    """
    Serve predictions both as a JSON API and as an HTML form.

    * If the query string contains ``x``, return JSON with the raw input and
      the model prediction, or a JSON error message when ``x`` cannot be
      converted to a float.
    * Otherwise, on POST, read ``x`` from the submitted form and render
      ``result.html``; the prediction is None when ``x`` is missing or not
      numeric.
    * Otherwise render ``index.html``.
    """
    if "x" in request.args:
        raw_x = request.args['x']
        try:
            prediction = model(float(raw_x))
        except (TypeError, ValueError):
            return jsonify({'success': 'false', 'message': 'Input x was not passed correctly.'})
        return jsonify({'input': raw_x, 'prediction': prediction})
    elif request.method == 'POST':
        result = {'x': request.form.get('x'), 'prediction': None}
        try:
            # float(None) raises TypeError when the field is absent and
            # ValueError when it is not numeric; both leave prediction as None.
            result['prediction'] = model(float(result['x']))
        except (TypeError, ValueError):
            pass
        return render_template('result.html', result=result)

    return render_template('index.html')
37 |
38 |
39 | if __name__ == '__main__':
40 | app.run()
41 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/static/pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/flask_examples/flask_example_regression/flask_simple_regression_improved/static/pytorch.png
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/static/style.css:
--------------------------------------------------------------------------------
1 | html,
2 | body {
3 | height: 100%;
4 | }
5 |
6 | body {
7 | display: -ms-flexbox;
8 | display: flex;
9 | -ms-flex-align: center;
10 | align-items: center;
11 | padding-top: 40px;
12 | padding-bottom: 40px;
13 | background-color: #f5f5f5;
14 | }
15 |
16 | .form-signin {
17 | width: 100%;
18 | max-width: 330px;
19 | padding: 15px;
20 | margin: auto;
21 | }
22 |
23 | .form-signin .form-control {
24 | position: relative;
25 | box-sizing: border-box;
26 | height: auto;
27 | padding: 10px;
28 | font-size: 16px;
29 | }
30 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
9 |
21 |
22 |
23 | Simple Model Serving
24 |
25 |
26 |
27 |
34 |
37 |
40 |
43 |
59 |
60 |
70 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_improved/templates/result.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
9 |
21 |
22 |
23 | Prediction
24 |
25 |
26 |
27 |
28 |
34 |
37 |
40 |
43 |
44 |
54 |
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/flask_simple_regression_service.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request
2 | import flask
3 | import os
4 |
def model(x):
    """Return the toy linear prediction ``2*x+1`` for input ``x``."""
    doubled = 2 * x
    return doubled + 1
7 |
8 | app = Flask(__name__)
9 |
@app.route("/", methods=["GET"])
def hello():
    """Report the installed Flask version as plain text."""
    message = f"flask version: {flask.__version__}"
    return message
13 |
@app.route("/mypredict", methods=["GET"])
def predict():
    """
    Endpoint to make a prediction based on the input parameter 'x'.

    This function handles GET requests to the /mypredict route. It checks if the
    'x' parameter is present in the request arguments. If 'x' is present, it attempts
    to convert 'x' to a float and pass it to the model for prediction. The result is
    returned as a JSON response containing the input and the prediction. If 'x' is
    not present or cannot be converted, an error message is returned.

    Returns:
        Response: A JSON response containing the input and prediction if successful,
        or an error message if 'x' is not provided or is not a valid number.
    """
    # check if x is in the arguments
    if "x" in request.args:
        raw_x = request.args['x']
        try:
            prediction = model(float(raw_x))
        except (TypeError, ValueError):
            # Fall through to the shared error response below.
            pass
        else:
            return jsonify({'input': raw_x,
                            'prediction': prediction})

    return jsonify({'success': 'false', 'message': 'Input x was not passed.'})
38 |
39 |
40 | if __name__ == '__main__':
41 | host = os.getenv('FLASK_RUN_HOST', '127.0.0.1')
42 | port = int(os.getenv('FLASK_RUN_PORT', 5000))
43 |
44 | app.run(host=host, port=port)
45 |
--------------------------------------------------------------------------------
/flask_examples/flask_example_regression/requirements.txt:
--------------------------------------------------------------------------------
1 | flask==3.1.0
--------------------------------------------------------------------------------
/flask_examples/flask_example_weather/flask_weather_service.py:
--------------------------------------------------------------------------------
1 | # load Flask
2 | import flask
3 | import requests
4 | from flask import jsonify
5 | from geopy.geocoders import Nominatim
6 |
7 | app = flask.Flask(__name__)
8 |
9 | # define a predict function as an endpoint
@app.route("/", methods=["GET", "POST"])
def weather():
    """
    Geocode the place named by ``msg`` and return its weather.gov forecast.

    ``msg`` is read from the JSON body when the request is JSON, otherwise
    from the query string. The JSON response always contains ``success``.
    When geocoding works it also contains ``location`` (address, latitude,
    longitude, altitude), and when the forecast lookup works it contains
    ``response`` with the forecast payload.

    Returns:
        Response: JSON-encoded ``data`` dictionary; ``success`` is False when
        ``msg`` is missing, the place cannot be geocoded, or either
        weather.gov request fails.
    """
    request_timeout = 10  # seconds; keeps a stalled upstream from hanging the worker

    data = {"success": False}
    # https://pypi.org/project/geopy/
    geolocator = Nominatim(user_agent="cloud_function_weather_app")

    # Works with post req:
    # curl -i -H "Content-Type: application/json" -X POST -d "{\"msg\":\"Chicago\"}" localhost:5000
    if flask.request.is_json:
        params = flask.request.json
    else:
        params = flask.request.args

    if "msg" not in params:
        return jsonify(data)

    try:
        location = geolocator.geocode(str(params["msg"]))
    except Exception:
        # Geocoder/network failures become a success=False response rather
        # than an unhandled 500.
        app.logger.exception("Geocoding failed for %r", params["msg"])
        return jsonify(data)

    # geocode() yields None when nothing matches; reading .address on it
    # would raise AttributeError.
    if location is None:
        return jsonify(data)

    data["location"] = [
        location.address,
        location.latitude,
        location.longitude,
        location.altitude,
    ]
    # https://www.weather.gov/documentation/services-web-api
    try:
        points = requests.get(
            f"https://api.weather.gov/points/{location.latitude},{location.longitude}",
            timeout=request_timeout,
        )
        points.raise_for_status()
        forecast = requests.get(
            f"{points.json()['properties']['forecast']}",
            timeout=request_timeout,
        )
        # Do not report success for an HTTP error payload.
        forecast.raise_for_status()
        data["response"] = forecast.json()
        data["success"] = True
    except Exception:
        app.logger.exception("Forecast lookup failed for %r", params["msg"])
    return jsonify(data)
48 |
49 |
50 | # start the flask app, allow remote connections
51 | if __name__ == "__main__":
52 | app.run(host="0.0.0.0")
53 |
--------------------------------------------------------------------------------
/github_actions_example/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - name: Checkout code
17 | uses: actions/checkout@v2
18 |
19 | - name: Set up Node.js
20 | uses: actions/setup-node@v2
21 | with:
22 | node-version: '14'
23 |
24 | - name: Install dependencies
25 | run: npm install
26 |
27 | - name: Run tests
28 | run: npm test
29 |
30 | - name: Build
31 | run: npm run build
--------------------------------------------------------------------------------
/github_actions_example/README.md:
--------------------------------------------------------------------------------
1 | # My Project
2 |
3 | This project is a simple JavaScript application that serves as an entry point for demonstrating a GitHub Actions workflow.
4 |
5 | ## Purpose
6 |
7 | The purpose of this project is to showcase how to set up a continuous integration pipeline using GitHub Actions.
8 |
9 | ## Setup
10 |
11 | To set up this project locally, follow these steps:
12 |
13 | 1. Clone the repository:
14 | ```
15 | git clone https://github.com/yourusername/my-project.git
16 | ```
17 |
18 | 2. Navigate into the project directory:
19 | ```
20 | cd my-project
21 | ```
22 |
23 | 3. Install the necessary dependencies (if any):
24 | ```
25 | npm install
26 | ```
27 |
28 | 4. Run the application:
29 | ```
30 | node src/index.js
31 | ```
32 |
33 | ## GitHub Actions
34 |
35 | This project includes a GitHub Actions workflow defined in `.github/workflows/ci.yml` that runs on specified events to ensure code quality and functionality.
--------------------------------------------------------------------------------
/github_actions_example/src/index.js:
--------------------------------------------------------------------------------
1 | console.log("Hello, World! This is the entry point of my project.");
--------------------------------------------------------------------------------
/kafka_example/client_notebooks/consumer_local_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 6,
6 | "id": "b152a484-14df-4835-a31c-36e806ed917f",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "from confluent_kafka import Consumer\n",
11 | "\n",
12 | "conf = {'bootstrap.servers': \"localhost:9092\",\n",
13 | " 'group.id': \"foo\",\n",
14 | " 'enable.auto.commit': False,\n",
15 | " 'auto.offset.reset': 'earliest'}\n",
16 | "\n",
17 | "consumer = Consumer(conf)"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 7,
23 | "id": "fe58d7c8-586f-4437-b4a7-6dbc40be9798",
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "consumer.subscribe([\"mlops-topic\"])"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 14,
33 | "id": "8fb97686-4c56-4c73-a359-bc344576ea2e",
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "msg = consumer.poll(timeout=1.0)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 15,
43 | "id": "1327eec3-e411-4749-b666-698381a3f3f8",
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/plain": [
49 | "b'this is mlops course 3'"
50 | ]
51 | },
52 | "execution_count": 15,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "msg.value()"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 5,
64 | "id": "f9008409-40f4-4466-8462-6de38feb36e7",
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "consumer.close()"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "id": "357d6527-f084-4823-9962-77c3a5965785",
75 | "metadata": {},
76 | "outputs": [],
77 | "source": []
78 | }
79 | ],
80 | "metadata": {
81 | "kernelspec": {
82 | "display_name": "Python 3",
83 | "language": "python",
84 | "name": "python3"
85 | },
86 | "language_info": {
87 | "codemirror_mode": {
88 | "name": "ipython",
89 | "version": 3
90 | },
91 | "file_extension": ".py",
92 | "mimetype": "text/x-python",
93 | "name": "python",
94 | "nbconvert_exporter": "python",
95 | "pygments_lexer": "ipython3",
96 | "version": "3.8.2"
97 | }
98 | },
99 | "nbformat": 4,
100 | "nbformat_minor": 5
101 | }
102 |
--------------------------------------------------------------------------------
/kafka_example/client_notebooks/producer_local_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "id": "18669f83-1d05-4af8-b2c0-12ed1d0920d6",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "from confluent_kafka import Producer\n",
11 | "import socket\n",
12 | "\n",
13 | "conf = {'bootstrap.servers': \"localhost:9092\",\n",
14 | " 'client.id': socket.gethostname()}\n",
15 | "\n",
16 | "producer = Producer(conf)"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 5,
22 | "id": "c4fd9b3b-1bf4-42c8-93aa-282a750262f7",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course\")"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 6,
32 | "id": "ed58b93e-628d-4101-99c0-12f38d491c24",
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course 2\")"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 7,
42 | "id": "f499c9a1-b1c5-41d6-b90d-0114383ef86d",
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "producer.produce(\"mlops-topic\", key=\"key\", value=\"this is mlops course 3\")"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "id": "27022fab-30a8-47f7-b78d-f8b43cb9533a",
53 | "metadata": {},
54 | "outputs": [],
55 | "source": []
56 | }
57 | ],
58 | "metadata": {
59 | "kernelspec": {
60 | "display_name": "Python 3",
61 | "language": "python",
62 | "name": "python3"
63 | },
64 | "language_info": {
65 | "codemirror_mode": {
66 | "name": "ipython",
67 | "version": 3
68 | },
69 | "file_extension": ".py",
70 | "mimetype": "text/x-python",
71 | "name": "python",
72 | "nbconvert_exporter": "python",
73 | "pygments_lexer": "ipython3",
74 | "version": "3.8.2"
75 | }
76 | },
77 | "nbformat": 4,
78 | "nbformat_minor": 5
79 | }
80 |
--------------------------------------------------------------------------------
/kafka_example/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 |
3 | services:
4 | zookeeper:
5 | image: 'confluentinc/cp-zookeeper:latest'
6 | environment:
7 | ZOOKEEPER_CLIENT_PORT: 2181
8 | ZOOKEEPER_TICK_TIME: 2000
9 | ports:
10 | - "2181:2181"
11 | networks:
12 | - kafka-net
13 | volumes:
14 | - zookeeper_data:/var/lib/zookeeper/data
15 | - zookeeper_log:/var/lib/zookeeper/log
16 |
17 | kafka:
18 | image: 'confluentinc/cp-kafka:latest'
19 | depends_on:
20 | - zookeeper
21 | environment:
22 | KAFKA_BROKER_ID: 1
23 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
24 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
25 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
26 | KAFKA_LOG_DIRS: /var/lib/kafka/data
27 | ports:
28 | - "9092:9092"
29 | networks:
30 | - kafka-net
31 | volumes:
32 | - kafka_data:/var/lib/kafka/data
33 |
34 | networks:
35 | kafka-net:
36 | driver: bridge
37 |
38 | volumes:
39 | zookeeper_data:
40 | zookeeper_log:
41 | kafka_data:
--------------------------------------------------------------------------------
/kafka_example/readme.md:
--------------------------------------------------------------------------------
1 | # Kafka with Docker and Zookeeper
2 |
3 | This repository demonstrates how to set up Apache Kafka with Zookeeper using Docker and Docker Compose.
4 |
5 | ## Prerequisites
6 |
7 | Ensure you have the following installed on your machine:
8 | - [Docker](https://www.docker.com/products/docker-desktop)
9 | - [Docker Compose](https://docs.docker.com/compose/install/)
10 |
11 | ## Setup Instructions
12 |
13 | 1. **Clone the repository** using git clone and navigate to kafka example directory.
14 |
15 | 2. **Start the Services**:
16 |
17 | Run the following command to bring up Kafka and Zookeeper:
18 |
19 | ```bash
20 | docker-compose up -d
21 | ```
22 |
23 | This will start the Kafka broker on port `9092` and Zookeeper on port `2181`.
24 |
25 | 3. **Verify Kafka Setup**:
26 |
27 | - To **list the topics**, use:
28 |
29 | ```bash
30 | docker exec -it <container_name> kafka-topics --bootstrap-server localhost:9092 --list
31 | ```
32 |
33 | - To **create a new topic**, use:
34 |
35 | ```bash
36 | docker exec -it <container_name> kafka-topics --bootstrap-server localhost:9092 --create --topic test-topic --partitions 1 --replication-factor 1
37 | ```
38 |
39 | Replace `<container_name>` with the actual name of the Kafka container. You can find the container name by running:
40 |
41 | ```bash
42 | docker ps
43 | ```
44 |
45 | 4. **Kafka CLI Examples**:
46 |
47 | - **Producing messages** to a topic:
48 |
49 | ```bash
50 | docker exec -it <container_name> kafka-console-producer --bootstrap-server localhost:9092 --topic test-topic
51 | ```
52 |
53 | Type your message and hit Enter to send it.
54 |
55 | - **Consuming messages** from a topic:
56 |
57 | ```bash
58 | docker exec -it <container_name> kafka-console-consumer --bootstrap-server localhost:9092 --topic test-topic --from-beginning
59 | ```
60 |
61 | ## Useful Commands
62 |
63 | - **Stop services**:
64 |
65 | ```bash
66 | docker-compose down
67 | ```
68 |
69 | - **Restart services**:
70 |
71 | ```bash
72 | docker-compose restart
73 | ```
74 |
75 | ## Troubleshooting
76 |
77 | - If Kafka fails to start, ensure no other service is using port `9092` or `2181`.
78 | - You can check logs using:
79 |
80 | ```bash
81 | docker-compose logs
82 | ```
83 |
84 | ## License
85 |
86 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
87 |
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_imperative/.weather_pod_additional.txt.swp
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/echo_server_imperative_example.txt:
--------------------------------------------------------------------------------
1 | # Step 1: Deploy an echo server, which just responds back with whatever was sent to it
2 |
3 | kubectl create deployment hello-minikube --image=k8s.gcr.io/echoserver:1.4
4 |
5 | # Step 2: Expose the port via NodePort spec
6 |
7 | kubectl expose deployment hello-minikube --type=NodePort --port=8080
8 |
9 | # Step 3: Pick the port from the result returned from running the above command (it is the latter higher number)
10 |
11 | kubectl get services hello-minikube
12 |
13 | #Step 4: Get the IP of the cluster
14 |
15 | minikube ip
16 |
17 |
18 | # Step 5: Open a browser at ip:port or use curl (with an example IP and port as shown below)
19 |
20 | curl -X POST http://192.168.99.101:31313 \
21 | -H "Content-Type: application/json" \
22 | -d '{"productId": 123456, "quantity": 100}'
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_imperative/minikube.sh:
--------------------------------------------------------------------------------
1 | minikube start \
2 | --addons="dashboard" \
3 | --addons="metrics-server"
4 |
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_nginx_replica/.weather_pod_additional.txt.swp
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/nginx_example.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | creationTimestamp: null
5 | labels:
6 | app: nginx-replica-example
7 | name: nginx-replica-example
8 | spec:
9 | replicas: 3
10 | selector:
11 | matchLabels:
12 | app: nginx-replica-example
13 | strategy: {}
14 | template:
15 | metadata:
16 | creationTimestamp: null
17 | labels:
18 | app: nginx-replica-example
19 | spec:
20 | containers:
21 | - image: nginx:1.18.0
22 | name: nginx-replica-example-k8s
23 | resources: {}
24 | status: {}
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_nginx_replica/nginx_example_additional.md:
--------------------------------------------------------------------------------
1 | ## Before running kubectl apply:
2 |
3 | - Change to minikube's docker thats on the master using
4 | minikube docker-env
5 | eval $(minikube -p minikube docker-env)
6 | - Build the image for this docker runtime (go into the docker_example folder and then) using
7 | docker build -t minikube_weather .
8 |
9 | ## Running kubectl apply:
10 |
11 | kubectl apply -f weather_pod_example.yaml
12 |
13 | ## After running kubectl apply:
14 |
15 | - Expose the container to the world
16 | kubectl expose pod test --type=NodePort --port=5000
17 |
18 | - Make an example request (find the cluster's IP and port as in the imperative example)
19 | curl http://192.168.99.101:30325?msg=Chicago
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_recommendations_pod/recommendation_pod_example.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: recommendations-pod
5 | labels:
6 | ml: pytorch
7 | spec:
8 | containers:
9 | - name: recommendation-service
10 | image: recommendations:latest
11 | imagePullPolicy: Never
12 |
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_weather_deployment/.weather_pod_additional.txt.swp
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/weather_deployment_example.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | creationTimestamp: null
5 | labels:
6 | app: weather-minikube
7 | name: weather-deployment
8 | spec:
9 | replicas: 1
10 | selector:
11 | matchLabels:
12 | app: weather-minikube
13 | strategy: {}
14 | template:
15 | metadata:
16 | creationTimestamp: null
17 | labels:
18 | app: weather-minikube
19 | spec:
20 | containers:
21 | - image: minikube_weather:latest
22 | name: weather-service-k8s
23 | resources: {}
24 | imagePullPolicy: Never
25 | status: {}
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_deployment/weather_deployment_example_additional.md:
--------------------------------------------------------------------------------
1 | ## Before running kubectl apply:
2 |
3 | - Change to minikube's docker thats on the master using
4 | minikube docker-env
5 | eval $(minikube -p minikube docker-env)
6 | - Build the image for this docker runtime (go into the docker_example folder and then) using
7 | docker build -t minikube_weather .
8 |
9 | ## Running kubectl apply:
10 |
11 | kubectl apply -f weather_deployment_example.yaml
12 |
13 | ## After running kubectl apply:
14 |
15 | - Expose the container to the world
16 | kubectl expose deployment weather-deployment --type=NodePort --port=5000
17 |
18 | - Make an example request (find the cluster's IP and port as in the imperative example)
19 | curl http://192.168.99.101:30325?msg=Chicago
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/.weather_pod_additional.txt.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thejat/mlops-code-examples/26def3f2ecd4dd3b98ac3c5dffbaa46d6053b9b9/kubernetes_examples/kubernetes_example_weather_pod/.weather_pod_additional.txt.swp
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/weather_pod_example.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: weather-pod
5 | labels:
6 | site: myhomepage
7 | spec:
8 | containers:
9 | - name: test
10 | image: minikube_weather:latest
11 | imagePullPolicy: Never
--------------------------------------------------------------------------------
/kubernetes_examples/kubernetes_example_weather_pod/weather_pod_example_additional.md:
--------------------------------------------------------------------------------
1 | ## Before running kubectl apply:
2 |
3 | - Change to minikube's docker thats on the master using
4 | minikube docker-env
5 | eval $(minikube -p minikube docker-env)
6 | - Build the image for this docker runtime (go into the docker_example folder and then) using
7 | docker build -t minikube_weather .
8 |
9 | ## Running kubectl apply:
10 |
11 | kubectl apply -f weather_pod_example.yaml
12 |
13 | ## After running kubectl apply:
14 |
15 | - Expose the container to the world
16 | kubectl expose pod weather-pod --type=NodePort --port=5000
17 |
18 | - Make an example request (find the cluster's IP and port as in the imperative example)
19 | curl http://192.168.99.101:30325?msg=Chicago
--------------------------------------------------------------------------------
/lambda_function_example/lambda_function.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | # top_n = {'196':[(1,3),(2,4)]}
4 | # movie_dict = {1:{'name':'a'},2:{'name':'b'}}
5 |
def lambda_handler(event, context):
    """
    Return the recommended movie names for the user id in ``uid``.

    The payload is taken from ``event["body"]`` (a JSON string) when that key
    exists, otherwise from ``event`` itself. Recommendations are read from
    ``top_n.json`` and movie metadata from ``movie_dict.json`` in the working
    directory.

    Args:
        event: Invocation event; either contains ``uid`` directly or a JSON
            string under ``body`` that does.
        context: Lambda context object (unused).

    Returns:
        dict: Response with ``statusCode`` 200, a JSON content-type header
        and a JSON ``body``. The body has ``success`` and, when the uid is
        known, ``response`` holding the stringified list of movie names
        (None for ids missing from the movie dictionary).
    """
    data = {"success": False}

    with open("top_n.json", "r") as read_file:
        top_n = json.load(read_file)
    with open("movie_dict.json", "r") as read_file:
        movie_dict = json.load(read_file)

    print(event)  # debug
    payload = event
    if "body" in event:
        body = event["body"]
        if body is None:
            payload = {}
        else:
            try:
                payload = json.loads(body)
            except (TypeError, ValueError):
                # Malformed or non-string body: treat as "no uid supplied"
                # so the handler answers with success=False.
                payload = {}

    # A JSON body may decode to a list or scalar; only objects can carry uid.
    if not isinstance(payload, dict):
        payload = {}

    if "uid" in payload:
        # Keys of a JSON-loaded object are always strings, so normalise the
        # uid and item ids to str before looking them up.
        recommendations = top_n.get(str(payload["uid"]))
        if recommendations is not None:
            names = [
                movie_dict.get(str(iid), {'name': None})['name']
                for (iid, _) in recommendations
            ]
            data["response"] = str(names)
            data["success"] = True

    return {
        'statusCode': 200,
        'headers': {'Content-Type': 'application/json'},
        'body': json.dumps(data)
    }
--------------------------------------------------------------------------------
/mlflow_example/mlflow_example.py:
--------------------------------------------------------------------------------
1 | import mlflow
2 | from mlflow.models import infer_signature
3 | from sklearn import datasets
4 | from sklearn.model_selection import train_test_split
5 | from sklearn.linear_model import LogisticRegression
6 | from sklearn.metrics import accuracy_score
7 |
8 |
9 | # Load the Iris dataset
10 | X, y = datasets.load_iris(return_X_y=True)
11 |
12 | # Split the data into training and test sets
13 | X_train, X_test, y_train, y_test = train_test_split(
14 | X, y, test_size=0.2, random_state=42
15 | )
16 |
17 | # Define the model hyperparameters
18 | params = {
19 | "solver": "lbfgs",
20 | "max_iter": 10,
21 | "multi_class": "auto",
22 | "random_state": 8888,
23 | }
24 |
25 | # Train the model
26 | lr = LogisticRegression(**params)
27 | lr.fit(X_train, y_train)
28 |
29 | # Predict on the test set
30 | y_pred = lr.predict(X_test)
31 |
32 | # Calculate metrics
33 | accuracy = accuracy_score(y_test, y_pred)
34 |
35 |
36 | # Set our tracking server uri for logging
37 | mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")
38 |
39 | # Create a new MLflow Experiment
40 | mlflow.set_experiment("MLops Course")
41 |
42 | # Start an MLflow run
43 | with mlflow.start_run():
44 | # Log the hyperparameters
45 | mlflow.log_params(params)
46 |
47 | # Log the accuracy metric
48 | mlflow.log_metric("accuracy", accuracy)
49 |
50 | # Set a tag that we can use to remind ourselves what this run was for
51 | mlflow.set_tag("Training Info", "Basic LR model for iris data")
52 |
53 | # Infer the model signature
54 | signature = infer_signature(X_train, lr.predict(X_train))
55 |
56 | # Log the model
57 | model_info = mlflow.sklearn.log_model(
58 | sk_model=lr,
59 | artifact_path="iris_model",
60 | signature=signature,
61 | input_example=X_train,
62 | registered_model_name="tracking-quickstart",
63 | )
64 |
--------------------------------------------------------------------------------
/mlflow_example/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow==2.20.1
2 |
--------------------------------------------------------------------------------
/model_example_regression/simple_regression_inf.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle
3 | import matplotlib.pyplot as plt
4 |
5 |
def get_model(b, A):
    """
    Build a callable that evaluates the line ``b * x + A``.

    Args:
        b: Multiplier applied to the input.
        A: Constant added after the multiplication.

    Returns:
        A one-argument function ``line(x)`` returning ``b * x + A``.
    """
    slope, intercept = b, A

    def line(x):
        scaled = slope * x
        return scaled + intercept

    return line
10 |
11 |
if __name__ == "__main__":

    # Read the pickled parameter pair; the context manager closes the file
    # handle as soon as loading finishes.
    with open('../data/models/simple_regression.pkl', 'rb') as model_file:
        model_params = pickle.load(model_file)
    # model_params[0] is passed as b and model_params[1] as A of get_model.
    model = get_model(model_params[0], model_params[1])

    # Evaluate the line on 50 evenly spaced points in [-1, 1] and plot it.
    X = np.linspace(start=-1, stop=1, num=50)
    Ypred = [model(x) for x in X]
    plt.plot(X, Ypred)
    plt.title('Simple regression.')
    plt.ylabel('y predicted values')
    plt.xlabel('x values')
    plt.show()
25 |
--------------------------------------------------------------------------------
/model_example_regression/simple_regression_train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | import pickle
4 |
5 |
def fit(X, Y):
    """
    Fit the least-squares line ``y = b * x + A`` to paired samples.

    Adapted from
    https://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/

    The slope is computed directly as ``Sxy / Sxx`` (sum of cross deviations
    over sum of squared x deviations). This is algebraically the same as
    ``r * std(Y) / std(X)`` but does not divide by the spread of ``Y``, so a
    constant ``Y`` yields a horizontal line instead of a division by zero.

    Args:
        X: Sequence of x values.
        Y: Sequence of y values paired with ``X`` by position.

    Returns:
        tuple: ``(line, [b, A])`` where ``line(x)`` evaluates ``b * x + A``.

    Raises:
        ZeroDivisionError: For plain Python numbers when the input is empty
            or all x values are identical.
    """

    def mean(Xs):
        return sum(Xs) / len(Xs)

    m_X = mean(X)
    m_Y = mean(Y)

    sum_xy = 0
    sum_sq_v_x = 0
    for (x, y) in zip(X, Y):
        var_x = x - m_X
        sum_xy += var_x * (y - m_Y)
        sum_sq_v_x += var_x * var_x

    b = sum_xy / sum_sq_v_x
    A = m_Y - b * m_X

    def line(x):
        return b * x + A

    return line, [b, A]
44 |
45 |
if __name__ == "__main__":

    # Synthetic training data drawn exactly from the line y = 2.5 * x + 3.
    X = np.array([1, 2, 3, 5, 22, -10])
    Y = 2.5*X + 3

    model, model_params = fit(X, Y)
    print('2', model(2))
    print('-1', model(-1))
    print('0', model(0))

    # Persist the fitted [b, A] pair; the context manager flushes and closes
    # the file even if pickling fails.
    with open('../data/models/simple_regression.pkl', 'wb') as model_file:
        pickle.dump(model_params, model_file)
58 |
--------------------------------------------------------------------------------
/pyspark_example/app/example.py:
--------------------------------------------------------------------------------
# Minimal PySpark job: build a small DataFrame and print it.
from pyspark.sql import SparkSession

# Create a Spark session
spark = SparkSession.builder.appName("PySpark Example").getOrCreate()

try:
    # Create a simple DataFrame
    data = [("John", 30), ("Jane", 25), ("Sam", 35)]
    df = spark.createDataFrame(data, ["Name", "Age"])

    # Show the DataFrame
    df.show()
finally:
    # Stop the Spark session even if the job above raises
    spark.stop()
--------------------------------------------------------------------------------
/pyspark_example/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | spark-master:
4 | image: bitnami/spark:latest
5 | container_name: spark-master
6 | environment:
7 | - SPARK_MODE=master
8 | - SPARK_RPC_AUTHENTICATION_ENABLED=no
9 | - SPARK_RPC_ENCRYPTION_ENABLED=no
10 | - SPARK_SSL_ENABLED=no
11 | ports:
12 | - "8086:8080"
13 | - "7077:7077"
14 |
15 | spark-worker-1:
16 | image: bitnami/spark:latest
17 | container_name: spark-worker-1
18 | environment:
19 | - SPARK_MODE=worker
20 | - SPARK_MASTER_URL=spark://spark-master:7077
21 | - SPARK_WORKER_MEMORY=1G
22 | - SPARK_WORKER_CORES=1
23 | depends_on:
24 | - spark-master
25 | ports:
26 | - "8081:8081"
27 |
28 | spark-worker-2:
29 |   image: bitnami/spark:latest
30 |   container_name: spark-worker-2
31 |   environment:
32 |     - SPARK_MODE=worker
33 |     - SPARK_MASTER_URL=spark://spark-master:7077
34 |     - SPARK_WORKER_MEMORY=1G
35 |     - SPARK_WORKER_CORES=1
36 |   depends_on:
37 |     - spark-master
38 |   ports:
39 |     - "8082:8081" # worker web UI listens on 8081 inside the container; publish it on host port 8082
40 |
41 | spark-pyspark:
42 | image: bitnami/spark:latest
43 | container_name: spark-pyspark
44 | environment:
45 | - SPARK_MODE=client
46 | depends_on:
47 | - spark-master
48 | volumes:
49 | - ./app:/app
50 | command: "spark-submit --master spark://spark-master:7077 /app/example.py"
51 |
--------------------------------------------------------------------------------
/pyspark_example/readme.md:
--------------------------------------------------------------------------------
1 | # Apache Spark Cluster with PySpark using Docker
2 |
3 | This project demonstrates how to set up an Apache Spark cluster with PySpark using Docker and Docker Compose. The setup includes a Spark master node, two Spark worker nodes, and a PySpark client that can submit jobs to the cluster.
4 |
5 | ## Prerequisites
6 |
7 | Ensure you have the following installed on your machine:
8 | - [Docker](https://www.docker.com/products/docker-desktop)
9 | - [Docker Compose](https://docs.docker.com/compose/install/)
10 |
11 | ## Setup Instructions
12 |
13 | 1. **Clone this repository** or create the following `docker-compose.yml` file in your project directory:
14 |
15 | ```yaml
16 | version: '3'
17 | services:
18 | spark-master:
19 | image: bitnami/spark:latest
20 | container_name: spark-master
21 | environment:
22 | - SPARK_MODE=master
23 | - SPARK_RPC_AUTHENTICATION_ENABLED=no
24 | - SPARK_RPC_ENCRYPTION_ENABLED=no
25 | - SPARK_SSL_ENABLED=no
26 | ports:
27 | - "8080:8080"
28 | - "7077:7077"
29 |
30 | spark-worker-1:
31 | image: bitnami/spark:latest
32 | container_name: spark-worker-1
33 | environment:
34 | - SPARK_MODE=worker
35 | - SPARK_MASTER_URL=spark://spark-master:7077
36 | - SPARK_WORKER_MEMORY=1G
37 | - SPARK_WORKER_CORES=1
38 | depends_on:
39 | - spark-master
40 | ports:
41 | - "8081:8081"
42 |
43 | spark-worker-2:
44 |   image: bitnami/spark:latest
45 |   container_name: spark-worker-2
46 |   environment:
47 |     - SPARK_MODE=worker
48 |     - SPARK_MASTER_URL=spark://spark-master:7077
49 |     - SPARK_WORKER_MEMORY=1G
50 |     - SPARK_WORKER_CORES=1
51 |   depends_on:
52 |     - spark-master
53 |   ports:
54 |     - "8082:8081" # worker web UI listens on 8081 inside the container; publish it on host port 8082
55 |
56 | spark-pyspark:
57 | image: bitnami/spark:latest
58 | container_name: spark-pyspark
59 | environment:
60 | - SPARK_MODE=client
61 | depends_on:
62 | - spark-master
63 | volumes:
64 | - ./app:/app
65 | command: "spark-submit --master spark://spark-master:7077 /app/example.py"
66 | ```
67 |
68 | 2. **Create the PySpark Script**:
69 |
70 | Create a directory called `app`, and inside it, create a PySpark script named `example.py` with the following content:
71 |
72 | ```python
73 | # example.py
74 | from pyspark.sql import SparkSession
75 |
76 | # Create a Spark session
77 | spark = SparkSession.builder.appName("PySpark Example").getOrCreate()
78 |
79 | # Create a simple DataFrame
80 | data = [("John", 30), ("Jane", 25), ("Sam", 35)]
81 | df = spark.createDataFrame(data, ["Name", "Age"])
82 |
83 | # Show the DataFrame
84 | df.show()
85 |
86 | # Stop the Spark session
87 | spark.stop()
88 | ```
89 |
90 | 3. **Start the Spark Cluster**:
91 |
92 | Run the following command to start the Spark master and worker nodes, as well as the PySpark client:
93 |
94 | ```bash
95 | docker-compose up -d
96 | ```
97 |
98 | This will launch:
99 | - Spark Master on `http://localhost:8080`
100 | - Two Spark Workers on `http://localhost:8081` and `http://localhost:8082`
101 | - PySpark client that runs the job defined in `example.py`.
102 |
103 | 4. **Check the Spark UI**:
104 |
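105 | - **Spark Master UI**: Visit `http://localhost:8080` to monitor the Spark master. If you use the `docker-compose.yml` checked into this repository, which maps host port `8086` to the master UI, visit `http://localhost:8086` instead.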
106 | - **Worker UIs**: Workers are available at `http://localhost:8081` and `http://localhost:8082`.
107 |
108 | 5. **Submit the PySpark Job**:
109 |
110 | The PySpark job (`example.py`) is automatically submitted when you start the containers. To check the logs of the PySpark job, run:
111 |
112 | ```bash
113 | docker logs spark-pyspark
114 | ```
115 |
116 | 6. **Stop the Cluster**:
117 |
118 | To stop the Spark cluster, use:
119 |
120 | ```bash
121 | docker-compose down
122 | ```
123 |
124 | ## Useful Commands
125 |
126 | - **Start the cluster**: `docker-compose up -d`
127 | - **Stop the cluster**: `docker-compose down`
128 | - **Check PySpark job logs**: `docker logs spark-pyspark`
129 | - **Check running containers**: `docker ps`
130 | - **Restart the cluster**: `docker-compose restart`
131 |
132 | ## Directory Structure
133 |
134 | - `docker-compose.yml`
135 | - `app/`
136 |   - `example.py`
137 | ## Troubleshooting
138 |
139 | - **Ports Conflict**: Ensure that ports `8080`, `7077`, `8081`, and `8082` are not being used by other services on your machine.
140 | - **Logs**: Check logs for more detailed error messages using:
141 |
142 | ```bash
143 | docker-compose logs
144 | ```
145 |
146 | ## License
147 |
148 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
149 |
--------------------------------------------------------------------------------
/pytest_example/test_function_example.py:
--------------------------------------------------------------------------------
1 | # content of test_function_example.py: inc() plus four pytest tests (test_answer4 fails)
2 | def inc(x):
3 | return x + 1
4 |
5 |
6 | def test_answer():
7 | assert inc(3) == 4
8 |
9 | def test_answer2():
10 | assert inc(3) == 4
11 |
12 | def test_answer3():
13 | assert inc(3) == 4
14 |
15 | def test_answer4():
16 | assert inc(3) == 5 # This test will fail: inc(3) returns 4, not 5
--------------------------------------------------------------------------------
/ray_cluster_example/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use the official Ray Docker image
2 | FROM rayproject/ray:latest
3 |
4 | # Install additional dependencies (if necessary), e.g.:
5 | # RUN pip install <package-name>
6 |
7 | # Disable Python stdout/stderr buffering so output appears in the container logs immediately
8 | ENV PYTHONUNBUFFERED=1
9 |
--------------------------------------------------------------------------------
/ray_cluster_example/app/example.py:
--------------------------------------------------------------------------------
1 | import ray
2 |
3 | # Connect to an already-running Ray cluster (address='auto'); runs at import time, outside the main guard
4 | ray.init(address='auto')
5 |
6 | @ray.remote
7 | def square(x):
8 | return x * x
9 |
10 | if __name__ == "__main__":
11 | # Submit 100 square() tasks to the cluster, then block in ray.get() until all results are available
12 | futures = [square.remote(i) for i in range(100)]
13 | results = ray.get(futures)
14 |
15 | print(results)
--------------------------------------------------------------------------------
/ray_cluster_example/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | services:
2 | ray-head:
3 | build: .
4 | command: >
5 | bash -c "ray start --head --port=6379 --dashboard-host 0.0.0.0 && tail -f /dev/null"
6 | ports:
7 | - "8265:8265" # Ray Dashboard
8 | - "6380:6379" # Change external port to 6380, while internal remains 6379 for Ray
9 | volumes:
10 | - ./app:/app # Optional: Mount your app directory
11 | environment:
12 | - PYTHONUNBUFFERED=1
13 | networks:
14 | - ray-network
15 |
16 | ray-worker:
17 | build: .
18 | command: >
19 | bash -c "ray start --address='ray-head:6379' && tail -f /dev/null"
20 | depends_on:
21 | - ray-head
22 | volumes:
23 | - ./app:/app # Optional: Mount your app directory
24 | environment:
25 | - PYTHONUNBUFFERED=1
26 | networks:
27 | - ray-network
28 | deploy:
29 | replicas: 2 # Number of worker nodes
30 |
31 | networks:
32 | ray-network:
33 | driver: bridge
34 |
--------------------------------------------------------------------------------
/ray_cluster_example/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Ray Cluster Example using Docker Compose
4 |
5 | This project sets up a **Ray cluster** using Docker Compose. The cluster consists of a head node and multiple worker nodes, allowing for distributed computation and parallel task execution with Ray.
6 |
7 | ## Table of Contents
8 |
9 | - [Prerequisites](#prerequisites)
10 | - [Getting Started](#getting-started)
11 | - [1. Clone the Repository](#1-clone-the-repository)
12 | - [2. Build the Docker Images](#2-build-the-docker-images)
13 | - [3. Start the Ray Cluster](#3-start-the-ray-cluster)
14 | - [4. Access the Ray Dashboard](#4-access-the-ray-dashboard)
15 | - [5. Running a Distributed Task](#5-running-a-distributed-task)
16 | - [Scaling the Cluster](#scaling-the-cluster)
17 | - [Stopping the Cluster](#stopping-the-cluster)
18 | - [Troubleshooting](#troubleshooting)
19 |
20 | ## Prerequisites
21 |
22 | Make sure you have the following installed on your system:
23 |
24 | - [Docker](https://www.docker.com/get-started)
25 | - [Docker Compose](https://docs.docker.com/compose/install/)
26 |
27 | ## Getting Started
28 |
29 | ### 1. Clone the Repository
30 |
31 | ```bash
32 | git clone https://github.com/thejat/mlops-code-examples.git
33 | cd mlops-code-examples
34 | cd ray_cluster_example
35 | ```
36 |
37 | ### 2. Build the Docker Images
38 |
39 | To build the Ray head and worker Docker images:
40 |
41 | ```bash
42 | docker-compose build
43 | ```
44 |
45 | ### 3. Start the Ray Cluster
46 |
47 | To start the Ray cluster, including one head node and two worker nodes:
48 |
49 | ```bash
50 | docker-compose up -d
51 | ```
52 |
53 | ### 4. Access the Ray Dashboard
54 |
55 | You can monitor the Ray cluster through the **Ray Dashboard**. The dashboard will be available at [http://localhost:8265](http://localhost:8265).
56 |
57 | ### 5. Running a Distributed Task
58 |
59 | Once the Ray cluster is running, you can submit jobs or run distributed tasks. For example, you can create a simple script like `example.py` in the `app` directory:
60 |
61 | ```python
62 | import ray
63 |
64 | # Connect to the Ray cluster
65 | ray.init(address='auto')
66 |
67 | @ray.remote
68 | def square(x):
69 | return x * x
70 |
71 | if __name__ == "__main__":
72 | # Distribute tasks across the Ray cluster
73 | results = ray.get([square.remote(i) for i in range(100)])
74 | print(results)
75 | ```
76 |
77 | To run the script inside the head node container:
78 |
79 | ```bash
80 | docker exec -it ray_cluster_example-ray-head-1 python /app/example.py
81 | ```
82 |
83 | This will distribute the computation across the Ray cluster and return the results.
84 |
85 | ## Scaling the Cluster
86 |
87 | To scale the number of worker nodes up or down, adjust the `deploy.replicas` value in the `docker-compose.yaml` file under the `ray-worker` service:
88 |
89 | ```yaml
90 | deploy:
91 | replicas: 4 # Number of worker nodes
92 | ```
93 |
94 | Then apply the changes:
95 |
96 | ```bash
97 | docker-compose up -d --scale ray-worker=4
98 | ```
99 |
100 | ## Stopping the Cluster
101 |
102 | To stop the cluster and remove the containers:
103 |
104 | ```bash
105 | docker-compose down
106 | ```
107 |
108 | ## Troubleshooting
109 |
110 | - **Port Conflict**: If you encounter a port conflict on `6379` (Redis or Ray head port), edit the `docker-compose.yaml` file and change the external port for the Ray head service. For example, use `6380:6379` to avoid conflicts.
111 |
112 | - **Containers Exiting Immediately**: Ensure that the containers remain running by using `tail -f /dev/null` in the `docker-compose.yaml` to keep the head and worker nodes alive after starting Ray.
113 |
--------------------------------------------------------------------------------