├── tests
│   ├── __init__.py
│   └── test_model_task.py
├── requirements.txt
├── blog_post
│   ├── software_architecture.png
│   └── post.md
├── model_task_queue
│   ├── __main__.py
│   ├── __init__.py
│   ├── config.py
│   ├── celery.py
│   └── ml_model_task.py
├── test_requirements.txt
├── scripts
│   ├── simple_test.py
│   └── concurrent_test.py
├── LICENSE
├── README.md
├── .gitignore
├── setup.py
└── Makefile

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/schmidtbri/ml-model-abc-improvements#egg=iris_model
2 | celery[redis,librabbitmq]==4.3.0
3 | 
--------------------------------------------------------------------------------
/blog_post/software_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schmidtbri/task-queue-ml-model-deployment/HEAD/blog_post/software_architecture.png
--------------------------------------------------------------------------------
/model_task_queue/__main__.py:
--------------------------------------------------------------------------------
1 | """Main entry point for the Celery worker process.
2 | 
3 | .. note::
4 |     This script is here to start a worker process from the deployment package.
5 | 
6 | """
7 | import sys
8 | from model_task_queue.celery import app
9 | 
10 | 
11 | if __name__ == "__main__":
12 |     # starting the Celery app in worker mode
13 |     app.start(argv=['celery', 'worker'] + sys.argv[1:])
14 | 
--------------------------------------------------------------------------------
/test_requirements.txt:
--------------------------------------------------------------------------------
1 | # these packages are used to automate unit tests
2 | atomicwrites==1.3.0
3 | attrs==19.1.0
4 | importlib-metadata==0.21
5 | more-itertools==7.2.0
6 | packaging==19.1
7 | pluggy==0.13.0
8 | py==1.10.0
9 | pyparsing==2.4.2
10 | pytest==5.1.2
11 | six==1.12.0
12 | wcwidth==0.1.7
13 | zipp==0.6.0
14 | # these packages are used to do code analysis
15 | mccabe==0.6.1
16 | pycodestyle==2.5.0
17 | pydocstyle==4.0.1
18 | pyflakes==2.1.1
19 | pylama==7.7.1
20 | snowballstemmer==1.9.1
21 | # this package is used to calculate unit test coverage
22 | coverage==4.5.4
--------------------------------------------------------------------------------
/scripts/simple_test.py:
--------------------------------------------------------------------------------
1 | """A script to test the model task queue with a single request."""
2 | import time
3 | from model_task_queue.celery import app
4 | 
5 | 
6 | def run_test():
7 | 
8 |     task = app.tasks["model_task_queue.ml_model_task.iris_model"]
9 | 
10 |     result = task.delay(data={"sepal_length": 5.0, "sepal_width": 3.2, "petal_length": 1.2, "petal_width": 0.2})
11 | 
12 |     # waiting for the task to complete
13 |     while result.ready() is not True:
14 |         time.sleep(1)
15 | 
16 |     prediction = result.get(timeout=1)
17 |     print("The task returned this prediction: {}".format(prediction))
18 | 
19 | 
20 | if __name__ == '__main__':
21 |     run_test()
22 | 
--------------------------------------------------------------------------------
/model_task_queue/__init__.py:
--------------------------------------------------------------------------------
1 | """Simple task queue application that makes predictions with an MLModel class."""
2 | import os
3 | import sys
4 | import logging
5 | 
6 | __version_info__ = ('1', '0', '0')
7 | __version__ = '.'.join(__version_info__)
8 | 
9 | logger = logging.getLogger(__name__)
10 | 
11 | # adding the "vendors" folder to the python path if it is found in the environment; this allows the application to run
12 | # from a deployment package as well as from a virtual environment with no changes to the code
13 | vendors_path = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), "vendors")
14 | if os.path.exists(vendors_path):
15 |     sys.path = [vendors_path] + sys.path
16 |     logger.info("Found a vendors folder at: '{}', adding it to the python path.".format(vendors_path))
--------------------------------------------------------------------------------
/model_task_queue/config.py:
--------------------------------------------------------------------------------
1 | """Configuration settings for the celery application."""
2 | 
3 | 
4 | class Config(object):
5 |     """Configuration for all environments."""
6 | 
7 |     models = [
8 |         {
9 |             "module_name": "iris_model.iris_predict",
10 |             "class_name": "IrisModel"
11 |         }
12 |     ]
13 | 
14 | 
15 | class ProdConfig(Config):
16 |     """Configuration for the prod environment."""
17 | 
18 |     broker_url = 'redis://localhost:6379/0'
19 |     result_backend = 'redis://localhost:6379/0'
20 | 
21 | 
22 | class BetaConfig(Config):
23 |     """Configuration for the beta environment."""
24 | 
25 |     broker_url = 'redis://localhost:6379/0'
26 |     result_backend = 'redis://localhost:6379/0'
27 | 
28 | 
29 | class TestConfig(Config):
30 |     """Configuration for the test environment."""
31 | 
32 |     broker_url = 'redis://localhost:6379/0'
33 |     result_backend = 'redis://localhost:6379/0'
34 | 
35 | 
36 | class DevConfig(Config):
37 |     """Configuration for the dev environment."""
38 | 
39 |     broker_url = 'redis://localhost:6379/0'
40 |     result_backend = 'redis://localhost:6379/0'
41 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 schmidtbri
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/model_task_queue/celery.py:
--------------------------------------------------------------------------------
1 | """A task queue for machine learning model deployment."""
2 | # this is imported here so that the python path is set correctly before importing Celery when executing outside of a virtual environment
3 | import model_task_queue
4 | import os
5 | from celery import Celery
6 | from celery.app.registry import TaskRegistry
7 | 
8 | from model_task_queue import __name__
9 | from model_task_queue.ml_model_task import MLModelPredictionTask
10 | from model_task_queue.config import Config
11 | 
12 | 
13 | # creating a TaskRegistry in order to be able to instantiate a dynamic number of tasks in the celery app
14 | registry = TaskRegistry()
15 | 
16 | # instantiating the MLModelPredictionTask objects and adding them to a TaskRegistry object
17 | for model in Config.models:
18 |     registry.register(MLModelPredictionTask(module_name=model["module_name"],
19 |                                             class_name=model["class_name"]))
20 | 
21 | # instantiating the Celery app object
22 | app = Celery(__name__,
23 |              tasks=registry)
24 | 
25 | # importing the connection settings
26 | app.config_from_object("model_task_queue.config:{}".format(os.environ['APP_SETTINGS']))
27 | 
--------------------------------------------------------------------------------
/scripts/concurrent_test.py:
--------------------------------------------------------------------------------
1 | """A script to test the model task queue with concurrent requests."""
2 | import time
3 | from concurrent.futures import ThreadPoolExecutor as Executor
4 | from model_task_queue.celery import app
5 | 
6 | 
7 | def request_task(data):
8 |     task = app.tasks["model_task_queue.ml_model_task.iris_model"]
9 | 
10 |     result = task.delay(data=data)
11 | 
12 |     # waiting for the task to complete
13 |     while result.ready() is not True:
14 |         time.sleep(1)
15 | 
16 |     prediction = result.get(timeout=1)
17 |     return prediction
18 | 
19 | 
20 | def run_test():
21 |     data = [
22 |         {"sepal_length": 5.0, "sepal_width": 3.2, "petal_length": 1.2, "petal_width": 0.2},
23 |         {"sepal_length": 5.5, "sepal_width": 3.5, "petal_length": 1.3, "petal_width": 0.2},
24 |         {"sepal_length": 4.9, "sepal_width": 3.1, "petal_length": 1.5, "petal_width": 0.1},
25 |         {"sepal_length": 4.4, "sepal_width": 3.0, "petal_length": 1.3, "petal_width": 0.2}
26 |     ]
27 |     with Executor(max_workers=4) as exe:
28 |         jobs = [exe.submit(request_task, d) for d in data]
29 |         results = [job.result() for job in jobs]
30 |         print("The tasks returned these predictions: {}".format(results))
31 | 
32 | 
33 | if __name__ == '__main__':
34 |     run_test()
35 | 
--------------------------------------------------------------------------------
/model_task_queue/ml_model_task.py:
--------------------------------------------------------------------------------
1 | """Celery task for making ML Model predictions."""
2 | import importlib
3 | from celery import Task
4 | 
5 | from ml_model_abc import MLModel
6 | 
7 | 
8 | class MLModelPredictionTask(Task):
9 |     """Celery Task for making ML Model predictions."""
10 | 
11 |     def __init__(self, module_name, class_name):
12 |         """Class constructor."""
13 |         super().__init__()
14 |         self._model = None
15 | 
16 |         model_module = importlib.import_module(module_name)
17 |         model_class = getattr(model_module, class_name)
18 | 
19 |         if issubclass(model_class, MLModel) is False:
20 |             raise ValueError("MLModelPredictionTask can only be used with subtypes of MLModel.")
21 | 22 | # saving a reference to the class to avoid having to import it again 23 | self._model_class = model_class 24 | 25 | # dynamically adding a name to the task object 26 | self.name = "{}.{}".format(__name__, model_class.qualified_name) 27 | 28 | def initialize(self): 29 | """Class initialization.""" 30 | model_object = self._model_class() 31 | self._model = model_object 32 | 33 | def run(self, data): 34 | """Execute predictions with the MLModel class.""" 35 | if self._model is None: 36 | self.initialize() 37 | return self._model.predict(data=data) 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Task Queue ML Model Deployment 2 | Code to deploy an ML model as a task in a task queue. 3 | 4 | This code is used in this [blog post](https://towardsdatascience.com/a-task-queue-ml-model-deployment-552d2ceb38a5). 5 | 6 | ## Installation 7 | The makefile included with this project contains targets that help to automate several tasks. 8 | 9 | To download the source code execute this command: 10 | ```bash 11 | git clone https://github.com/schmidtbri/task-queue-ml-model-deployment 12 | ``` 13 | Then create a virtual environment and activate it: 14 | ```bash 15 | 16 | # go into the project directory 17 | cd task-queue-ml-model-deployment 18 | 19 | make venv 20 | 21 | source venv/bin/activate 22 | ``` 23 | 24 | Install the dependencies: 25 | ```bash 26 | make dependencies 27 | ``` 28 | 29 | ## Running the unit tests 30 | To run the unit test suite execute these commands: 31 | ```bash 32 | 33 | # first install the test dependencies 34 | make test-dependencies 35 | 36 | # run the test suite 37 | make test 38 | ``` 39 | 40 | ## Making a Deployment Package 41 | To create a tarball deployment package for the worker nodes, use this command: 42 | ```bash 43 | make deployment-package 44 | ``` 45 | 46 | ### Starting a Worker Process 47 | To start a worker process execute these commands: 48 | ```bash 49 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES 50 | export APP_SETTINGS=ProdConfig 51 | export PYTHONPATH=./ 52 | python3 -m model_task_queue --loglevel INFO 53 | ``` 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | venv/ 88 | venv 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea/ 107 | 108 | .DS_Store/ 109 | 110 | reports/ 111 | 112 | vendors/ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from os import path 4 | from io import open 5 | from setuptools import setup 6 | 7 | from model_task_queue import __name__, __version__, __doc__ 8 | 9 | # Get the long description from the README file 10 | here = path.abspath(path.dirname(__file__)) 11 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 12 | long_description = f.read() 13 | 14 | 15 | # this command line option is used when we want to create a deployment package 16 | if "--create_deployment_package" in sys.argv: 17 | sys.argv.remove("--create_deployment_package") 18 | 19 | # getting the full list of files in the "vendors" folder 20 | directory = path.abspath(path.join(path.dirname(path.dirname(__file__)), "vendors")) 21 | 22 | # create a recursive list of paths to files in a directory 23 | extra_files = [] 24 | for (path, directories, filenames) in os.walk(directory): 25 | for filename in filenames: 26 | extra_files.append(os.path.join('..', path, filename)) 27 | 28 | # removing files with .pyc, .pyo. 
and __pycache__ in their path 29 | extra_files = [s for s in extra_files if ".pyc" not in s and ".pyo" not in s and "__pycache__" not in s] 30 | 31 | # creating a parameter to send to setup function 32 | package_data = {'': extra_files} 33 | else: 34 | package_data = {'': []} 35 | 36 | 37 | setup( 38 | name=__name__, 39 | version=__version__, 40 | description=__doc__, 41 | long_description=long_description, 42 | long_description_content_type="text/markdown", 43 | url="https://github.com/schmidtbri/task-queue-ml-model-deployment", 44 | author="Brian Schmidt", 45 | author_email="6666331+schmidtbri@users.noreply.github.com", 46 | packages=["model_task_queue"], 47 | python_requires=">=3.5", 48 | install_requires=["iris-model@git+https://github.com/schmidtbri/ml-model-abc-improvements#egg=iris_model@master", 49 | "celery[redis,librabbitmq]==4.3.0"], 50 | package_data=package_data 51 | ) 52 | -------------------------------------------------------------------------------- /tests/test_model_task.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from model_task_queue.ml_model_task import MLModelPredictionTask 4 | 5 | 6 | # creating a mockup class to test with 7 | class SomeClass(object): 8 | pass 9 | 10 | 11 | class ModelManagerTests(unittest.TestCase): 12 | 13 | def test1(self): 14 | """ testing the MLModelPredictionTask class with good data """ 15 | # arrange 16 | model_task = MLModelPredictionTask(module_name="iris_model.iris_predict", class_name="IrisModel") 17 | 18 | # act 19 | exception_raised = False 20 | result = None 21 | try: 22 | result = model_task.run(data={"sepal_length": 4.4, "sepal_width": 2.9, "petal_length": 1.4, "petal_width": 0.2}) 23 | except Exception as e: 24 | exception_raised = True 25 | 26 | # assert 27 | self.assertFalse(exception_raised) 28 | self.assertTrue(type(result) is dict) 29 | 30 | def test2(self): 31 | """ testing the MLModelPredictionTask class with data with incorrect schema """ 32 | # arrange 33 | model_task = MLModelPredictionTask(module_name="iris_model.iris_predict", class_name="IrisModel") 34 | 35 | # act 36 | exception_raised = False 37 | result = None 38 | try: 39 | result = model_task.run(data={"sepal_length": 4.4, "sepal_width": 2.9, "petal_width": 0.2}) 40 | except Exception as e: 41 | exception_raised = True 42 | 43 | # assert 44 | self.assertTrue(exception_raised) 45 | self.assertTrue(result is None) 46 | 47 | def test3(self): 48 | """ testing that the MLModelPredictionTask only allows MLModel objects to be stored """ 49 | # arrange, act 50 | # using the class defined at the top of this file to test 51 | exception_raised = False 52 | exception_message = "" 53 | try: 54 | model_task = MLModelPredictionTask(module_name="tests.test_model_task", class_name="SomeClass") 55 | except Exception as e: 56 | exception_raised = True 57 | exception_message = str(e) 58 | 59 | # assert 60 | self.assertTrue(exception_raised) 61 | self.assertTrue(exception_message == "MLModelPredictionTask can only be used with subtypes of MLModel.") 62 | 63 | 64 | if __name__ == '__main__': 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TEST_PATH=./tests 2 | 3 | .DEFAULT_GOAL := help 4 | 5 | .PHONY: help clean-pyc build clean-build deployment-package venv dependencies test-dependencies clean-venv test test-reports clean-test check-codestyle check-docstyle 6 | 7 | 
help: 8 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 9 | 10 | clean-pyc: ## Remove python artifacts. 11 | find . -name '*.pyc' -exec rm -f {} + 12 | find . -name '*.pyo' -exec rm -f {} + 13 | find . -name '*~' -exec rm -f {} + 14 | 15 | build: ## build a package 16 | python setup.py sdist bdist_wheel 17 | 18 | clean-build: ## clean build artifacts 19 | rm -rf build 20 | rm -rf dist 21 | rm -rf model_task_queue.egg-info 22 | 23 | deployment-package: ## makes a deployment package with all dependencies 24 | # installing all dependencies to the vendors directory 25 | mkdir vendors 26 | pip install --target vendors -r requirements.txt 27 | python setup.py sdist --create_deployment_package 28 | rm -rf vendors 29 | rm -rf build 30 | rm -rf model_task_queue.egg-info 31 | 32 | venv: ## create virtual environment 33 | python3 -m venv venv 34 | venv/bin/pip install --upgrade pip 35 | venv/bin/pip install --upgrade setuptools 36 | venv/bin/pip install --upgrade wheel 37 | 38 | dependencies: ## install dependencies from requirements.txt 39 | pip install -r requirements.txt 40 | 41 | test-dependencies: ## install dependencies from test_requirements.txt 42 | pip install -r test_requirements.txt 43 | 44 | clean-venv: ## remove all packages from virtual environment 45 | pip freeze | grep -v "^-e" | xargs pip uninstall -y 46 | 47 | test: clean-pyc ## Run unit test suite. 48 | pytest --verbose --color=yes $(TEST_PATH) 49 | 50 | test-reports: clean-pyc ## Run unit test suite with reporting 51 | mkdir -p reports 52 | python -m coverage run --source model_task_queue -m pytest --verbose --color=yes --junitxml=./reports/unit_tests.xml $(TEST_PATH) 53 | coverage xml -o ./reports/coverage.xml 54 | rm -rf .coverage 55 | 56 | clean-test: ## Remove test artifacts 57 | rm -rf .pytest_cache 58 | rm -rf .coverage 59 | rm -rf reports 60 | 61 | check-codestyle: ## checks the style of the code against PEP8 62 | pycodestyle model_task_queue --max-line-length=120 63 | 64 | check-docstyle: ## checks the style of the docstrings against PEP257 65 | pydocstyle model_task_queue 66 | -------------------------------------------------------------------------------- /blog_post/post.md: -------------------------------------------------------------------------------- 1 | Title: A Task Queue ML Model Deployment 2 | Date: 2019-10-24 09:24 3 | Category: Blog 4 | Slug: task-queue-ml-model-deployment 5 | Authors: Brian Schmidt 6 | Summary: When building software, we may come across situations in which we want to execute a long-running operation behind the scenes while keeping the main execution path of the code running. This is useful when the software needs to remain responsive to a user, and the long running operation would get in the way. These types of operations often involve contacting another service over the network or writing data to IO. For example, when a web service needs to send an email, often the best way to do it is to launch a task in the background that will actually send the email, and return a response to the client immediately. 7 | 8 | This blog post builds on the ideas started in 9 | [three]({filename}/articles/a-simple-ml-model-base-class/post.md) 10 | [previous]({filename}/articles/improving-the-mlmodel-base-class/post.md) 11 | [blog posts]({filename}/articles/using-ml-model-abc/post.md). 12 | 13 | The code in this blog post can be found in this [github repo](https://github.com/schmidtbri/task-queue-ml-model-deployment). 
14 | 
15 | # Introduction
16 | 
17 | When building software, we may come across situations in which we want
18 | to execute a long-running operation behind the scenes while keeping the
19 | main execution path of the code running. This is useful when the
20 | software needs to remain responsive to a user, and the long-running
21 | operation would get in the way. These types of operations often involve
22 | contacting another service over the network or writing data to IO. For
23 | example, when a web service needs to send an email, often the best way
24 | to do it is to launch a task in the background that will actually send
25 | the email, and return a response to the client immediately.
26 | 
27 | These types of tasks are often handled in a task queue, which can also
28 | be called a [job queue](https://en.wikipedia.org/wiki/Job_queue). A task
29 | queue is a service that receives requests to perform tasks, finds the
30 | resources necessary for each task, and schedules the task for execution.
31 | It can also store the results of the tasks for later retrieval. Tasks
32 | usually execute asynchronously, which means that the client does not
33 | wait for the result of the task, but synchronous execution can also be
34 | supported.
35 | 
36 | A task queue can also execute tasks on many different physical
37 | computers, which makes it a distributed system. To handle communication
38 | between many machines, a task queue often makes use of a [message
39 | broker](https://en.wikipedia.org/wiki/Message_broker)
40 | service to handle message passing between the worker processes that
41 | execute the tasks and the clients of the tasks. A message broker service
42 | acts as a middle man, receiving, storing, routing, and sending messages
43 | between many different services. A message broker service is an
44 | implementation of the
45 | [publish-subscribe](https://en.wikipedia.org/wiki/Publish%E2%80%93subscribe_pattern)
46 | pattern. The benefit of using this pattern is that the services that
47 | communicate over the message broker remain decoupled from each other.
48 | 
49 | A task queue can be useful for machine learning model deployments, since
50 | a machine learning model may take some time to make a prediction and
51 | return a result. Most often, the ML prediction algorithm itself is
52 | CPU-bound, which means that it is limited by the availability of CPU
53 | time. This means that a task queue is usually not needed for the
54 | prediction itself, but rather for loading the data that the prediction
55 | algorithm needs to make a prediction, which is an IO-bound process.
56 | Another situation in which a task queue may be
57 | useful is when we need to make thousands of predictions and return them
58 | as a result; in this case it would be useful to launch an asynchronous
59 | task that will take care of the predictions behind the scenes and then
60 | come back later to access the results.
61 | 
62 | # Task Queueing With Celery
63 | 
64 | Celery is a Python package that handles most of the complexity of
65 | distributing and executing tasks across different processes. Celery is
66 | able to use many different types of message brokers to distribute tasks;
67 | for this blog post we'll use the Redis message broker. In order to
68 | access task results, Celery supports several kinds of result storage
69 | backends; for this blog we'll also use Redis to store the prediction
70 | results of the model. As in previous blog posts, we'll be deploying the
71 | iris\_model package, which was developed as an example and has now been
72 | deployed several times.
73 | 
74 | Since we are now dealing with more than one service and we are
75 | communicating data between several different processes over a network,
76 | it's useful to visualize the activity of the task queue with a software
77 | architecture diagram:
78 | 
79 | ![Software Architecture]({attach}software_architecture.png){ width=100% }
80 | 
81 | The client application installs the Celery application package and sends
82 | task requests through the tasks that are defined in it. Whenever a task
83 | needs to be executed, it sends a message to the message broker with any
84 | parameters that the task needs to execute. The message broker receives
85 | messages and holds them until they are picked up by the worker
86 | processes. The workers are running the Celery application and pick up
87 | messages from the message broker; when a task is completed, they store
88 | the results to the result storage backend.
89 | 
90 | # Package Structure
91 | 
92 | To begin, I set up the project structure for the application package:
93 | 
94 | ```
95 | - model_task_queue ( python package for task queue app )
96 |     - __init__.py
97 |     - __main__.py ( command line entry point )
98 |     - celery.py ( celery application )
99 |     - config.py
100 |     - ml_model_task.py ( task class )
101 | - scripts
102 |     - simple_test.py ( single prediction test )
103 |     - concurrent_test.py ( multiple prediction test )
104 | - tests ( unit tests )
105 | - Makefile
106 | - README.md
107 | - requirements.txt
108 | - test_requirements.txt
109 | - setup.py
110 | 
111 | ```
112 | 
113 | This structure can be seen here in the [github
114 | repository](https://github.com/schmidtbri/task-queue-ml-model-deployment).
115 | 
116 | # Model Async Task
117 | 
118 | Creating an asynchronous task with the Celery package is simple: it's as
119 | easy as putting a function decorator on a function. An example of how to
120 | do this can be found in the [Celery startup
121 | guide](https://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html#application).
122 | The function decorator allows the client application to call the
123 | function just like a local function, while having the actual execution
124 | of the code happen asynchronously in a worker process running on a
125 | different computer. In the client code, the function acts as a facade
126 | that hides the complexities of parameter serialization/deserialization,
127 | network communication, and other complexities of the distributed nature
128 | of the task queue.
129 | 
130 | The function decorator is a simple way to get started with Celery tasks,
131 | but we have some special requirements that make it hard to create Celery
132 | tasks this way. For example, Celery task functions don't maintain state
133 | between requests. If we had to instantiate an MLModel object for every
134 | task request, the model parameters would have to be loaded and
135 | deserialized over and over for each request. To get around this
136 | limitation we'll have to code the ML model async task in such a way that
137 | it can maintain an instance of an MLModel object in memory between
138 | requests. A way to do this can be found in the Celery documentation
139 | [here](https://docs.celeryproject.org/en/latest/userguide/tasks.html#custom-task-classes).
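For reference, a task built with the decorator approach would look something like this (a minimal sketch, assuming an already-created Celery `app` object and the IrisModel class; this is not the approach we'll use here):

```python
@app.task
def iris_predict(data):
    # a new model object is built on every call, so the model parameters
    # would be loaded and deserialized for every single request
    model = IrisModel()
    return model.predict(data=data)
```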
140 | 
141 | Following the example in the documentation, we'll define a class that
142 | inherits from the celery.Task base class:
143 | 
144 | ```python
145 | from celery import Task
146 | 
147 | class MLModelPredictionTask(Task):
148 |     """Celery Task for making ML Model predictions."""
149 | ```
150 | 
151 | The code above can be found
152 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/ml_model_task.py#L3-L9).
153 | 
154 | Now we'll define the task class' \_\_init\_\_ method:
155 | 
156 | ```python
157 | def __init__(self, module_name, class_name):
158 |     """Class constructor."""
159 |     super().__init__()
160 |     self._model = None
161 | 
162 |     model_module = importlib.import_module(module_name)
163 |     model_class = getattr(model_module, class_name)
164 | 
165 |     if issubclass(model_class, MLModel) is False:
166 |         raise ValueError("MLModelPredictionTask can only be used with subtypes of MLModel.")
167 | 
168 |     # saving the reference to the class to avoid having to import it again
169 |     self._model_class = model_class
170 | 
171 |     # adding a name to the task object
172 |     self.name = "{}.{}".format(__name__, model_class.qualified_name)
173 | ```
174 | 
175 | The code above can be found
176 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/ml_model_task.py#L11-L26).
177 | 
178 | The \_\_init\_\_() method accepts two parameters: the name of the module
179 | where we can find the MLModel-derived class, and the name of the class
180 | in that module that implements the prediction functionality. The
181 | \_\_init\_\_() method then calls the \_\_init\_\_() method of the Celery
182 | Task base class to make sure that all of the required initialization
183 | code is executed correctly. Then the "\_model" property is set to None
184 | (for now). After this, we dynamically import the model's class from the
185 | environment, and check that it is a subclass of MLModel. Next, we save a
186 | reference to the class in the "\_model\_class" property of the new task
187 | object, but we do not instantiate the model class itself; the reason for
188 | this is explained below. Lastly, we set a unique name for the Celery
189 | task based on the name of the MLModelPredictionTask class' module and
190 | the qualified name of the MLModel class that is being hosted inside of
191 | this instance of the MLModelPredictionTask class. The name of the task
192 | is set dynamically so that we are able to host many different models
193 | within the same Celery application, while guaranteeing that the tasks
194 | will have unique names.
195 | 
196 | Next, the initialize() method is responsible for instantiating
197 | the model class and saving the reference as a property of the
198 | MLModelPredictionTask object:
199 | 
200 | ```python
201 | def initialize(self):
202 |     model_object = self._model_class()
203 |     self._model = model_object
204 | ```
205 | 
206 | The code above can be found
207 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/ml_model_task.py#L28-L31).
208 | 
209 | Lastly, the run() method is responsible for doing the work of the async
210 | task:
211 | 
212 | ```python
213 | def run(self, data):
214 |     if self._model is None:
215 |         self.initialize()
216 |     return self._model.predict(data=data)
217 | ```
218 | 
219 | The code above can be found
220 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/ml_model_task.py#L33-L37).
221 | 
222 | The run() method checks if the model class is instantiated before it
223 | attempts to make a prediction. If it is not instantiated, it calls the
224 | initialize() method to create the model object before making a
225 | prediction with it. The run() method is the one that defines the actual
226 | functionality of the Celery task.
227 | 
228 | In
229 | [previous]({filename}/articles/using-ml-model-abc/post.md)
230 | [blog
231 | posts]({filename}/articles/etl-job-ml-model-deployment/post.md),
232 | the instantiation of the model class happens in the \_\_init\_\_()
233 | method of the class that is managing the model object. After this, we
234 | can use the model class to make a prediction. We have to take a
235 | different approach in this application because we need to keep the model
236 | class from being instantiated in the client application that is using
237 | the asynchronous task. This happens because the client application
238 | instantiates and manages an instance of the task class in its own
239 | process space, and uses it to communicate with the worker processes that
240 | are actually doing the work. To keep the model class from being
241 | instantiated in the client application, the run() method is actually
242 | responsible for initializing the model class instead of the
243 | \_\_init\_\_() method. The only downside to this approach is that when
244 | the worker process instantiates the task class, it will not have an
245 | instance of the model class in memory; the model object will only be
246 | created the first time that a prediction is made.
247 | 
248 | # Celery Application
249 | 
250 | Now that we have a Celery task that can host an MLModel-based class, we
251 | can start building a Celery application that hosts the tasks. Before we
252 | can build the application object itself, we need to install a model to
253 | host and create some configuration for the application.
254 | 
255 | First, we will install a machine learning model that will be hosted by
256 | the Celery application. For this we'll use the iris\_model package that
257 | I've already shown in
258 | [previous]({filename}/articles/etl-job-ml-model-deployment/post.md)
259 | [blog
260 | posts]({filename}/articles/using-ml-model-abc/post.md):
261 | 
262 | ```bash
263 | pip install git+https://github.com/schmidtbri/ml-model-abc-improvements#egg=iris_model
264 | ```
265 | 
266 | Then, we'll create a configuration class for the application:
267 | 
268 | ```python
269 | class Config(object):
270 |     """Configuration for all environments."""
271 |     models = [
272 |         {
273 |             "module_name": "iris_model.iris_predict",
274 |             "class_name": "IrisModel"
275 |         }
276 |     ]
277 | ```
278 | 
279 | The code above can be found
280 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/config.py#L4-L12).
281 | 
282 | The configuration class defines a property called "models" that is a list
283 | of dictionaries, each dictionary containing two keys. The "module\_name"
284 | key points at a module that contains an MLModel-derived class, and the
285 | "class\_name" key contains the name of the class. By storing the
286 | locations of the classes in this way, adding a new MLModel class to the
287 | application is as simple as adding an entry to the list. The
288 | configuration above points at the IrisModel class that we just installed
289 | in the iris\_model package. This class is meant to hold configuration
290 | that is shared by all of the environments.
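For example, hosting a second model alongside the iris model would only require one more entry in the "models" list. Here is a sketch of what that would look like; the sentiment\_model package and SentimentModel class are hypothetical, shown only to illustrate the pattern:

```python
models = [
    {
        "module_name": "iris_model.iris_predict",
        "class_name": "IrisModel"
    },
    {
        # hypothetical second model entry
        "module_name": "sentiment_model.sentiment_predict",
        "class_name": "SentimentModel"
    }
]
```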
291 | 
292 | In the same file we also store configuration for different environments;
293 | here is the configuration class for the production environment:
294 | 
295 | ```python
296 | class ProdConfig(Config):
297 |     """Configuration for the prod environment."""
298 |     broker_url = 'redis://localhost:6379/0'
299 |     result_backend = 'redis://localhost:6379/0'
300 | ```
301 | 
302 | The code above can be found
303 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/config.py#L15-L19).
304 | 
305 | The configuration is pointing at a Redis service on localhost for
306 | now. Now that we have configuration taken care of, we can start building
307 | the Celery application. To do this we start by instantiating a task
308 | registry:
309 | 
310 | ```python
311 | registry = TaskRegistry()
312 | ```
313 | 
314 | The code above can be found
315 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/celery.py#L14).
316 | 
317 | Next, we add tasks to the task registry:
318 | 
319 | ```python
320 | for model in Config.models:
321 |     registry.register(MLModelPredictionTask(module_name=model["module_name"], class_name=model["class_name"]))
322 | ```
323 | 
324 | The code above can be found
325 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/celery.py#L17-L19).
326 | 
327 | The loop iterates through the list of models in the configuration,
328 | instantiates an MLModelPredictionTask for each model, and registers the
329 | new task with the task registry object we defined above.
330 | 
331 | Celery tasks are usually automatically registered in a task registry as
332 | soon as they are instantiated, but we have a special situation because
333 | of the dynamic and configuration-driven nature of the Celery
334 | application. The manual registration of the tasks shown above is needed
335 | because we don't know how many tasks we will be hosting in the
336 | application; we only know this once the application starts up and reads
337 | the configuration.
338 | 
339 | Now that we have a task registry with tasks in it, we can create the
340 | Celery application object:
341 | 
342 | ```python
343 | app = Celery(__name__, tasks=registry)
344 | ```
345 | 
346 | The code above can be found
347 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/celery.py#L22-L23).
348 | 
349 | The name of the application is pulled from the module name, and the
350 | tasks parameter is the task registry object we defined above.
351 | 
352 | Lastly, we need to point the Celery application to a broker and result
353 | backend so that the clients and workers can communicate. These settings
354 | are loaded from the configuration classes we've already defined:
355 | 
356 | ```python
357 | app.config_from_object("model_task_queue.config:{}".format(os.environ['APP_SETTINGS']))
358 | ```
359 | 
360 | The code above can be found
361 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/model_task_queue/celery.py#L26).
362 | 
363 | The name of the environment is loaded from an environment variable
364 | called "APP\_SETTINGS". The environment variable is then used to load
365 | the correct configuration object from the config.py file.
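With APP\_SETTINGS set to "ProdConfig", the string passed to Celery becomes "model_task_queue.config:ProdConfig". The lookup that this "module:attribute" string triggers is roughly equivalent to the following sketch, which uses only the standard library:

```python
import importlib
import os

# build the "module:attribute" string, e.g. "model_task_queue.config:ProdConfig"
target = "model_task_queue.config:{}".format(os.environ["APP_SETTINGS"])
module_name, class_name = target.split(":")

# import the module and look up the configuration class inside of it
config_class = getattr(importlib.import_module(module_name), class_name)
```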
366 | 
367 | # Using the Task
368 | 
369 | To use the iris\_model task in the Celery application we just built,
370 | we'll need to start up an instance of Redis to serve as the message
371 | broker and storage backend for the task queue. To do this, we can use a
372 | docker image with this command:
373 | 
374 | ```bash
375 | docker run -d -p 6379:6379 redis
376 | ```
377 | 
378 | Now that we have a Redis instance to communicate with, we can start a
379 | Celery worker process:
380 | 
381 | ```bash
382 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
383 | export APP_SETTINGS=ProdConfig
384 | export PYTHONPATH=./
385 | python3 -m model_task_queue --loglevel INFO
386 | ```
387 | 
388 | The OBJC\_DISABLE\_INITIALIZE\_FORK\_SAFETY environment variable is
389 | needed on MacOS to allow Celery to fork processes when handling task
390 | execution. The APP\_SETTINGS environment variable is needed so that the
391 | Celery application will load the right configuration. The PYTHONPATH
392 | environment variable allows the Python interpreter to find the
393 | dependencies of the Celery application. The last command starts the
394 | Celery worker process by calling the script in the \_\_main\_\_.py module.
395 | 
396 | Next, we can try out the task itself in a Python interactive session:
397 | 
398 | ```python
399 | >>> import os
400 | >>> os.environ["APP_SETTINGS"] = "ProdConfig"
401 | >>> from model_task_queue.celery import app
402 | >>> task = app.tasks["model_task_queue.ml_model_task.iris_model"]
403 | >>> task.__dict__
404 | {'_model': None, '_model_class': <class
405 | 'iris_model.iris_predict.IrisModel'>, 'name':
406 | 'model_task_queue.ml_model_task.iris_model', '_exec_options':
407 | {'queue': None, 'routing_key': None, 'exchange': None,
408 | 'priority': None, 'expires': None, 'serializer': 'json',
409 | 'delivery_mode': None, 'compression': None, 'time_limit': None,
410 | 'soft_time_limit': None, 'immediate': None, 'mandatory': None,
411 | 'ignore_result': False}}
412 | ```
413 | 
414 | When using the Celery task, we first need to instantiate the Celery
415 | application object that is hosting the task. This happens when we import
416 | the model\_task\_queue.celery module. Once we have the application
417 | object, we can query the app.tasks dictionary for the model task we are
418 | interested in. The name of the task is dynamically generated from the
419 | qualified name of the model that it is hosting.
420 | 
421 | As can be seen above, when the task is first instantiated, it does not
422 | have an object reference in the \_model property. This is as we
423 | intended, since we are using the Celery application as a client and we
424 | don't want the task to instantiate the model class, which would cause the
425 | model to be deserialized in the client process.
426 | 
427 | Now that we have an instance of the task, we can try to execute it:
428 | 
429 | ```python
430 | >>> result = task.delay(data={"sepal_length": 5.0, "sepal_width": 3.2, "petal_length": 1.2, "petal_width": 0.2})
431 | >>> result.ready()
432 | True
433 | >>> result.get()
434 | {'species': 'setosa'}
435 | ```
436 | 
437 | We use the task.delay() method to call the task asynchronously, getting
438 | back a result object that can be used to get a result once the task is
439 | completed. The ready() method of the result can be used to check on the
440 | status of the result of the task. Once it is completed, the result can be
441 | retrieved from the result backend with the get() method.
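Because the result is stored in the backend, it can also be fetched later, even from another process, by using the task id (a sketch, assuming the same app object and a result that is still stored in the backend):

```python
>>> task_id = result.id
>>> from celery.result import AsyncResult
>>> AsyncResult(task_id, app=app).get(timeout=1)
{'species': 'setosa'}
```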
442 | 
443 | If the task throws an exception, the result will also throw an exception
444 | when it is accessed:
445 | 
446 | ```python
447 | >>> result = task.delay(data={"sepal_length": 5.0, "sepal_width": 3.2, "petal_length": 1.2, "petal_width": "asdfg"})
448 | >>> result.ready()
449 | True
450 | >>> result.get()
451 | Traceback (most recent call last):
452 | ...
453 | ml_model_abc.MLModelSchemaValidationException: Failed to validate input data: Key 'petal_width' error: asdfg should be instance of 'float'
454 | ```
455 | 
456 | Because the "petal\_width" field contains data that does not meet the
457 | schema of the iris model, the model threw an exception of type
458 | MLModelSchemaValidationException. The exception was caught by the Celery
459 | worker, serialized, and transported back to the client.
460 | 
461 | # Test Script
462 | 
463 | In order to test the Celery application, we'll code a script that will
464 | make use of the iris\_model task asynchronously. To use the application,
465 | we import the Celery application from the module where it is defined:
466 | 
467 | ```python
468 | from model_task_queue.celery import app
469 | ```
470 | 
471 | The code above can be found
472 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/scripts/concurrent_test.py#L4).
473 | 
474 | Next, we'll define a function that starts a task, waits for it to
475 | complete, and returns the prediction result:
476 | 
477 | ```python
478 | def request_task(data):
479 |     task = app.tasks["model_task_queue.ml_model_task.iris_model"]
480 |     result = task.delay(data=data)
481 | 
482 |     # waiting for the task to complete
483 |     while result.ready() is not True:
484 |         time.sleep(1)
485 |     prediction = result.get(timeout=1)
486 |     return prediction
487 | ```
488 | 
489 | The code above can be found
490 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/scripts/concurrent_test.py#L7-L17).
491 | 
492 | Lastly, we'll define a function that uses the function above to test the
493 | iris\_model task concurrently:
494 | 
495 | ```python
496 | def run_test():
497 |     data = [
498 |         {"sepal_length": 5.0, "sepal_width": 3.2, "petal_length": 1.2, "petal_width": 0.2},
499 |         {"sepal_length": 5.5, "sepal_width": 3.5, "petal_length": 1.3, "petal_width": 0.2},
500 |         {"sepal_length": 4.9, "sepal_width": 3.1, "petal_length": 1.5, "petal_width": 0.1},
501 |         {"sepal_length": 4.4, "sepal_width": 3.0, "petal_length": 1.3, "petal_width": 0.2}
502 |     ]
503 |     with Executor(max_workers=4) as exe:
504 |         jobs = [exe.submit(request_task, d) for d in data]
505 |         results = [job.result() for job in jobs]
506 |         print("The tasks returned these predictions: {}".format(results))
507 | ```
508 | 
509 | The code above can be found
510 | [here](https://github.com/schmidtbri/task-queue-ml-model-deployment/blob/master/scripts/concurrent_test.py#L20-L30).
511 | 
512 | The function sets up a few inputs for the model in the data list. It
513 | then calls the task concurrently using the ThreadPoolExecutor context
514 | manager from Python's concurrent.futures package. The executor runs the
515 | request_task() function concurrently in four worker threads.
516 | 
517 | To run the script, we'll need the redis docker image and the worker
518 | process to be running. The script above can be executed from the command
519 | line by using these commands:
520 | 
521 | ```bash
522 | export PYTHONPATH=./
523 | export APP_SETTINGS=ProdConfig
524 | python3 scripts/concurrent_test.py
525 | ```
526 | 
527 | # Closing
528 | 
529 | In this blog post I showed how to build a task queue application that is
530 | able to host machine learning models. A task queue is very useful in
531 | certain situations for deploying ML models because of the capabilities
532 | that it brings to the table. Task queues allow applications to do work
533 | asynchronously behind the scenes without the main application
534 | being affected.
535 | 
536 | The ML model deployment strategy I showed in this blog post works in the
537 | same way as the
538 | [previous]({filename}/articles/using-ml-model-abc/post.md)
539 | [blog
540 | posts]({filename}/articles/etl-job-ml-model-deployment/post.md)
541 | I've published. The Celery application I built does not work with only
542 | one ML model; it works with any ML model that uses the MLModel base
543 | class. The application is also able to host any number of models, and
544 | they are loaded from configuration, which means that a new model can be
545 | added to the Celery application without modifying the code. By following
546 | good software engineering design practices, we are able to easily put
547 | machine learning models into production without having to worry about
548 | the implementation details of the models. All of these capabilities stem
549 | from the design of the [MLModel base
550 | class]({filename}/articles/a-simple-ml-model-base-class/post.md).
551 | 
552 | Another interesting feature of the Celery package is that we can launch
553 | tasks from a variety of different languages. There are client libraries
554 | for [node.js](https://github.com/mher/node-celery) and
555 | [PHP](https://github.com/gjedeer/celery-php). This
556 | flexibility makes it possible to use Python for building and deploying
557 | ML models, and to use other languages for the work that is best suited
558 | for them.
559 | 
560 | A drawback of this approach is that when the Celery application is built
561 | and deployed, the dependencies of the machine learning models that it is
562 | hosting are installed along with it. This means that if two models
563 | depend on different versions of scikit-learn or pandas, for example,
564 | they can't be installed in the same Celery application. This
565 | limits the usefulness of the Celery application somewhat, since it can't
566 | host models together that have conflicting requirements.
567 | 
568 | Another drawback of this approach is the extra complexity that it
569 | entails, since it requires a message broker service, a result storage
570 | service, and the worker processes to be running for the task queue to be
571 | available to client applications. All of these requirements add extra
572 | complexity to this deployment option.
573 | 
574 | The Celery application I built is only able to deal with single
575 | prediction requests. Even though this is useful, it would make more sense
576 | for the Celery application to be used to run longer prediction jobs that
577 | make thousands of predictions at a time. An improvement that can be made
578 | to the task is to be able to launch prediction tasks that take large
579 | files as input, feed the individual records in the file as inputs to the
580 | model, and store the resulting predictions back into a storage service.
581 | The long-running task can also be instrumented to report its progress 582 | back to the client that requested the predictions. 583 | --------------------------------------------------------------------------------