├── .github ├── FUNDING.yml ├── opencollective.yml └── workflows │ └── codeql.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── constants.py ├── deploy ├── cr.yaml ├── crd.yaml ├── operator.yaml └── rbac.yaml ├── deployment_utils.py ├── docs ├── _config.yml ├── architecture.md └── index.md ├── example ├── Dockerfile ├── app.py └── run_task.py ├── handlers.py ├── models └── worker_spec.py ├── requirements.txt ├── templates ├── config_maps │ └── config_map.yaml ├── deployments │ ├── celery_worker_deployment.yaml │ └── flower_deployment.yaml ├── services │ └── flower_service.yaml └── static │ ├── celery-worker-static-deployment.yaml │ ├── flask-example.yaml │ └── redis-master.yaml └── update_utils.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | open_collective: celery 2 | 3 | -------------------------------------------------------------------------------- /.github/opencollective.yml: -------------------------------------------------------------------------------- 1 | # Collective 2 | # ---------- 3 | 4 | # Define the Open Collective you want to link the repository with. 5 | # Please use the Open Collective slug not the Open Collective URL. 6 | 7 | collective: celery 8 | 9 | # Examples 10 | 11 | # collective: webpack 12 | 13 | # Invalid examples 14 | 15 | # collective: https://opencollective.com/webpack 16 | # collective: opencollective.com/webpack 17 | # collective: https://opencollective.com/webpack/contribute/backer-266 18 | 19 | # Invitation 20 | # ---------- 21 | 22 | # Post a comment on issues created by non contributors. 23 | 24 | invitation: |- 25 | Hey :wave:, 26 | 27 | Thank you for opening an issue. We will get back to you as soon as we can. Have you seen our Open Collective page? Please consider contributing financially to our project. This will help us involve more contributors and get to issues like yours faster. 28 | 29 | 30 | 31 | > We offer `priority` support for all financial contributors. Don't forget to add `priority` label once you become one! :smile: 32 | 33 | # To disable that feature, you can use the following syntax 34 | 35 | # invitation: false 36 | 37 | # Tiers 38 | # ----- 39 | 40 | # In this section you can define specific behavior for all supported tiers, ie: "backer" "and sponsor". 41 | 42 | tiers: 43 | # # Uncomment this block if you want to activate a custom comment for people contributors to the "backer" tier 44 | # - tiers: ['backer'] 45 | # labels: ['priority'] 46 | # message: |- 47 | # Hey :wave:, 48 | # 49 | # Thank you for being a "backer". We will handle your issue with priority support. To make sure we don't forget how special you are, we added a `priority` label to your issue. 50 | # 51 | # Thank you again for contributing :tada:! 52 | 53 | # # Uncomment this block if you want to activate a custom commentfor people contributors to the "sponsor" tier 54 | # - tiers: ['sponsor'] 55 | # labels: ['priority'] 56 | # message: |- 57 | # Hey :wave:, 58 | # 59 | # Thank you for being a "sponsor". We will handle your issue with priority support. To make sure we don't forget how special you are, we added a `priority` label to your issue. 60 | # 61 | # Thank you again for contributing :tada:! 62 | 63 | # This rule will be catching any contributor whatever their tiers (also custom contributions) 64 | - tiers: '*' 65 | labels: ['priority'] 66 | message: |- 67 | Hey :wave:, 68 | 69 | Thank you for contributing to our project financially. We will handle your issue with priority support. 
To make sure we don't forget how special you are, we added a `priority` label to your issue. 70 | 71 | Thank you again for contributing :tada:! 72 | 73 | # To disable that feature, you can use the following syntax 74 | 75 | # tiers: [] 76 | 77 | 78 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | analyze: 11 | name: Analyze 12 | runs-on: ubuntu-latest 13 | permissions: 14 | actions: read 15 | contents: read 16 | security-events: write 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | language: [ python ] 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v3 26 | 27 | - name: Initialize CodeQL 28 | uses: github/codeql-action/init@v2 29 | with: 30 | languages: ${{ matrix.language }} 31 | queries: +security-and-quality 32 | 33 | - name: Autobuild 34 | uses: github/codeql-action/autobuild@v2 35 | 36 | - name: Perform CodeQL Analysis 37 | uses: github/codeql-action/analyze@v2 38 | with: 39 | category: "/language:${{ matrix.language }}" 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /usr/src/ 4 | 5 | COPY requirements.txt ./ 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY . . 9 | 10 | CMD kopf run handlers.py --verbose -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Gautam Prajapati 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Celery-Kubernetes-Operator(WIP) 2 | A basic Celery operator written in Python. To be used to manage Celery applications on a Kubernetes cluster. It started as a demo project to proposed EuroPython 2020 [proposal](https://ep2020.europython.eu/talks/BbvZjFa-advanced-infrastructure-management-in-kubernetes-using-python/). 
3 | It is currently being worked on to become a production-ready project under the Celery organization. [Here's](https://github.com/celery/ceps/pull/29) the merged CEP for the same. 4 | 5 | Please report an issue for improvement suggestions/feedback. This operator is being written with the help of the [KOPF](https://github.com/nolar/kopf) framework. 6 | 7 | # Project Scope 8 | The general idea is to bridge the gap between infrastructure and application developers: application developers can spec out a simple Celery deployment YAML and do nothing more than `kubectl apply -f` to spin up their own Celery cluster. 9 | 10 | It aims to have the following things in place: 11 | 1. A Custom Resource Definition (CRD) to spec out a Celery and Flower deployment, covering all the options that they support. 12 | 2. A custom controller implementation that registers and manages self-healing capabilities of the custom Celery object for these operations: 13 | - CREATE - Creates the worker and flower deployments along with exposing a native Service object for Flower 14 | - UPDATE - Reads the CRD modifications and updates the running deployments using the specified strategy 15 | - DELETE - Deletes the custom resource and all the child deployments 16 | 3. Keep a watch on important metrics (queue length, CPU, memory, etc.) using Flower (and native K8s solutions) to autoscale/downscale the number of workers based on the specified constraints. 17 | 18 | # Instructions to see this operator in action 19 | 20 | 1. Install [minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/). This project is developed and tested only on minikube as of now. 21 | 2. Run `minikube start --driver=hyperkit` to start minikube. It downloads a VM boot image and sets up a single-node Kubernetes cluster for you. 22 | 23 | We need to build the Docker images for the Celery example app and the operator. They are used to create the worker, flower and operator deployments. 24 | 25 | 3. Switch to the minikube Docker daemon using `eval $(minikube -p minikube docker-env)` in your shell. This is important so that minikube doesn't try to pull images from a remote registry; we use locally built images for the demo, and the image pull policy in every deployment spec is currently set to `Never`. 26 | 4. To build the operator image, run `docker build -t celery-operator .` 27 | 5. To build the celery-flask example application image, run `docker build -t example-image -f example/Dockerfile .` 28 | 6. Apply the Celery CRD using `kubectl apply -f deploy/crd.yaml`. It enables Kubernetes to understand the custom resource named Celery. 29 | 7. Create the custom resource (CR) using `kubectl apply -f deploy/cr.yaml`. It creates the Celery resource for you. 30 | 8. Apply `kubectl apply -f deploy/rbac.yaml` to give the operator the necessary permissions to watch, create and modify resources on the minikube K8s cluster. 31 | 9. We need to set up a Redis deployment in the cluster before deploying the operator: as soon as the worker and flower deployments come up, they need a broker to connect to. We're using Redis as the broker for the demo. A Deployment and Service for Redis can be created using `kubectl apply -f templates/static/redis-master.yaml`. 32 | 10. Now, to act on the newly created CR, deploy the operator: apply `kubectl apply -f deploy/operator.yaml` to set up the operator deployment.
33 | 34 | As soon as pod for the operator comes up, it notices the Celery resource and handles the creation event to setup worker deployments flower deployment and exposing Flower svc as a NodePort type to be accessed from outside the cluster. 35 | You could do `minikube service celery-crd-example-flower --url` to get the url, open it in web browser to see if the newly created workers are in healthy state or not. 36 | 37 | To see the custom autoscaling in action, we need to create another deployment - A flask application that keeps pushing messages in the redis broker continuously. 38 | 39 | 11. Do `kubectl apply -f templates/static/flask-example.yaml` to run a flask example which is going to fill the queue with messages. As soon as each queue length goes beyond the average specified in CR, it'll trigger autoscaling of workers. If you delete the flask deployment(`kubectl delete -f templates/static/flask-example.yaml`), number of messages in queue will come down and hence decreasing the number of workers as well in the process. 40 | 41 | # Directory Structure 42 | 43 | # Inspiration 44 | 45 | This project is inspired by proposal [Issue#24](https://github.com/celery/ceps/issues/24) in CEPS(Celery Enhancement Proposals) and @jmdacruz's POC [project](https://github.com/jmdacruz/celery-k8s-operator/) 46 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | # Native API Objects 2 | DEPLOYMENT_KIND = 'Deployment' 3 | SERVICE_KIND = 'Service' 4 | 5 | # Celery Worker Constants 6 | WORKER_TYPE = 'worker' 7 | 8 | 9 | # Flower Constants 10 | FLOWER_TYPE = 'flower' 11 | 12 | 13 | # Hander Status 14 | STATUS_CREATED = 'CREATED' 15 | STATUS_SUCCESS = 'SUCCESS' 16 | STATUS_UPDATED = 'UPDATED' 17 | STATUS_PATCHED = 'PATCHED' 18 | -------------------------------------------------------------------------------- /deploy/cr.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: celeryproject.org/v1alpha1 2 | kind: Celery 3 | metadata: 4 | name: example-celery-obj 5 | spec: 6 | common: 7 | appName: celery-crd-example 8 | celeryApp: 'app:celery_app' 9 | image: example-image 10 | workerSpec: 11 | numOfWorkers: 2 12 | queues: celery # default queue name 13 | logLevel: debug 14 | concurrency: 2 15 | resources: 16 | requests: 17 | cpu: "100m" 18 | memory: "64Mi" 19 | limits: 20 | cpu: "200m" 21 | memory: "128Mi" 22 | flowerSpec: 23 | replicas: 1 24 | resources: 25 | requests: 26 | cpu: "100m" 27 | memory: "64Mi" 28 | limits: 29 | cpu: "200m" 30 | memory: "128Mi" 31 | scaleTargetRef: 32 | - kind: worker 33 | minReplicas: 2 34 | maxReplicas: 5 35 | metrics: 36 | - name: message_queue 37 | target: 38 | type: length 39 | averageValue: 100 -------------------------------------------------------------------------------- /deploy/crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: celery.celeryproject.org 5 | spec: 6 | scope: Namespaced 7 | group: celeryproject.org 8 | names: 9 | kind: Celery 10 | listKind: CeleryList 11 | plural: celery 12 | singular: celery 13 | shortNames: 14 | - cel 15 | - capp 16 | versions: 17 | - name: v1alpha1 18 | served: true 19 | storage: true 20 | schema: 21 | openAPIV3Schema: 22 | type: object 23 | required: ["spec"] 24 | properties: 25 | spec: 26 | description: "spec defines the 
desired state and params for celery cluster" 27 | type: object 28 | properties: 29 | common: 30 | description: "common configuration parameters for all worker and flower deployments" 31 | required: ["appName", "celeryApp", "image"] 32 | type: object 33 | properties: 34 | image: 35 | description: "container image name to run in the worker and flower deployments" 36 | type: string 37 | appName: 38 | description: "app name for worker and flower deployments, will be suffixed accordingly" 39 | type: string 40 | celeryApp: 41 | description: "celery app instance to use (e.g. module.celery_app_attr_name)" 42 | type: string 43 | workerSpec: 44 | description: "worker deployment specific parameters" 45 | type: object 46 | properties: 47 | numOfWorkers: 48 | type: integer 49 | queues: 50 | type: string 51 | logLevel: 52 | type: string 53 | concurrency: 54 | type: integer 55 | maxTasksPerChild: 56 | type: integer 57 | resources: 58 | type: object 59 | properties: 60 | limits: 61 | type: object 62 | properties: 63 | cpu: 64 | type: string 65 | memory: 66 | type: string 67 | requests: 68 | type: object 69 | properties: 70 | cpu: 71 | type: string 72 | memory: 73 | type: string 74 | x-kubernetes-preserve-unknown-fields: true 75 | flowerSpec: 76 | description: "flower deployment specific parameters" 77 | type: object 78 | properties: 79 | replicas: 80 | type: integer 81 | resources: 82 | type: object 83 | properties: 84 | limits: 85 | type: object 86 | properties: 87 | cpu: 88 | type: string 89 | memory: 90 | type: string 91 | requests: 92 | type: object 93 | properties: 94 | cpu: 95 | type: string 96 | memory: 97 | type: string 98 | x-kubernetes-preserve-unknown-fields: true 99 | scaleTargetRef: 100 | description: "auto scaling targets" 101 | type: array 102 | items: 103 | type: object 104 | properties: 105 | kind: 106 | description: "target of which kind (e.g worker, flower)" 107 | type: string 108 | minReplicas: 109 | description: "minimum number of replicas to keep" 110 | type: integer 111 | maxReplicas: 112 | description: "maximum number of replicas to keep" 113 | type: integer 114 | metrics: 115 | description: "specify metrics to scale/downscale the number of workers" 116 | type: array 117 | items: 118 | type: object 119 | properties: 120 | name: 121 | description: "name of metric. (e.g. message_queue)" 122 | type: string 123 | target: 124 | type: object 125 | properties: 126 | type: 127 | description: "target metric type. (e.g. 
length)" 128 | type: string 129 | averageValue: 130 | description: "average metric value to maintain" 131 | type: integer 132 | status: 133 | type: object 134 | x-kubernetes-preserve-unknown-fields: true 135 | additionalPrinterColumns: 136 | - name: Children 137 | type: string 138 | priority: 0 139 | jsonPath: .status.create_fn.children_count 140 | description: Number of children successfully created by handler 141 | - name: Status 142 | type: string 143 | priority: 0 144 | jsonPath: .status.create_fn.status 145 | description: Status as returned from the handler 146 | - name: Age 147 | type: date 148 | priority: 0 149 | jsonPath: .metadata.creationTimestamp 150 | description: Age of custom object -------------------------------------------------------------------------------- /deploy/operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: celery-operator 6 | name: celery-operator 7 | namespace: default 8 | spec: 9 | minReadySeconds: 5 10 | progressDeadlineSeconds: 600 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: celery-operator 15 | strategy: 16 | rollingUpdate: 17 | maxSurge: 20% 18 | maxUnavailable: 0% 19 | type: RollingUpdate 20 | template: 21 | metadata: 22 | creationTimestamp: null 23 | labels: 24 | app: celery-operator 25 | spec: 26 | serviceAccountName: celery-account 27 | containers: 28 | - name: celery-operator 29 | image: celery-operator 30 | imagePullPolicy: Never 31 | resources: 32 | requests: 33 | cpu: "100m" 34 | memory: "64Mi" 35 | limits: 36 | cpu: "200m" 37 | memory: "128Mi" -------------------------------------------------------------------------------- /deploy/rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | namespace: default 6 | name: celery-account 7 | --- 8 | apiVersion: rbac.authorization.k8s.io/v1beta1 9 | kind: ClusterRole 10 | metadata: 11 | name: celery-role-cluster 12 | rules: 13 | 14 | # Framework: knowing which other operators are running (i.e. peering). 15 | - apiGroups: [zalando.org] 16 | resources: [clusterkopfpeerings] 17 | verbs: [list, watch, patch, get] 18 | - apiGroups: [apiextensions.k8s.io] 19 | resources: [customresourcedefinitions] 20 | verbs: [list, get] 21 | 22 | # Application: read-only access for watching cluster-wide. 23 | - apiGroups: [celeryproject.org] 24 | resources: [celery] 25 | verbs: [list, watch] 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1beta1 28 | kind: Role 29 | metadata: 30 | namespace: default 31 | name: celery-role-namespaced 32 | rules: 33 | 34 | # Framework: knowing which other operators are running (i.e. peering). 35 | - apiGroups: [zalando.org] 36 | resources: [kopfpeerings] 37 | verbs: [list, watch, patch, get] 38 | 39 | # Framework: posting the events about the handlers progress/errors. 40 | - apiGroups: [events.k8s.io] 41 | resources: [events] 42 | verbs: [create] 43 | - apiGroups: [""] 44 | resources: [events] 45 | verbs: [create] 46 | 47 | # Application: watching & handling for the custom resource we declare. 48 | - apiGroups: [celeryproject.org] 49 | resources: [celery] 50 | verbs: [list, watch, patch] 51 | 52 | # Application: other resources it produces and manipulates. 53 | # Here, we create Jobs+PVCs+Pods, but we do not patch/update/delete them ever. 
54 | - apiGroups: [batch, extensions] 55 | resources: [jobs] 56 | verbs: [create] 57 | - apiGroups: [""] 58 | resources: [pods, services] 59 | verbs: ['*'] 60 | - apiGroups: ["apps"] 61 | resources: [deployments, replicasets] 62 | verbs: ['*'] 63 | --- 64 | apiVersion: rbac.authorization.k8s.io/v1beta1 65 | kind: ClusterRoleBinding 66 | metadata: 67 | name: celery-rolebinding-cluster 68 | roleRef: 69 | apiGroup: rbac.authorization.k8s.io 70 | kind: ClusterRole 71 | name: celery-role-cluster 72 | subjects: 73 | - kind: ServiceAccount 74 | name: celery-account 75 | namespace: default 76 | --- 77 | apiVersion: rbac.authorization.k8s.io/v1beta1 78 | kind: RoleBinding 79 | metadata: 80 | namespace: default 81 | name: celery-rolebinding-namespaced 82 | roleRef: 83 | apiGroup: rbac.authorization.k8s.io 84 | kind: Role 85 | name: celery-role-namespaced 86 | subjects: 87 | - kind: ServiceAccount 88 | name: celery-account -------------------------------------------------------------------------------- /deployment_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import kopf 3 | import yaml 4 | 5 | 6 | def deploy_celery_workers(apps_api, namespace, spec, logger): 7 | path = os.path.join( 8 | os.path.dirname(__file__), 9 | 'templates/deployments/celery_worker_deployment.yaml' 10 | ) 11 | tmpl = open(path, 'rt').read() 12 | 13 | celery_config = spec['workerSpec'] 14 | req_resources = celery_config['resources']['requests'] 15 | lim_resources = celery_config['resources']['limits'] 16 | 17 | text = tmpl.format( 18 | namespace=namespace, 19 | app_name=spec['common']['appName'], 20 | celery_app=spec['common']['celeryApp'], 21 | image=spec['common']['image'], 22 | num_of_workers=celery_config['numOfWorkers'], 23 | queues=celery_config['queues'], 24 | loglevel=celery_config['logLevel'], 25 | concurrency=celery_config['concurrency'], 26 | lim_cpu=lim_resources['cpu'], 27 | lim_mem=lim_resources['memory'], 28 | req_cpu=req_resources['cpu'], 29 | req_mem=req_resources['memory'] 30 | ) 31 | data = yaml.safe_load(text) 32 | mark_as_child(data) 33 | 34 | deployed_obj = apps_api.create_namespaced_deployment( 35 | namespace=namespace, 36 | body=data 37 | ) 38 | 39 | logger.info( 40 | f"Deployment for celery workers successfully created with name: %s", 41 | deployed_obj.metadata.name 42 | ) 43 | 44 | return deployed_obj 45 | 46 | 47 | def deploy_flower(apps_api, namespace, spec, logger): 48 | path = os.path.join( 49 | os.path.dirname(__file__), 50 | 'templates/deployments/flower_deployment.yaml' 51 | ) 52 | tmpl = open(path, 'rt').read() 53 | 54 | flower_config = spec['flowerSpec'] 55 | req_resources = flower_config['resources']['requests'] 56 | lim_resources = flower_config['resources']['limits'] 57 | text = tmpl.format( 58 | namespace=namespace, 59 | app_name=spec['common']['appName'], 60 | celery_app=spec['common']['celeryApp'], 61 | image=spec['common']['image'], 62 | replicas=flower_config['replicas'], 63 | lim_cpu=lim_resources['cpu'], 64 | lim_mem=lim_resources['memory'], 65 | req_cpu=req_resources['cpu'], 66 | req_mem=req_resources['memory'] 67 | ) 68 | data = yaml.safe_load(text) 69 | mark_as_child(data) 70 | 71 | deployed_obj = apps_api.create_namespaced_deployment( 72 | namespace=namespace, 73 | body=data 74 | ) 75 | logger.info( 76 | f"Deployment for celery flower successfully created with name: %s", 77 | deployed_obj.metadata.name 78 | ) 79 | 80 | return deployed_obj 81 | 82 | 83 | def expose_flower_service(api, namespace, spec, logger): 84 | path 
= os.path.join( 85 | os.path.dirname(__file__), 86 | 'templates/services/flower_service.yaml' 87 | ) 88 | tmpl = open(path, 'rt').read() 89 | 90 | text = tmpl.format( 91 | namespace=namespace, 92 | app_name=spec['common']['appName'] 93 | ) 94 | data = yaml.safe_load(text) 95 | mark_as_child(data) 96 | 97 | svc_obj = api.create_namespaced_service( 98 | namespace=namespace, 99 | body=data 100 | ) 101 | logger.info( 102 | f"Flower service successfully created with name: %s", 103 | svc_obj.metadata.name 104 | ) 105 | return svc_obj 106 | 107 | 108 | def mark_as_child(data): 109 | """ 110 | Marks the incoming data as child of celeryapplications 111 | """ 112 | kopf.adopt(data) 113 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | ## Celery Kubernetes Operator - High Level Architecture 2 | 3 | ### Overview 4 | 5 | [Celery](https://docs.celeryproject.org/en/stable/) is a popular distributed task-queue system written in Python. To run Celery in production on Kubernetes, there are multiple manual steps involved like - 6 | - Writing deployment spec for workers 7 | - Setting up monitoring using [Flower](https://flower.readthedocs.io/en/latest/) 8 | - Setting up Autoscaling 9 | 10 | Apart from that, there's no consistent way to setup multiple clusters, everyone configures their own way which could create problems for infrastructure teams to manage and audit later. 11 | This project attempts to solve(or automate) these issues. It is aiming to bridge the gap between application engineers and infrastructure operators who manually manage the celery clusters. 12 | 13 | Moreover, since Celery is written in Python, we plan to use open source [KOPF](https://github.com/zalando-incubator/kopf)(Kubernetes Operator Pythonic Framework) to write the custom controller implementation. 14 | 15 | ### Scope 16 | 17 | 1. Provide a Custom Resource Definition(CRD) to spec out a Celery and Flower deployment having all the configuration options that they support. 18 | 2. A custom controller implementation that registers and manages self-healing capabilities of custom Celery resource for these operations - 19 | + CREATE - Creates the worker and flower deployments along with exposing a native Service object for Flower 20 | + UPDATE - Reads the CRD modifications and updates the running deployments using specified strategy 21 | + DELETE - Deletes the custom resource and all the child deployments 22 | 3. Support worker autoscaling/downscaling based on resource constraints(cpu, memory) and task queue length automatically. 23 | 24 | Discussions involving other things that this operator should do based on your production use-case are welcome. 25 | 26 | ### Diagram 27 | 28 | ![CKO Arch Diagram](https://i.imgur.com/dTBuG58.png) 29 | 30 | ### Workflow 31 | 32 | End user starts by writing and creating a YAML spec for the desired celery cluster. Creation event is listened by the Creation Handler(KOPF based) which creates deployment for workers, flower and a Service object to expose flower UI to external users. 33 | 34 | Assuming we have broker in place, any user facing application can start pushing messages to broker now and celery workers will start processing them. 
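For illustration, here is a minimal sketch of the application side of this workflow, condensed from `example/app.py` in this repo (the Flask wiring is dropped for brevity; the broker URL assumes the `redis-master` Service created by `templates/static/redis-master.yaml`):

```python
# Minimal producer sketch, condensed from example/app.py in this repo.
# Assumes a Redis broker reachable at the redis-master Service.
from celery import Celery

celery_app = Celery(
    "example",
    broker="redis://redis-master/1",
    backend="redis://redis-master/1",
)


@celery_app.task()
def add(a, b):
    return a + b


if __name__ == "__main__":
    # Tasks enqueued here are picked up by the worker pods the operator created.
    add.delay(4, 5)
```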
35 | 36 | User can update the custom resource, when that happens, updation handler listening to the event will patch the relevant deployments for change. Rollout strategy can be default or to be specified by user in the spec. 37 | 38 | Both creation and updation handlers will return their statuses to be stored in parent resource's status field. Status field will contain the latest status of the cluster children at all times. 39 | 40 | User can choose to setup autoscaling of workers by resource constraints(CPU, Memory) or broker queue length. Operator will automatically take care of creating an HPA or use KEDA based autoscaling(See [Autoscaling](#Autoscaling) section below) to make that happen. 41 | 42 | ### Components 43 | 44 | #### Worker Deployment 45 | A Kubernetes [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) to manage celery worker pods/replicaset. These workers consume the tasks from broker and process them. 46 | 47 | #### Flower Deployment 48 | A Kubernetes [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) to manage flower pods/replicaset. Flower is de-facto standard to monitor and remote control celery. 49 | 50 | #### Flower Service 51 | Expose flower UI to an external IP through a Kubernetes [Service](https://kubernetes.io/docs/concepts/services-networking/service/) object. We should additionally explore [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) as well(TODO). 52 | 53 | #### Celery CRD(Custom Resource Definition) 54 | CRDs are a native way to extend Kubernetes APIs to recognize custom applications/objects. Celery CRD will contain the schema for celery cluster to be setup. 55 | 56 | We plan to have following objects in place with their high level description - 57 | - `common` - common configuration parameters for Celery cluster 58 | + `image` - Celery application image to be run 59 | + `imagePullPolicy` - [Always, Never, IfNotPresent] 60 | + `imagePullSecrets` - to pull the image from a private registry 61 | + `volumeMounts` - describes mounting of a volume within container. 62 | + `volumes` - describes a volume to be used for storage 63 | + `celeryVersion` - Celery version 64 | + `appName` - App name for worker and flower deployments 65 | + `celeryApp` - celery app instance to use (e.g. 
module.celery_app_attr_name) 66 | - `workerSpec` - worker deployment specific parameters 67 | + `numOfWorkers` - Number of workers to launch initially 68 | + `args` - array of arguments(all celery supported options) to pass to worker process in container (TODO: Entrypoint vs args vs individual params) 69 | + `rolloutStrategy` - Rollout strategy to spawn new worker pods 70 | + `resources` - optional argument to specify cpu, mem constraints for worker deployment 71 | - `flowerSpec` - flower deployment and service specific parameters 72 | + `replicas` - Number of replicas for flower deployment 73 | + `args` - array of arguments(all flower supported options) to pass to flower process in the container 74 | + `servicePort` - Port to expose flower UI in the container 75 | + `serviceType` - [Default, NodePort, LoadBalancer] 76 | + `resources` - optional argument to specify cpu, mem constraints for flower deployment 77 | - `scaleTargetRef` - array of items describing auto scaling targets 78 | + `kind` - which application kind to scale (worker, flower) 79 | + `minReplicas` - min num of replicas 80 | + `maxReplicas` - max num of replicas 81 | + `metrics` - list of metrics to monitor 82 | * `name` - Enum type (memory, cpu, task_queue_length) 83 | * `target` - target values 84 | - `type` - [Utilization, Average Value] 85 | - `averageValue/averageUtilization` - Average values to maintain 86 | 87 | A more detailed version/documentation for CRD spec is underway. 88 | 89 | #### Celery CR(Custom Resource) 90 | Custom Resource Object for a Celery application. Multiple clusters will have multiple custom resource objects. 91 | 92 | #### Custom Controller 93 | [Custom controller](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#custom-controllers) implementation to manage Celery applications(CRs). Contains the code for creation, updation, deletion and scaling handlers of the cluster. 94 | 95 | 96 | ### Async KOPF Handlers(Controller Implementation) 97 | This section contains brief overview of creation and updation handlers which are going to react on celery resource creation and updation respectively and return their status to be stored back as resource's status. 98 | 99 | #### Creation Handler 100 | Generates deployment spec for worker and flower deployments dynamically based on incoming parameters specified in custom celery resource. Also creates the flower service to expose flower UI. Status of each children is sent back to be stored under parent resource status field. 101 | 102 | Additionally, it might handle the HPA object creation too if the scaling is to be done on native metrics(CPU and Memory utilization). 103 | 104 | #### Updation Handler 105 | Updates deployment spec for worker and flower deployments(and service + HPA) dynamically and patch them. Status of each children is sent back to be stored under parent resource status field. 106 | 107 | ### Autoscaling 108 | This section covers how operator is going to handle autoscaling. We plan to supporting scaling based on following two metrics. 109 | 110 | #### Native Metrics(CPU, Memory Utilization) 111 | If workers need to be scaled only on CPU/Memory constraints, we can simply create an HPA object in creation/updation handlers and it'll take care of scaling relevant worker deployment automatically. HPA supports these two metrics out of the box. For custom metrics, we need to do additional work. 
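As a rough illustration of that idea, the sketch below shows how a handler could create such an HPA with the official `kubernetes` Python client. This is not part of the current handlers: the `<appName>-celery-worker` naming follows `handlers.py`, while the 80% CPU target and the fallback replica bounds are illustrative assumptions.

```python
# Hedged sketch: provisioning a native CPU-based HPA for the worker deployment.
# Not part of the current handlers; values below are illustrative assumptions.
import kubernetes


def create_worker_hpa(namespace, spec):
    app_name = spec['common']['appName']
    deployment_name = f"{app_name}-celery-worker"  # naming convention from handlers.py
    target = spec['scaleTargetRef'][0]             # assumes a single 'worker' target

    hpa = kubernetes.client.V1HorizontalPodAutoscaler(
        metadata=kubernetes.client.V1ObjectMeta(name=deployment_name),
        spec=kubernetes.client.V1HorizontalPodAutoscalerSpec(
            scale_target_ref=kubernetes.client.V1CrossVersionObjectReference(
                api_version="apps/v1",
                kind="Deployment",
                name=deployment_name,
            ),
            min_replicas=target.get('minReplicas', 1),
            max_replicas=target.get('maxReplicas', 5),
            target_cpu_utilization_percentage=80,  # illustrative threshold
        ),
    )
    autoscaling_api = kubernetes.client.AutoscalingV1Api()
    return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
        namespace=namespace, body=hpa
    )
```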
112 | 113 | #### Broker Queue Length (KEDA-based autoscaling) 114 | Queue-length based scaling needs a custom metric server for an HPA to work. [KEDA](https://keda.sh/docs/1.5/concepts/) is a wonderful option because it is built for exactly this. It provides [scalers](https://keda.sh/docs/1.5/scalers/) for all the popular brokers (RabbitMQ, Redis, Amazon SQS) supported in Celery. 115 | 116 | KEDA can be deployed on a Kubernetes cluster in multiple ways - Helm, Operator Hub and YAML. The Celery Operator can package KEDA along with itself for distribution. 117 | 118 | ### Deployment Strategy 119 | 120 | Probably the best way would be to distribute a Helm chart which packages the CRD, the controller and KEDA together (more to be explored here). We'll also support YAML-apply based deployments. 121 | 122 | Additionally, the Helm approach is extensible in the sense that we can also package additional components, like the preferred broker (Redis, RMQ, SQS), to get started with Celery on Kubernetes out of the box without much effort. 123 | 124 | ### Want to Help? 125 | If you're running Celery on a Kubernetes cluster, your input on how you manage applications will be valuable. You could contribute to the discussion by creating a new issue on the repo. 126 | 127 | ### Motivation 128 | 129 | Celery is one of the most popular distributed task-queue systems written in Python. Kubernetes is the de-facto standard for container orchestration. We plan to write this operator to help manage Celery applications gracefully and with ease on a Kubernetes cluster. 130 | 131 | Moreover, we wish to build this operator in Python. Kubernetes is written in Go, and there is a significant learning curve to understanding its internals and writing (and maintaining) an operator in Go. With the help of a tool like KOPF, it will be good to have Celery spearhead the Python ecosystem for developing production-ready Kubernetes extensions. It will motivate the community to overcome the learning barrier and create useful libraries, tools and other operators while staying in the Python ecosystem. 132 | 133 | ### TODOs for Exploration 134 | - [ ] Helm chart to install the operator along with a broker of choice 135 | - [ ] Add role based access control section for the operator 136 | - [ ] Ingress Resource 137 | - [ ] KEDA Autoscaling Implementation 138 | - [ ] Create new issue thread to discuss Celery use-cases 139 | - [ ] What should not be in scope of celery operator? 140 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ## Celery Kubernetes Operator(WIP) 2 | 3 | A Kubernetes operator to manage Celery clusters. It started as a demo project for a EuroPython 2020 [talk](https://youtu.be/MoVHxRZ1688?t=9882) on automating infrastructure management in Kubernetes while staying in the Python ecosystem. Now it is being pursued as a production-ready project. 4 | 5 | Please report an issue for improvement suggestions or feedback. This operator is being written with the help of the [KOPF](https://github.com/zalando-incubator/kopf) framework open sourced by Zalando SE. 6 | 7 | ## Architecture Document 8 | 9 | Please go through the proposed architecture document for the operator [here](architecture.md). 10 | 11 | ### Support or Contact 12 | 13 | Please reach out on [twitter](https://twitter.com/28gautam97) or email [28gautam97@gmail.com](mailto:28gautam97@gmail.com).
14 | -------------------------------------------------------------------------------- /example/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /usr/src/app 4 | 5 | COPY requirements.txt ./ 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY . . 9 | 10 | WORKDIR example 11 | 12 | CMD [ "python", "run_task.py" ] 13 | -------------------------------------------------------------------------------- /example/app.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | from flask import Flask 3 | 4 | 5 | def make_celery(app): 6 | celery = Celery( 7 | app.import_name, 8 | backend=app.config['CELERY_RESULT_BACKEND'], 9 | broker=app.config['CELERY_BROKER_URL'] 10 | ) 11 | celery.conf.update(app.config) 12 | 13 | class ContextTask(celery.Task): 14 | def __call__(self, *args, **kwargs): 15 | with app.app_context(): 16 | return self.run(*args, **kwargs) 17 | 18 | celery.Task = ContextTask 19 | return celery 20 | 21 | 22 | flask_app = Flask(__name__) 23 | flask_app.config.update( 24 | CELERY_BROKER_URL='redis://redis-master/1', 25 | CELERY_RESULT_BACKEND='redis://redis-master/1' 26 | ) 27 | celery_app = make_celery(flask_app) 28 | 29 | 30 | @celery_app.task() 31 | def add(a, b): 32 | return a + b 33 | -------------------------------------------------------------------------------- /example/run_task.py: -------------------------------------------------------------------------------- 1 | from app import add 2 | 3 | i = 0 4 | while True: 5 | add.delay(4, 5) 6 | add.delay(10, 20) 7 | add.delay(100, 20) 8 | i += 1 9 | if i == 10000: 10 | break 11 | -------------------------------------------------------------------------------- /handlers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import kopf 3 | import kubernetes 4 | import requests 5 | import constants 6 | from math import ceil 7 | from collections import namedtuple 8 | 9 | from deployment_utils import ( 10 | deploy_celery_workers, 11 | deploy_flower, 12 | expose_flower_service 13 | ) 14 | from update_utils import ( 15 | update_all_deployments, 16 | update_worker_deployment, 17 | update_flower_deployment 18 | ) 19 | 20 | 21 | @kopf.on.create('celeryproject.org', 'v1alpha1', 'celery') 22 | def create_fn(spec, name, namespace, logger, **kwargs): 23 | """ 24 | Celery custom resource creation handler 25 | """ 26 | 27 | # 1. Validation of spec 28 | val, err_msg = validate_spec(spec) 29 | if err_msg: 30 | status = 'Failed validation' 31 | raise kopf.PermanentError(f"{err_msg}. Got {val}") 32 | 33 | api = kubernetes.client.CoreV1Api() 34 | apps_api_instance = kubernetes.client.AppsV1Api() 35 | 36 | # 2. Deployment for celery workers 37 | worker_deployment = deploy_celery_workers( 38 | apps_api_instance, namespace, spec, logger 39 | ) 40 | 41 | # 3. Deployment for flower 42 | flower_deployment = deploy_flower( 43 | apps_api_instance, namespace, spec, logger 44 | ) 45 | 46 | # 4. 
Expose flower service 47 | flower_svc = expose_flower_service( 48 | api, namespace, spec, logger 49 | ) 50 | 51 | children = [ 52 | { 53 | 'name': worker_deployment.metadata.name, 54 | 'replicas': worker_deployment.spec.replicas, 55 | 'kind': constants.DEPLOYMENT_KIND, 56 | 'type': constants.WORKER_TYPE 57 | }, 58 | { 59 | 'name': flower_deployment.metadata.name, 60 | 'replicas': flower_deployment.spec.replicas, 61 | 'kind': constants.DEPLOYMENT_KIND, 62 | 'type': constants.FLOWER_TYPE 63 | }, 64 | { 65 | 'name': flower_svc.metadata.name, 66 | 'spec': flower_svc.spec.to_dict(), 67 | 'kind': constants.SERVICE_KIND, 68 | 'type': constants.FLOWER_TYPE 69 | } 70 | ] 71 | 72 | return { 73 | 'children': children, 74 | 'children_count': len(children), 75 | 'status': constants.STATUS_CREATED 76 | } 77 | 78 | 79 | @kopf.on.update('celeryproject.org', 'v1alpha1', 'celery') 80 | def update_fn(spec, status, namespace, logger, **kwargs): 81 | diff = kwargs.get('diff') 82 | modified_spec = get_modified_spec_object(diff) 83 | 84 | api = kubernetes.client.CoreV1Api() 85 | apps_api_instance = kubernetes.client.AppsV1Api() 86 | result = status.get('update_fn') or status.get('create_fn') 87 | 88 | if modified_spec.common_spec: 89 | # if common spec was updated, need to update all deployments 90 | return update_all_deployments( 91 | api, apps_api_instance, spec, status, namespace 92 | ) 93 | else: 94 | if modified_spec.worker_spec: 95 | # if worker spec was updated, just update worker deployments 96 | worker_deployment = update_worker_deployment( 97 | apps_api_instance, spec, status, namespace 98 | ) 99 | deployment_status = next(child for child in result.get('children') if child['type'] == constants.WORKER_TYPE) # NOQA 100 | 101 | deployment_status.update({ 102 | 'name': worker_deployment.metadata.name, 103 | 'replicas': worker_deployment.spec.replicas 104 | }) 105 | 106 | if modified_spec.flower_spec: 107 | # if flower spec was updated, just update flower deployments 108 | flower_deployment = update_flower_deployment( 109 | apps_api_instance, spec, status, namespace 110 | ) 111 | deployment_status = next(child for child in result.get('children') if child['type'] == constants.FLOWER_TYPE) # NOQA 112 | 113 | deployment_status.update({ 114 | 'name': flower_deployment.metadata.name, 115 | 'replicas': flower_deployment.spec.replicas 116 | }) 117 | return result 118 | 119 | 120 | def get_modified_spec_object(diff): 121 | """ 122 | @param: diff - arg provided by kopf when an object is updated 123 | diff format - Tuple of (op, (fields tuple), old, new) 124 | @returns ModifiedSpec namedtuple signifying which spec was updated 125 | """ 126 | common_spec_checklist = ['appName', 'celeryApp', 'image'] 127 | celery_config_checklist = ['workerSpec'] 128 | flower_config_checklist = ['flowerSpec'] 129 | 130 | common_spec_modified = False 131 | celery_spec_modified = False 132 | flower_spec_modified = False 133 | 134 | # TODO - Optimize this loop maybe 135 | for op, fields, old, new in diff: 136 | if any(field in fields for field in common_spec_checklist): 137 | common_spec_modified = True 138 | if any(field in fields for field in celery_config_checklist): 139 | celery_spec_modified = True 140 | if any(field in fields for field in flower_config_checklist): 141 | flower_spec_modified = True 142 | 143 | # a namedtuple to give structure to which spec was updated 144 | ModifiedSpec = namedtuple('ModifiedSpec', ['common_spec', 'worker_spec', 'flower_spec']) 145 | 146 | return ModifiedSpec( 147 | common_spec=common_spec_modified, 
148 | worker_spec=celery_spec_modified, 149 | flower_spec=flower_spec_modified 150 | ) 151 | 152 | 153 | def check_flower_label(value, spec, **_): 154 | """ 155 | Checks if incoming label value is the one assigned to 156 | flower service and deployment 157 | """ 158 | return value == f"{spec['common']['appName']}-flower" 159 | 160 | 161 | def get_flower_svc_host(status): 162 | """ 163 | Get latest flower SVC host from parent's status 164 | """ 165 | handler = status.get('update_fn') or status.get('create_fn') 166 | 167 | for child in handler.get('children'): 168 | if child.get('kind') == constants.SERVICE_KIND and child.get('type') == constants.FLOWER_TYPE: # NOQA 169 | return f"{child.get('name')}:{child['spec']['ports'][0]['port']}" 170 | 171 | return None 172 | 173 | 174 | @kopf.timer('celeryproject.org', 'v1alpha1', 'celery', 175 | initial_delay=5, interval=10, idle=10) 176 | def message_queue_length(spec, status, **kwargs): 177 | flower_svc_host = get_flower_svc_host(status) 178 | if not flower_svc_host: 179 | return 180 | 181 | url = f"http://{flower_svc_host}/api/queues/length" 182 | response = requests.get(url=url) 183 | if response.status_code == 200: 184 | return response.json().get('active_queues') 185 | 186 | return { 187 | "queue_length": 0 188 | } 189 | 190 | 191 | def get_current_replicas(child_name, status): 192 | children = status.get('create_fn').get('children') 193 | for child in children: 194 | if child.get('name') == child_name: 195 | return child.get('replicas') 196 | 197 | 198 | def get_current_queue_len(child_name, status): 199 | for queue in status.get('message_queue_length', []): 200 | if queue.get('name') == child_name: 201 | return queue.get('messages') 202 | 203 | return 0 204 | 205 | 206 | @kopf.on.field('celeryproject.org', 'v1alpha1', 'celery', 207 | field='status.message_queue_length') 208 | def horizontal_autoscale(spec, status, namespace, **kwargs): 209 | worker_deployment_name = f"{spec['common']['appName']}-celery-worker" 210 | current_replicas = get_current_replicas(worker_deployment_name, status) 211 | updated_num_of_replicas = current_replicas 212 | scaling_targets = spec['scaleTargetRef'] 213 | for scaling_target in scaling_targets: 214 | # For now we only support 1 i.e message queue length 215 | if scaling_target.get('kind') == 'worker': 216 | min_replicas = scaling_target.get('minReplicas', spec['workerSpec']['numOfWorkers']) 217 | max_replicas = scaling_target.get('maxReplicas') 218 | queue_name = spec['workerSpec']['queues'] 219 | current_queue_length = get_current_queue_len(queue_name, status) 220 | avg_queue_length = scaling_target['metrics'][0].get('target').get('averageValue') 221 | updated_num_of_replicas = min( 222 | max( 223 | ceil( 224 | current_replicas * (current_queue_length / avg_queue_length) 225 | ), 226 | min_replicas 227 | ), 228 | max_replicas 229 | ) 230 | 231 | patch_body = { 232 | "spec": { 233 | "replicas": updated_num_of_replicas, 234 | } 235 | } 236 | 237 | apps_api_instance = kubernetes.client.AppsV1Api() 238 | updated_deployment = apps_api_instance.patch_namespaced_deployment( 239 | worker_deployment_name, namespace, patch_body 240 | ) 241 | 242 | return { 243 | 'deploymentName': updated_deployment.metadata.name, 244 | 'updated_num_of_replicas': updated_num_of_replicas 245 | } 246 | 247 | 248 | def validate_stuff(spec): 249 | """ 250 | 1. If the deployment/svc already exists, k8s throws error 251 | 2. 
Response and spec classes and enums 252 | """ 253 | pass 254 | 255 | 256 | def validate_spec(spec): 257 | """ 258 | Validates the incoming spec 259 | @returns - True/False, Error Message 260 | """ 261 | # size = spec.get('size') 262 | # if not size: 263 | # return size, "Size must be set" 264 | return None, None 265 | -------------------------------------------------------------------------------- /models/worker_spec.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, List, TypeVar, Type, cast, Callable 3 | 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | def from_str(x: Any) -> str: 9 | assert isinstance(x, str) 10 | return x 11 | 12 | 13 | def to_class(c: Type[T], x: Any) -> dict: 14 | assert isinstance(x, c) 15 | return cast(Any, x).to_dict() 16 | 17 | 18 | def from_list(f: Callable[[Any], T], x: Any) -> List[T]: 19 | assert isinstance(x, list) 20 | return [f(y) for y in x] 21 | 22 | 23 | @dataclass 24 | class Constraints: 25 | cpu: str 26 | memory: str 27 | 28 | @staticmethod 29 | def from_dict(obj: Any) -> 'Constraints': 30 | assert isinstance(obj, dict) 31 | cpu = from_str(obj.get("cpu")) 32 | memory = from_str(obj.get("memory")) 33 | return Constraints(cpu, memory) 34 | 35 | def to_dict(self) -> dict: 36 | result: dict = {} 37 | result["cpu"] = from_str(self.cpu) 38 | result["memory"] = from_str(self.memory) 39 | return result 40 | 41 | 42 | @dataclass 43 | class Resources: 44 | requests: Constraints 45 | limits: Constraints 46 | 47 | @staticmethod 48 | def from_dict(obj: Any) -> 'Resources': 49 | assert isinstance(obj, dict) 50 | requests = Constraints.from_dict(obj.get("requests")) 51 | limits = Constraints.from_dict(obj.get("limits")) 52 | return Resources(requests, limits) 53 | 54 | def to_dict(self) -> dict: 55 | result: dict = {} 56 | result["requests"] = to_class(Constraints, self.requests) 57 | result["limits"] = to_class(Constraints, self.limits) 58 | return result 59 | 60 | 61 | @dataclass 62 | class WorkerSpec: 63 | args: List[str] 64 | command: List[str] 65 | image: str 66 | name: str 67 | resources: Resources 68 | 69 | @staticmethod 70 | def from_dict(obj: Any) -> 'WorkerSpec': 71 | assert isinstance(obj, dict) 72 | args = from_list(from_str, obj.get("args")) 73 | command = from_list(from_str, obj.get("command")) 74 | image = from_str(obj.get("image")) 75 | name = from_str(obj.get("name")) 76 | resources = Resources.from_dict(obj.get("resources")) 77 | return WorkerSpec(args, command, image, name, resources) 78 | 79 | def to_dict(self) -> dict: 80 | result: dict = {} 81 | result["args"] = from_list(from_str, self.args) 82 | result["command"] = from_list(from_str, self.command) 83 | result["image"] = from_str(self.image) 84 | result["name"] = from_str(self.name) 85 | result["resources"] = to_class(Resources, self.resources) 86 | return result 87 | 88 | 89 | def args_list_from_spec_params( 90 | celery_app: str, 91 | queues: str, 92 | loglevel: str, 93 | concurrency: int 94 | ) -> List[str]: 95 | return [ 96 | f"--app={celery_app}", 97 | "worker", 98 | f"--queues={queues}", 99 | f"--loglevel={loglevel}", 100 | f"--concurrency={concurrency}" 101 | ] 102 | 103 | 104 | def worker_spec_from_dict(s: Any) -> WorkerSpec: 105 | return WorkerSpec.from_dict(s) 106 | 107 | 108 | def worker_spec_to_dict(x: WorkerSpec) -> Any: 109 | return to_class(WorkerSpec, x) 110 | 111 | # To use this code, make sure you 112 | # 113 | # import json 114 | # 115 | # and then, to convert JSON from a string, do 116 | 
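# e.g. with a hypothetical payload shaped after the dataclasses above
# (all values below are illustrative, borrowed from the static example templates):
#
#     json_string = '''{
#         "args": ["--app=app:celery_app", "worker", "--queues=default",
#                  "--loglevel=info", "--concurrency=2"],
#         "command": ["celery"],
#         "image": "example-image",
#         "name": "example-celery-worker",
#         "resources": {"requests": {"cpu": "100m", "memory": "64Mi"},
#                       "limits": {"cpu": "200m", "memory": "128Mi"}}
#     }'''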
# 117 | # result = worker_spec_from_dict(json.loads(json_string)) 118 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.7.4 2 | aiojobs==0.2.2 3 | amqp==5.0.9 4 | appnope==0.1.0 5 | async-timeout==3.0.1 6 | attrs==19.3.0 7 | backcall==0.1.0 8 | billiard==3.6.4.0 9 | cachetools==4.1.0 10 | celery==5.2.3 11 | certifi==2023.7.22 12 | chardet==3.0.4 13 | click==8.0.2 14 | decorator==4.4.2 15 | Flask==1.1.2 16 | flower==1.2.0 17 | future==0.18.3 18 | google-auth==1.16.1 19 | humanize==0.5.1 20 | idna==2.9 21 | ipdb==0.13.2 22 | ipython==8.10.0 23 | ipython-genutils==0.2.0 24 | iso8601==0.1.12 25 | itsdangerous==1.1.0 26 | jedi==0.17.0 27 | Jinja2==2.11.3 28 | kombu==5.2.3 29 | kopf==0.27 30 | kubernetes==11.0.0 31 | MarkupSafe==1.1.1 32 | multidict==4.7.6 33 | oauthlib==3.1.0 34 | parso==0.7.0 35 | pexpect==4.8.0 36 | pickleshare==0.7.5 37 | prompt-toolkit==3.0.5 38 | ptyprocess==0.6.0 39 | pyasn1==0.4.8 40 | pyasn1-modules==0.2.8 41 | Pygments==2.15.0 42 | pykube-ng==20.5.0 43 | python-dateutil==2.8.1 44 | pytz==2020.1 45 | PyYAML==5.4 46 | redis==3.5.3 47 | requests==2.23.0 48 | requests-oauthlib==1.3.0 49 | rsa==4.7 50 | six==1.15.0 51 | tornado==6.0.4 52 | traitlets==4.3.3 53 | typing-extensions==3.7.4.2 54 | urllib3==1.26.5 55 | vine==1.3.0 56 | wcwidth==0.2.3 57 | websocket-client==0.57.0 58 | Werkzeug==1.0.1 59 | yarl==1.4.2 60 | -------------------------------------------------------------------------------- /templates/config_maps/config_map.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: "{name}" 5 | data: 6 | config.yaml: 'PLACEHOLDER' -------------------------------------------------------------------------------- /templates/deployments/celery_worker_deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: {app_name} 6 | celery: "true" 7 | name: {app_name}-celery-worker 8 | namespace: {namespace} 9 | spec: 10 | minReadySeconds: 10 11 | replicas: {num_of_workers} 12 | selector: 13 | matchLabels: 14 | app: {app_name} 15 | strategy: 16 | rollingUpdate: 17 | maxSurge: 20% 18 | maxUnavailable: 0% 19 | type: RollingUpdate 20 | template: 21 | metadata: 22 | labels: 23 | app: {app_name} 24 | spec: 25 | containers: 26 | - name: {app_name}-celery-worker 27 | image: {image} 28 | imagePullPolicy: Never 29 | command: ["celery"] 30 | args: 31 | - "--app={celery_app}" 32 | - "worker" 33 | - "--queues={queues}" 34 | - "--loglevel={loglevel}" 35 | - "--concurrency={concurrency}" 36 | resources: 37 | limits: 38 | cpu: "{lim_cpu}" 39 | memory: "{lim_mem}" 40 | requests: 41 | cpu: "{req_cpu}" 42 | memory: "{req_mem}" -------------------------------------------------------------------------------- /templates/deployments/flower_deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: {app_name}-flower 6 | name: {app_name}-flower 7 | namespace: {namespace} 8 | spec: 9 | minReadySeconds: 10 10 | replicas: {replicas} 11 | selector: 12 | matchLabels: 13 | run: {app_name}-flower 14 | strategy: 15 | rollingUpdate: 16 | maxSurge: 20% 17 | maxUnavailable: 0% 18 | type: RollingUpdate 19 | template: 20 | metadata: 21 | labels: 22 | 
run: {app_name}-flower 23 | spec: 24 | containers: 25 | - name: {app_name}-flower 26 | image: {image} 27 | ports: 28 | - containerPort: 5555 29 | args: 30 | - --app={celery_app} 31 | command: 32 | - flower 33 | imagePullPolicy: Never 34 | resources: 35 | limits: 36 | cpu: {lim_cpu} 37 | memory: {lim_mem} 38 | requests: 39 | cpu: {req_cpu} 40 | memory: {req_mem} 41 | restartPolicy: Always 42 | terminationGracePeriodSeconds: 30 -------------------------------------------------------------------------------- /templates/services/flower_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {app_name}-flower 5 | namespace: {namespace} 6 | labels: 7 | app: {app_name}-flower 8 | spec: 9 | type: NodePort 10 | ports: 11 | - port: 5555 12 | protocol: TCP 13 | selector: 14 | run: {app_name}-flower -------------------------------------------------------------------------------- /templates/static/celery-worker-static-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: celery-crd-example 6 | celery: "true" 7 | name: celery-worker 8 | namespace: default 9 | spec: 10 | minReadySeconds: 10 11 | progressDeadlineSeconds: 600 12 | replicas: 1 13 | revisionHistoryLimit: 10 14 | selector: 15 | matchLabels: 16 | app: celery-crd-example 17 | strategy: 18 | rollingUpdate: 19 | maxSurge: 20% 20 | maxUnavailable: 0% 21 | type: RollingUpdate 22 | template: 23 | metadata: 24 | creationTimestamp: null 25 | labels: 26 | app: celery-crd-example 27 | spec: 28 | containers: 29 | - name: example-celery-container 30 | image: example-image 31 | imagePullPolicy: Never 32 | command: ["celery"] 33 | args: 34 | - "--app=app:celery_app" 35 | - "worker" 36 | - "--queues=default" 37 | - "--loglevel=info" 38 | - "--concurrency=2" 39 | resources: 40 | requests: 41 | cpu: "100m" 42 | memory: "64Mi" 43 | limits: 44 | cpu: "200m" 45 | memory: "128Mi" 46 | restartPolicy: Always -------------------------------------------------------------------------------- /templates/static/flask-example.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: my-flask-app 6 | name: my-flask-app 7 | namespace: default 8 | spec: 9 | minReadySeconds: 5 10 | progressDeadlineSeconds: 600 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: my-flask-app 15 | strategy: 16 | rollingUpdate: 17 | maxSurge: 20% 18 | maxUnavailable: 0% 19 | type: RollingUpdate 20 | template: 21 | metadata: 22 | creationTimestamp: null 23 | labels: 24 | app: my-flask-app 25 | spec: 26 | containers: 27 | - name: my-flask-app 28 | image: example-image 29 | imagePullPolicy: Never 30 | resources: 31 | requests: 32 | cpu: "100m" 33 | memory: "64Mi" 34 | limits: 35 | cpu: "200m" 36 | memory: "128Mi" -------------------------------------------------------------------------------- /templates/static/redis-master.yaml: -------------------------------------------------------------------------------- 1 | # Sets up a redis deployment and service inside cluster 2 | # Taken from https://www.callicoder.com/deploy-multi-container-go-redis-app-kubernetes/ 3 | --- 4 | apiVersion: apps/v1 # API version 5 | kind: Deployment 6 | metadata: 7 | name: redis-master # Unique name for the deployment 8 | labels: 9 | app: redis # Labels to be applied to this deployment 10 | 
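# From inside the cluster this broker is typically reached through the Service
# defined further below, e.g. broker_url = "redis://redis-master:6379/0"
# (illustrative; the actual URL depends on how the example app is configured).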
spec: 11 | selector: 12 | matchLabels: # This deployment applies to the Pods matching these labels 13 | app: redis 14 | role: master 15 | tier: backend 16 | replicas: 1 # Run a single pod in the deployment 17 | template: # Template for the pods that will be created by this deployment 18 | metadata: 19 | labels: # Labels to be applied to the Pods in this deployment 20 | app: redis 21 | role: master 22 | tier: backend 23 | spec: # Spec for the container which will be run inside the Pod. 24 | containers: 25 | - name: master 26 | image: redis 27 | resources: 28 | requests: 29 | cpu: 100m 30 | memory: 100Mi 31 | ports: 32 | - containerPort: 6379 33 | --- 34 | apiVersion: v1 35 | kind: Service # Type of Kubernetes resource 36 | metadata: 37 | name: redis-master # Name of the Kubernetes resource 38 | labels: # Labels that will be applied to this resource 39 | app: redis 40 | role: master 41 | tier: backend 42 | spec: 43 | type: NodePort 44 | ports: 45 | - port: 6379 # Map incoming connections on port 6379 to the target port 6379 of the Pod 46 | targetPort: 6379 47 | selector: # Map any Pod with the specified labels to this service 48 | app: redis 49 | role: master 50 | tier: backend -------------------------------------------------------------------------------- /update_utils.py: -------------------------------------------------------------------------------- 1 | import constants 2 | from models.worker_spec import ( 3 | args_list_from_spec_params 4 | ) 5 | 6 | 7 | def update_all_deployments(api, apps_api_instance, spec, status, namespace): 8 | worker_deployment = update_worker_deployment( 9 | apps_api_instance, spec, status, namespace 10 | ) 11 | 12 | flower_deployment = update_flower_deployment( 13 | apps_api_instance, spec, status, namespace 14 | ) 15 | 16 | flower_svc = update_flower_service( 17 | api, spec, status, namespace 18 | ) 19 | 20 | children = [ 21 | { 22 | 'name': worker_deployment.metadata.name, 23 | 'replicas': worker_deployment.spec.replicas, 24 | 'kind': constants.DEPLOYMENT_KIND, 25 | 'type': constants.WORKER_TYPE 26 | }, 27 | { 28 | 'name': flower_deployment.metadata.name, 29 | 'replicas': flower_deployment.spec.replicas, 30 | 'kind': constants.DEPLOYMENT_KIND, 31 | 'type': constants.FLOWER_TYPE 32 | }, 33 | { 34 | 'name': flower_svc.metadata.name, 35 | 'spec': flower_svc.spec.to_dict(), 36 | 'kind': constants.SERVICE_KIND, 37 | 'type': constants.FLOWER_TYPE 38 | } 39 | ] 40 | 41 | return { 42 | 'children': children, 43 | 'children_count': len(children), 44 | 'status': constants.STATUS_UPDATED 45 | } 46 | 47 | 48 | def get_curr_deployment_from_handler_status(handler_name, status, child_type): 49 | """ 50 | Get current deployment name from handler's status 51 | @param: handler_name - which handler to get from 52 | @param: child_type - worker or flower 53 | @returns: current deployment name 54 | """ 55 | for child in status.get(handler_name).get('children'): 56 | if child.get('type') == child_type and child.get('kind') == constants.DEPLOYMENT_KIND: # NOQA 57 | return child.get('name') 58 | 59 | return None 60 | 61 | 62 | def get_curr_deployment_name(status, child_type): 63 | """ 64 | Get current deployment name from parent's status 65 | @param: child_type - worker or flower 66 | @returns: current deployment name 67 | """ 68 | if status.get('update_fn'): 69 | return get_curr_deployment_from_handler_status('update_fn', status, child_type) 70 | 71 | return get_curr_deployment_from_handler_status('create_fn', status, child_type) 72 | 73 | 74 | def 
update_worker_deployment(apps_api_instance, spec, status, namespace): 75 | worker_spec = spec['workerSpec'] 76 | worker_spec_dict = { 77 | 'args': args_list_from_spec_params( 78 | celery_app=spec['common']['celeryApp'], 79 | queues=worker_spec['queues'], 80 | loglevel=worker_spec['logLevel'], 81 | concurrency=worker_spec['concurrency'] 82 | ), 83 | 'command': ["celery"], 84 | 'image': spec['common']['image'], 85 | 'name': f"{spec['common']['appName']}-celery-worker", 86 | 'resources': worker_spec['resources'] 87 | } 88 | 89 | # JSON way of submitting spec to deploy/patch 90 | patch_body = { 91 | "spec": { 92 | "replicas": worker_spec['numOfWorkers'], 93 | "template": { 94 | "spec": { 95 | "containers": [ 96 | worker_spec_dict 97 | ] 98 | } 99 | } 100 | } 101 | } 102 | 103 | worker_deployment_name = get_curr_deployment_name( 104 | status, constants.WORKER_TYPE 105 | ) 106 | 107 | return apps_api_instance.patch_namespaced_deployment( 108 | worker_deployment_name, namespace, patch_body 109 | ) 110 | 111 | 112 | def update_flower_deployment(apps_api_instance, spec, status, namespace): 113 | flower_spec = spec['flowerSpec'] 114 | 115 | flower_spec_dict = { 116 | 'args': [spec['common']['celeryApp']], 117 | 'command': ['flower'], 118 | 'image': spec['common']['image'], 119 | 'name': f"{spec['common']['appName']}-flower", 120 | 'ports': [ 121 | {"containerPort": 5555} 122 | ], 123 | 'resources': flower_spec['resources'] 124 | } 125 | 126 | # JSON way of submitting spec to deploy/patch 127 | patch_body = { 128 | "spec": { 129 | "replicas": flower_spec['replicas'], 130 | "template": { 131 | "spec": { 132 | "containers": [ 133 | flower_spec_dict 134 | ] 135 | } 136 | } 137 | } 138 | } 139 | 140 | flower_deployment_name = get_curr_deployment_name( 141 | status, constants.FLOWER_TYPE 142 | ) 143 | 144 | return apps_api_instance.patch_namespaced_deployment( 145 | flower_deployment_name, namespace, patch_body 146 | ) 147 | 148 | 149 | def update_flower_service(api, spec, status, namespace): 150 | # Only app_name change will affect flower service 151 | patch_body = { 152 | "spec": { 153 | "selector": { 154 | "run": f"{spec['common']['appName']}-flower" 155 | } 156 | } 157 | } 158 | 159 | flower_svc_name = get_curr_deployment_name( 160 | status, constants.FLOWER_TYPE 161 | ) # flower svc is named same as flower deployment 162 | return api.patch_namespaced_service( 163 | flower_svc_name, namespace, patch_body 164 | ) 165 | --------------------------------------------------------------------------------
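A rough usage sketch (not part of the repository): the update helpers in update_utils.py can also be driven directly with plain dicts that mirror the CR spec and the handler status they normally receive from kopf. Everything below assumes a reachable cluster that already has the referenced worker deployment, and every literal value is illustrative.

import kubernetes

import constants
from update_utils import update_worker_deployment

# Load credentials; inside a pod, load_incluster_config() would be used instead.
kubernetes.config.load_kube_config()
apps_api = kubernetes.client.AppsV1Api()

# Keys mirror what update_worker_deployment() reads; the values are made up,
# loosely following templates/static/celery-worker-static-deployment.yaml.
spec = {
    'common': {
        'appName': 'celery-crd-example',
        'celeryApp': 'app:celery_app',
        'image': 'example-image',
    },
    'workerSpec': {
        'numOfWorkers': 2,
        'queues': 'default',
        'logLevel': 'info',
        'concurrency': 2,
        'resources': {
            'requests': {'cpu': '100m', 'memory': '64Mi'},
            'limits': {'cpu': '200m', 'memory': '128Mi'},
        },
    },
}

# Minimal status, shaped like the payload create_fn stores in handlers.py.
status = {
    'create_fn': {
        'children': [
            {
                'name': 'celery-crd-example-celery-worker',
                'replicas': 1,
                'kind': constants.DEPLOYMENT_KIND,
                'type': constants.WORKER_TYPE,
            },
        ],
    },
}

# Applies the same kind of patch that update_fn would apply on a spec change.
deployment = update_worker_deployment(apps_api, spec, status, namespace='default')
print(deployment.metadata.name, deployment.spec.replicas)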