├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── cdk-project
│   ├── .gitignore
│   ├── README.md
│   ├── app.py
│   ├── batch_celery_container
│   │   ├── Dockerfile
│   │   └── celeryapp
│   │       ├── app.py
│   │       ├── fill-batch-queue.py
│   │       ├── fill_batch_queue.sh
│   │       └── run_celery_worker.sh
│   ├── cdk.json
│   └── requirements.txt
└── images
    ├── batch-console-jq-1.png
    ├── batch-console-jq-2.png
    ├── cw-alarms-not-ok.png
    ├── cw-alarms-ok.png
    ├── figure-1-batch-celery-architecture.png
    └── figure-2-step-functions-sm.png

/.gitignore:
--------------------------------------------------------------------------------
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# End of https://www.toptal.com/developers/gitignore/api/python
# Created by https://www.toptal.com/developers/gitignore/api/direnv
# Edit at https://www.toptal.com/developers/gitignore?templates=direnv

### direnv ###
.direnv
.envrc

# End of https://www.toptal.com/developers/gitignore/api/direnv
.python-version
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT No Attribution

Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Running Celery workers using AWS Batch

This repository provides a deployable example for the ["Running Celery workers using AWS Batch"](LIVE_BLOG_POST_URL) blog post, which discusses how to create an architecture for deploying [Celery](https://docs.celeryq.dev/en/stable/index.html) workers using [AWS Batch](https://aws.amazon.com/batch/). The following figure (Figure 1) contains a high-level architecture of the example:

![Figure 1: The architecture of the solution. The diagram shows the application sending Celery task requests to an SQS queue. Two CloudWatch alarms are configured to monitor the number of messages in a queue, and enter the `ALARM` state when the threshold is exceeded. A corresponding pair of EventBridge events are configured to either submit a single AWS Batch job for one Celery worker (in the case of a low number of messages) or submit an AWS Batch array job to start a set of workers (in the case when there are a lot of messages in the queue)](images/figure-1-batch-celery-architecture.png)

In the diagram:

1. A Celery application submits tasks to an SQS queue.
2. [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/) alarms monitor the depth of the queue, and enter the `ALARM` state when the approximate number of visible messages is >=5 and >=50, respectively.
3. [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rules react to those alarms to start the Celery workers with AWS Batch. The worker processes drain the SQS queue and shut themselves down when the queue is empty for more than 60 seconds.

### Celery components

You will find the example Celery Python code in the [`cdk-project/batch_celery_container`](cdk-project/batch_celery_container) directory.

* `app.py` - The Celery app that defines the task sent to the SQS queue and processed by the Celery workers that AWS Batch launches.
* `fill-batch-queue.py` - A small program that imports the Celery app and submits a number of Celery task requests to the queue for processing (a condensed sketch of this submit path appears below, after the AWS component list).

### AWS architectural components

This example leverages the [AWS CDK for Python](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) to define and manage the infrastructure, including:

* A VPC with a public and a private subnet deployed to a single Availability Zone. The private subnet has a NAT gateway attached.
* A Docker container and private ECR repository for the Celery processes.
* An AWS Batch compute environment that leverages Fargate to deploy resources to the VPC's private subnet.
* An AWS Batch FIFO job queue attached to the compute environment.
* AWS Batch job definitions that reference the Celery container, one for submitting Celery tasks to the queue and one for running Celery workers in Batch.
* An SQS queue for Celery tasks meant for Batch.
* Amazon CloudWatch alarms set to enter the `ALARM` state when the approximate SQS queue depth reaches >=5 and >=50 messages.
* Amazon EventBridge rules that react to the alarms by submitting jobs to AWS Batch to run Celery workers.
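
To make the submit path concrete, the following condensed sketch mirrors what `app.py` and `fill-batch-queue.py` do. It is shortened here for illustration; the full versions live in `cdk-project/batch_celery_container/celeryapp/`:

```python
from celery import Celery

# Celery's SQS transport; AWS credentials and region are taken from the environment.
app = Celery("celerybatch", broker="sqs://")
app.conf.update(
    task_default_queue="celery-default",
    # Route the compute-intensive task to the SQS queue that the alarms watch.
    task_routes={"app.calculate_pi": "celery-batch"},
)

@app.task
def calculate_pi():
    ...  # naive Pi calculation; see celeryapp/app.py for the real body

# Each apply_async() call places one message on the celery-batch queue,
# moving the queue depth toward the >=5 and >=50 alarm thresholds.
calculate_pi.apply_async()
```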

### Deploying the example

If you would like to deploy and run this example in your own AWS account, please refer to the [README.md](cdk-project/README.md) file within the [cdk-project](cdk-project/) subdirectory.

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

## License

This library is licensed under the MIT-0 License. See the LICENSE file.
--------------------------------------------------------------------------------
/cdk-project/.gitignore:
--------------------------------------------------------------------------------
*.swp
package-lock.json
__pycache__
.pytest_cache
.venv
*.egg-info

# CDK asset staging directory
.cdk.staging
cdk.out
cdk.context.json
--------------------------------------------------------------------------------
/cdk-project/README.md:
--------------------------------------------------------------------------------
# Welcome to the AWS Batch Celery Example CDK Python project!

This project leverages the [AWS CDK for Python](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) to define and manage the infrastructure.

In order to run this example, you will need to be able to install and leverage the CDK for Python, as well as build Docker containers.


### Setting up CDK and the project

Once your development environment is installed and configured to deploy CDK stacks and build Docker containers, clone this repository, then navigate to the contained CDK project directory:

```bash
git clone https://github.com/aws-samples/aws-batch-celery-example.git
cd aws-batch-celery-example/cdk-project
```

The `cdk.json` file tells the CDK Toolkit how to execute your app.

This project is set up like a standard Python project. We highly recommend that you create a Python virtualenv within this project, stored under the `.venv` directory, to install CDK and other Python modules specific to this project. To manually create a virtualenv in the Cloud9 terminal:

```bash
$ python3 -m venv .venv
```

After the virtualenv is created, use the following command to activate it.

```bash
$ source .venv/bin/activate
```

Once the virtualenv is activated, you can install the required dependencies.

```bash
$ pip install -r requirements.txt
```

At this point you can synthesize the CloudFormation template for this code.

```bash
$ cdk synth
```

### Useful CDK commands

Here are some useful CDK commands for reference:

* `cdk ls` - list all stacks in the app
* `cdk synth` - emit the synthesized CloudFormation template
* `cdk deploy` - deploy this stack to your default AWS account/region
* `cdk diff` - compare the deployed stack with the current state
* `cdk docs` - open the CDK documentation


## Deploying the example infrastructure

To deploy the stack to your AWS account:

```bash
cdk deploy
```

You'll get a notice about needing elevated permissions to create the IAM roles. Accept the prompt to create the resources.
Once the stack deploys successfully, note the following `Outputs` values from the stack:

* `BatchJobQueueArn` - the created AWS Batch job queue ARN.
* `BatchCeleryFillQueueJobDefArn` - the ARN of the created AWS Batch job definition that fills the queue with Celery tasks for Celery workers to process.

## Running the example

Once the stack is deployed, you can issue the following command via the Cloud9 terminal to populate the SQS queue with 10 Celery task requests, replacing the placeholders with the values of `BatchJobQueueArn` and `BatchCeleryFillQueueJobDefArn` noted above.

You can change the value of `numMessages` to >=50 to trigger the "high" CloudWatch alarm and the EventBridge rule that submits a Batch array job starting multiple Celery workers.

```bash
aws batch submit-job --job-name "fillQueue" \
    --job-queue "<BatchJobQueueArn>" \
    --job-definition "<BatchCeleryFillQueueJobDefArn>" \
    --parameters "numMessages=10"
```

To view the status of the job:

1. Navigate to the [AWS Batch management console](https://console.aws.amazon.com/batch/home?#/jobs/list).
2. On the left-hand side, select the **Jobs** tab, then for the job queue select **batch-celery-job-queue**.

![The AWS Batch management console, showing the Jobs information for the Celery job queue that was created](../images/batch-console-jq-1.png)

Once the job is complete, the specified number of messages will be in the created SQS queue.

To view the status of the Amazon CloudWatch alarms:

1. Navigate to the [Amazon CloudWatch management console](https://console.aws.amazon.com/cloudwatch/home?#alarmsV2:).
2. On the left-hand side, select **All alarms**, then enter **BatchFargateStack** into the filter. You should see the alarms in the **OK** state.

![The Amazon CloudWatch management console showing the created alarms for the Celery SQS queue](../images/cw-alarms-ok.png)

Once the `fillQueue` job completes, one or both alarms will enter the `ALARM` state, depending on whether you inserted fewer than 50 or at least 50 Celery messages into the SQS queue.

![The Amazon CloudWatch management console showing the created alarms for the Celery SQS queue. One of the alarms is in `ALARM` state.](../images/cw-alarms-not-ok.png)

When an alarm enters the `ALARM` state, the corresponding EventBridge rule(s) submit jobs to Batch to start Celery workers.
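
If you prefer the API to the console, the following boto3 sketch lists the jobs in the example's queue. This snippet is illustrative rather than part of the repository: the queue name comes from the stack above, and only a few of the possible job statuses are polled.

```python
import boto3

batch = boto3.client("batch")

# Worker jobs submitted by the EventBridge rules land in the same queue as fillQueue.
for status in ("RUNNABLE", "STARTING", "RUNNING", "SUCCEEDED"):
    response = batch.list_jobs(jobQueue="batch-celery-job-queue", jobStatus=status)
    for job in response["jobSummaryList"]:
        print(f"{status}: {job['jobName']} ({job['jobId']})")
```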
These worker-job requests also show up in the Batch console:

![The AWS Batch management console showing the `runSingleCeleryWorker` job submission](../images/batch-console-jq-2.png)

## Cleaning up

To clean up your resources and avoid unnecessary charges, leverage CDK to tear down the CloudFormation stack using the following command:

```bash
cdk destroy
```
--------------------------------------------------------------------------------
/cdk-project/app.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
from aws_cdk import (
    aws_ec2 as ec2,
    aws_batch as batch,
    aws_ecr_assets as ecr_assets,
    aws_sqs as sqs,
    aws_ecs as ecs,
    aws_iam as iam,
    aws_cloudwatch as cw,
    aws_events as events,
    aws_events_targets as targets,
    App, Stack, CfnOutput, Size, Tags, Duration
)
from constructs import Construct
from os import path

class BatchFargateStack(Stack):

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # This resource alone will create a private/public subnet in one AZ, as well as NAT/internet gateway(s)
        vpc = ec2.Vpc(self, "VPC", max_azs=1)

        # Create the SQS queue for Celery workers to send/receive messages from
        batch_celery_batch_sqs_queue = sqs.Queue(
            self, "BatchCeleryBatchQueue",
            queue_name="celery-batch"
        )

        # Creates the Docker image and CDK-controlled ECR repository for the Celery worker.
        celery_worker_image = ecr_assets.DockerImageAsset(
            self,
            "BatchCeleryWorkerImage",
            directory=path.join(
                path.dirname(__file__), "batch_celery_container"
            ),
            platform=ecr_assets.Platform.LINUX_AMD64
        )

        # Create the AWS Batch resources. This includes a Fargate compute environment, a job queue, and a job definition for the Celery worker with the appropriate task execution IAM role to access the ECR registry and SQS queues.
        # Batch CE will be created in the same AZ as the VPC, within the private subnet.
        # Batch CE will have a maximum of 48 vCPUs.
        batch_ce = batch.FargateComputeEnvironment(
            self,
            "batchCeleryComputeEnvironment",
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS),
            maxv_cpus=48
        )

        # Batch JQ will be attached to the Batch CE created above,
        # which caps total capacity at 48 vCPUs.
        batch_queue = batch.JobQueue(
            self,
            "JobQueue",
            compute_environments=[
                batch.OrderedComputeEnvironment(compute_environment=batch_ce, order=1)
            ],
            job_queue_name="batch-celery-job-queue"
        )

        # IAM roles for:
        # * allowing the ECS agent to pull from the private ECR repository
        # * allowing the Batch job to have full access to SQS
        ecs_task_execution_role = iam.Role(
            self,
            "EcsTaskExecutionRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonECSTaskExecutionRolePolicy")
            ]
        )
        batch_celery_job_role = iam.Role(
            self,
            "BatchCeleryJobRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSQSFullAccess")
            ]
        )

        # Create a Batch job definition for the Celery worker.
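        # The job definition below points at the container image built above and runs
        # run_celery_worker.sh, which starts a single Celery worker against the SQS
        # queue named by the CELERY_QUEUE_NAME/CELERY_QUEUE_URL environment variables
        # and exits once the queue has drained (see
        # batch_celery_container/celeryapp/run_celery_worker.sh).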
        batch_celery_worker_job_def = batch.EcsJobDefinition(
            self,
            "BatchCeleryWorkerJobDef",
            job_definition_name="runCeleryWorker",
            propagate_tags=True,
            container=batch.EcsFargateContainerDefinition(
                self,
                "BatchCeleryWorkerContainerDef",
                image=ecs.ContainerImage.from_registry(
                    celery_worker_image.image_uri
                ),
                command=["/opt/celeryapp/run_celery_worker.sh"],
                memory=Size.mebibytes(2048),
                cpu=1,
                execution_role=ecs_task_execution_role,
                job_role=batch_celery_job_role,
                environment={
                    "CELERY_QUEUE_NAME": batch_celery_batch_sqs_queue.queue_name,
                    "CELERY_QUEUE_URL": batch_celery_batch_sqs_queue.queue_url
                }
            )
        )
        Tags.of(batch_celery_worker_job_def).add("project", "batch-celery")

        # Create a Batch job definition to fill the Batch SQS queue with tasks.
        batch_celery_fill_queue_job_def = batch.EcsJobDefinition(
            self,
            "BatchCeleryFillQueueJobDef",
            job_definition_name="fillCeleryQueue",
            propagate_tags=True,
            container=batch.EcsFargateContainerDefinition(
                self,
                "BatchFillCeleryQueueContainerDef",
                image=ecs.ContainerImage.from_registry(
                    celery_worker_image.image_uri
                ),
                command=["/opt/celeryapp/fill_batch_queue.sh", "Ref::numMessages"],
                memory=Size.mebibytes(2048),
                cpu=1,
                execution_role=ecs_task_execution_role,
                job_role=batch_celery_job_role,
                environment={
                    "CELERY_QUEUE_NAME": batch_celery_batch_sqs_queue.queue_name,
                    "CELERY_QUEUE_URL": batch_celery_batch_sqs_queue.queue_url
                },
            ),
            parameters={"numMessages": 5}
        )
        Tags.of(batch_celery_fill_queue_job_def).add("project", "batch-celery")


        # Create the CloudWatch alarms for monitoring the SQS queue for compute intensive Celery tasks.
        # The low alarm will enter ALARM state once the queue is >=5 messages.
        cw_alarm_low = cw.Alarm(
            self,
            "CeleryQueueLowAlarm",
            alarm_description="SQS queue for compute intensive Celery tasks is >=5 messages.",
            metric=batch_celery_batch_sqs_queue.metric_approximate_number_of_messages_visible(
                period=Duration.seconds(60)
            ),
            threshold=5,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            treat_missing_data=cw.TreatMissingData.MISSING
        )
        # The high alarm will enter ALARM state once the queue is >=50 messages.
        cw_alarm_high = cw.Alarm(
            self,
            "CeleryQueueHighAlarm",
            alarm_description="SQS queue for compute intensive Celery tasks is >=50 messages.",
            metric=batch_celery_batch_sqs_queue.metric_approximate_number_of_messages_visible(
                period=Duration.seconds(60)
            ),
            threshold=50,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            treat_missing_data=cw.TreatMissingData.MISSING
        )

        # Create EventBridge rule and target to respond to the low alarm. This rule submits a Batch job to start a single Celery worker to drain the SQS queue.
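        # CloudWatch publishes a "CloudWatch Alarm State Change" event to EventBridge
        # on every alarm transition; the pattern below matches only transitions into
        # the ALARM state for this specific alarm ARN, so OK -> ALARM starts a worker
        # while ALARM -> OK is ignored.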
        eb_rule_low = events.Rule(
            self,
            "CeleryQueueLowAlarmRule",
            description="SQS queue for compute intensive Celery tasks is >=5 messages.",
            event_pattern=events.EventPattern(
                source=["aws.cloudwatch"],
                detail_type=["CloudWatch Alarm State Change"],
                resources=[cw_alarm_low.alarm_arn],
                detail={
                    "state": {
                        "value": ["ALARM"]
                    }
                }
            )
        )
        eb_rule_low.add_target(
            targets.BatchJob(
                batch_queue.job_queue_arn,
                batch_queue,
                batch_celery_worker_job_def.job_definition_arn,
                batch_celery_worker_job_def,
                job_name="runSingleCeleryWorker"
            )
        )
        # Create EventBridge rule and target to respond to the high alarm. This rule submits a Batch array job to start 10 Celery workers to drain the SQS queue.
        eb_rule_high = events.Rule(
            self,
            "CeleryQueueHighAlarmRule",
            description="SQS queue for compute intensive Celery tasks is >=50 messages.",
            event_pattern=events.EventPattern(
                source=["aws.cloudwatch"],
                detail_type=["CloudWatch Alarm State Change"],
                resources=[cw_alarm_high.alarm_arn],
                detail={
                    "state": {
                        "value": ["ALARM"]
                    }
                }
            )
        )
        eb_rule_high.add_target(
            targets.BatchJob(
                batch_queue.job_queue_arn,
                batch_queue,
                batch_celery_worker_job_def.job_definition_arn,
                batch_celery_worker_job_def,
                job_name="runMultipleCeleryWorkers",
                size=10
            )
        )

        # Output CloudFormation resources for use in the example
        CfnOutput(self, "BatchJobQueueArn", value=batch_queue.job_queue_arn)
        CfnOutput(self, "BatchCeleryFillQueueJobDefArn", value=batch_celery_fill_queue_job_def.job_definition_arn)

app = App()
BatchFargateStack(app, "BatchFargateStack")
app.synth()
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/Dockerfile:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

FROM public.ecr.aws/amazonlinux/amazonlinux:2

# RUN yum group install -y "AWS Tools" "Development Tools"
RUN yum install -y tar bzip2 curl unzip

# Install Micromamba
RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -C /usr/local -xvj bin/micromamba

RUN mkdir -p /opt/micromamba && \
    export MAMBA_ROOT_PREFIX="/opt/micromamba" && \
    eval "$(micromamba shell hook -s posix)" && \
    micromamba activate && \
    micromamba install -y python=3.10 celery boto3 pycurl -c conda-forge

# Install the AWS CLI
RUN cd /tmp && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && ./aws/install

COPY ./celeryapp /opt/celeryapp

CMD ["/opt/celeryapp/run_celery_worker.sh"]
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/app.py:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from celery import Celery
from sys import stderr
from os import environ

app = Celery("celerybatch",
             broker="sqs://")

# Update some of the configurations. Specifically:
# * Set the default queue name to be `celery-default`.
# * Set the task route for the `calculate_pi` task to be the queue named `celery-batch`.
app.conf.update(
    task_default_queue = 'celery-default',
    task_routes = {
        'app.calculate_pi': 'celery-batch'
    }
)

# A Celery task that will go into the `celery-batch` SQS queue.
# This task does a naive calculation of Pi multiple times to take up CPU cycles. A 1 vCPU Fargate resource should accomplish this task in about 5 seconds.
@app.task
def calculate_pi():
    s = 0
    for x in range(10):
        # Initialize denominator
        k = 1
        # Initialize sum
        s = 0
        for i in range(1000000):
            # even index elements are positive
            if i % 2 == 0:
                s += 4/k
            else:
                # odd index elements are negative
                s -= 4/k
            # denominator is odd
            k += 2
    stderr.write("Pi = " + str(s) + "\n")
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/fill-batch-queue.py:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from app import *
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument(
    '-n',
    '--num_messages',
    type=int,
    default=1,
    help='number of messages of each task to send'
)
args = parser.parse_args()

if __name__ == '__main__':
    for i in range(args.num_messages):
        calculate_pi.apply_async()
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/fill_batch_queue.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

if [ "$1" != "" ]; then
    NUM_MESSAGES=$1
else
    NUM_MESSAGES=5
fi

# Initialize micromamba and activate the base environment
export MAMBA_ROOT_PREFIX=/opt/micromamba
eval "$(micromamba shell hook --shell=bash)"
micromamba activate

# Change directory to the celeryapp
cd /opt/celeryapp

# Run the python script to populate the celery queue
python fill-batch-queue.py -n "$NUM_MESSAGES"
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/run_celery_worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# Initialize micromamba and activate the base environment
export MAMBA_ROOT_PREFIX=/opt/micromamba
eval "$(micromamba shell hook --shell=bash)"
micromamba activate

# Change directory to the celeryapp
cd /opt/celeryapp

# Start the worker as a detached process
celery -A app worker -l ERROR -Q ${CELERY_QUEUE_NAME} --pidfile /var/run/batch-celery-worker.pid --detach

# Wait for the detached worker to write its pidfile, then capture the PID
while true; do
    if [ ! -f "/var/run/batch-celery-worker.pid" ]; then
        sleep 2
    else
        # Get the process ID for the Celery worker
        PID=$(cat /var/run/batch-celery-worker.pid)
        echo "Celery worker parent PID=$PID"
        break
    fi
done

# Enter a wait loop and periodically check the message queue size.
# If no messages, break
while true
do
    sleep 60
    n=$(aws sqs get-queue-attributes --queue-url ${CELERY_QUEUE_URL} --attribute-names ApproximateNumberOfMessages --query "Attributes.ApproximateNumberOfMessages" --output text)
    if [ "$n" -eq 0 ]; then
        break;
    else
        echo "NUM_MESSAGES=$n";
    fi
done

# No more messages, stop the worker and exit
echo "SIGTERM CELERY PID=$PID"
kill -15 $PID
# Wait for the worker to shut down and remove its pidfile before exiting
while true; do
    if [ ! -f "/var/run/batch-celery-worker.pid" ]; then
        break
    fi
    sleep 5
done
echo "EXIT(0)"
exit 0
--------------------------------------------------------------------------------
/cdk-project/cdk.json:
--------------------------------------------------------------------------------
{
  "app": "python3 app.py",
  "watch": {
    "include": [
      "**"
    ],
    "exclude": [
      "README.md",
      "cdk*.json",
      "requirements*.txt",
      "source.bat",
      "**/__init__.py",
      "python/__pycache__",
      "tests"
    ]
  },
  "context": {
    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
    "@aws-cdk/core:checkSecretUsage": true,
    "@aws-cdk/core:target-partitions": [
      "aws",
      "aws-cn"
    ],
    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
    "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
    "@aws-cdk/aws-iam:minimizePolicies": true,
    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
    "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
    "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
    "@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
    "@aws-cdk/core:enablePartitionLiterals": true,
    "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true,
    "@aws-cdk/aws-iam:standardizedServicePrincipals": true,
    "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true,
    "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true,
    "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true,
    "@aws-cdk/aws-route53-patters:useCertificate": true,
    "@aws-cdk/customresources:installLatestAwsSdkDefault": false,
    "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true,
    "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true,
    "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true,
    "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true,
    "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true,
    "@aws-cdk/aws-redshift:columnId": true,
    "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true
  }
}
--------------------------------------------------------------------------------
/cdk-project/requirements.txt:
--------------------------------------------------------------------------------
aws-cdk-lib>=2.200.1
constructs>=10.0.0,<11.0.0
--------------------------------------------------------------------------------
/images/batch-console-jq-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/batch-console-jq-1.png
--------------------------------------------------------------------------------
/images/batch-console-jq-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/batch-console-jq-2.png
--------------------------------------------------------------------------------
/images/cw-alarms-not-ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/cw-alarms-not-ok.png
--------------------------------------------------------------------------------
/images/cw-alarms-ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/cw-alarms-ok.png
--------------------------------------------------------------------------------
/images/figure-1-batch-celery-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/figure-1-batch-celery-architecture.png
--------------------------------------------------------------------------------
/images/figure-2-step-functions-sm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/figure-2-step-functions-sm.png
--------------------------------------------------------------------------------