├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── cdk-project
│   ├── .gitignore
│   ├── README.md
│   ├── app.py
│   ├── batch_celery_container
│   │   ├── Dockerfile
│   │   └── celeryapp
│   │       ├── app.py
│   │       ├── fill-batch-queue.py
│   │       ├── fill_batch_queue.sh
│   │       └── run_celery_worker.sh
│   ├── cdk.json
│   └── requirements.txt
└── images
    ├── batch-console-jq-1.png
    ├── batch-console-jq-2.png
    ├── cw-alarms-not-ok.png
    ├── cw-alarms-ok.png
    ├── figure-1-batch-celery-architecture.png
    └── figure-2-step-functions-sm.png

/.gitignore:
--------------------------------------------------------------------------------
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# End of https://www.toptal.com/developers/gitignore/api/python
# Created by https://www.toptal.com/developers/gitignore/api/direnv
# Edit at https://www.toptal.com/developers/gitignore?templates=direnv

### direnv ###
.direnv
.envrc

# End of https://www.toptal.com/developers/gitignore/api/direnv
.python-version
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT No Attribution

Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Running Celery workers using AWS Batch

This repository provides a deployable example for the ["Running Celery workers using AWS Batch"](LIVE_BLOG_POST_URL) blog post, which discusses how to create an architecture for deploying [Celery](https://docs.celeryq.dev/en/stable/index.html) workers using [AWS Batch](https://aws.amazon.com/batch/). The following figure (Figure 1) contains a high-level architecture of the example:

![Figure 1: The architecture of the solution. The diagram shows the application sending Celery task requests to an SQS queue. Two CloudWatch alarms are configured to monitor the number of messages in a queue, and enter the `ALARM` state when the threshold is exceeded. A corresponding pair of EventBridge events are configured to either submit a single AWS Batch job for one Celery worker (in the case of a low number of messages) or submit an AWS Batch array job to start a set of workers (in the case when there are a lot of messages in the queue)](images/figure-1-batch-celery-architecture.png)

In the diagram:

1. A Celery application submits tasks to an SQS queue.
2. [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/) alarms monitor the depth of the queue, and enter the `ALARM` state when the approximate number of visible messages is >=5 and >=50, respectively.
3. [Amazon EventBridge](https://aws.amazon.com/eventbridge/) rules react to those alarms to start the Celery workers with AWS Batch. The worker processes drain the SQS queue and shut themselves down when the queue is empty for more than 60 seconds.

### Celery components

You will find the example Celery Python code in the [`cdk-project/batch_celery_container`](cdk-project/batch_celery_container) directory.

* `app.py` - The Celery app that defines the task sent to the SQS queue and processed by the Celery workers that AWS Batch launches.
* `fill-batch-queue.py` - A small program that imports the Celery app and submits a number of Celery task requests to the queue for processing (a condensed sketch of this submit path appears below, after the AWS component list).

### AWS architectural components

This example leverages the [AWS CDK for Python](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) to define and manage the infrastructure, including:

* A VPC with a public and a private subnet deployed to a single Availability Zone. The private subnet has a NAT gateway attached.
* A Docker container and private ECR repository for the Celery processes.
* An AWS Batch compute environment that leverages Fargate to deploy resources to the VPC's private subnet.
* An AWS Batch FIFO job queue attached to the compute environment.
* AWS Batch job definitions that reference the Celery container, one for submitting Celery tasks to the queue and one for running Celery workers in Batch.
* An SQS queue for Celery tasks meant for Batch.
* Amazon CloudWatch alarms set to enter the `ALARM` state when the approximate SQS queue depth reaches >=5 and >=50 messages.
* Amazon EventBridge rules that react to the alarms by submitting jobs to AWS Batch to run Celery workers.
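
To make the submit path concrete, the following condensed sketch mirrors what `app.py` and `fill-batch-queue.py` do. It is shortened here for illustration; the full versions live in `cdk-project/batch_celery_container/celeryapp/`:

```python
from celery import Celery

# Celery's SQS transport; AWS credentials and region are taken from the environment.
app = Celery("celerybatch", broker="sqs://")
app.conf.update(
    task_default_queue="celery-default",
    # Route the compute-intensive task to the SQS queue that the alarms watch.
    task_routes={"app.calculate_pi": "celery-batch"},
)

@app.task
def calculate_pi():
    ...  # naive Pi calculation; see celeryapp/app.py for the real body

# Each apply_async() call places one message on the celery-batch queue,
# moving the queue depth toward the >=5 and >=50 alarm thresholds.
calculate_pi.apply_async()
```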

### Deploying the example

If you would like to deploy and run this example in your own AWS account, please refer to the [README.md](cdk-project/README.md) file within the [cdk-project](cdk-project/) subdirectory.

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

## License

This library is licensed under the MIT-0 License. See the LICENSE file.
--------------------------------------------------------------------------------
/cdk-project/.gitignore:
--------------------------------------------------------------------------------
*.swp
package-lock.json
__pycache__
.pytest_cache
.venv
*.egg-info

# CDK asset staging directory
.cdk.staging
cdk.out
cdk.context.json
--------------------------------------------------------------------------------
/cdk-project/README.md:
--------------------------------------------------------------------------------
# Welcome to the AWS Batch Celery Example CDK Python project!

This project leverages the [AWS CDK for Python](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) to define and manage the infrastructure.

In order to run this example, you will need to be able to install and leverage the CDK for Python, as well as build Docker containers.


### Setting up CDK and the project

Once your development environment is installed and configured to deploy CDK stacks and build Docker containers, clone this repository, then navigate to the contained CDK project directory:

```bash
git clone https://github.com/aws-samples/aws-batch-celery-example.git
cd aws-batch-celery-example/cdk-project
```

The `cdk.json` file tells the CDK Toolkit how to execute your app.

This project is set up like a standard Python project. We highly recommend that you create a Python virtualenv within this project, stored under the `.venv` directory, to install CDK and other Python modules specific to this project. To manually create a virtualenv in the Cloud9 terminal:

```bash
$ python3 -m venv .venv
```

After the virtualenv is created, use the following command to activate it.

```bash
$ source .venv/bin/activate
```

Once the virtualenv is activated, you can install the required dependencies.

```bash
$ pip install -r requirements.txt
```

At this point you can synthesize the CloudFormation template for this code.

```bash
$ cdk synth
```

### Useful CDK commands

Here are some useful CDK commands for reference:

* `cdk ls` - list all stacks in the app
* `cdk synth` - emit the synthesized CloudFormation template
* `cdk deploy` - deploy this stack to your default AWS account/region
* `cdk diff` - compare the deployed stack with the current state
* `cdk docs` - open the CDK documentation


## Deploying the example infrastructure

To deploy the stack to your AWS account:

```bash
cdk deploy
```

You'll get a notice about needing elevated permissions to create the IAM roles. Accept the prompt to create the resources.
Once the stack deploys successfully, note the following `Outputs` values from the stack:

* `BatchJobQueueArn` - the created AWS Batch job queue ARN.
* `BatchCeleryFillQueueJobDefArn` - the ARN of the created AWS Batch job definition that fills the queue with Celery tasks for Celery workers to process.

## Running the example

Once the stack is deployed, you can issue the following command via the Cloud9 terminal to populate the SQS queue with 10 Celery task requests, replacing the placeholders with the values of `BatchJobQueueArn` and `BatchCeleryFillQueueJobDefArn` noted above.

You can change the value of `numMessages` to >=50 to trigger the "high" CloudWatch alarm and the EventBridge rule that submits a Batch array job starting multiple Celery workers.

```bash
aws batch submit-job --job-name "fillQueue" \
    --job-queue "<BatchJobQueueArn>" \
    --job-definition "<BatchCeleryFillQueueJobDefArn>" \
    --parameters "numMessages=10"
```

To view the status of the job:

1. Navigate to the [AWS Batch management console](https://console.aws.amazon.com/batch/home?#/jobs/list).
2. On the left-hand side, select the **Jobs** tab, then for the job queue select **batch-celery-job-queue**.

![The AWS Batch management console, showing the Jobs information for the Celery job queue that was created](../images/batch-console-jq-1.png)

Once the job is complete, the specified number of messages will be in the created SQS queue.

To view the status of the Amazon CloudWatch alarms:

1. Navigate to the [Amazon CloudWatch management console](https://console.aws.amazon.com/cloudwatch/home?#alarmsV2:).
2. On the left-hand side, select **All alarms**, then enter **BatchFargateStack** into the filter. You should see the alarms in the **OK** state.

![The Amazon CloudWatch management console showing the created alarms for the Celery SQS queue](../images/cw-alarms-ok.png)

Once the `fillQueue` job completes, one or both alarms will enter the `ALARM` state, depending on whether you inserted fewer than 50 or at least 50 Celery messages into the SQS queue.

![The Amazon CloudWatch management console showing the created alarms for the Celery SQS queue. One of the alarms is in `ALARM` state.](../images/cw-alarms-not-ok.png)

When an alarm enters the `ALARM` state, the corresponding EventBridge rule(s) submit jobs to Batch to start Celery workers.
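
If you prefer the API to the console, the following boto3 sketch lists the jobs in the example's queue. This snippet is illustrative rather than part of the repository: the queue name comes from the stack above, and only a few of the possible job statuses are polled.

```python
import boto3

batch = boto3.client("batch")

# Worker jobs submitted by the EventBridge rules land in the same queue as fillQueue.
for status in ("RUNNABLE", "STARTING", "RUNNING", "SUCCEEDED"):
    response = batch.list_jobs(jobQueue="batch-celery-job-queue", jobStatus=status)
    for job in response["jobSummaryList"]:
        print(f"{status}: {job['jobName']} ({job['jobId']})")
```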
These worker-job requests also show up in the Batch console:

![The AWS Batch management console showing the `runSingleCeleryWorker` job submission](../images/batch-console-jq-2.png)

## Cleaning up

To clean up your resources and avoid unnecessary charges, leverage CDK to tear down the CloudFormation stack using the following command:

```bash
cdk destroy
```
--------------------------------------------------------------------------------
/cdk-project/app.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
from aws_cdk import (
    aws_ec2 as ec2,
    aws_batch as batch,
    aws_ecr_assets as ecr_assets,
    aws_sqs as sqs,
    aws_ecs as ecs,
    aws_iam as iam,
    aws_cloudwatch as cw,
    aws_events as events,
    aws_events_targets as targets,
    App, Stack, CfnOutput, Size, Tags, Duration
)
from constructs import Construct
from os import path

class BatchFargateStack(Stack):

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # This resource alone will create a private/public subnet in one AZ, as well as NAT/internet gateway(s)
        vpc = ec2.Vpc(self, "VPC", max_azs=1)

        # Create the SQS queue for Celery workers to send/receive messages from
        batch_celery_batch_sqs_queue = sqs.Queue(
            self, "BatchCeleryBatchQueue",
            queue_name="celery-batch"
        )

        # Creates the Docker image and CDK-controlled ECR repository for the Celery worker.
        celery_worker_image = ecr_assets.DockerImageAsset(
            self,
            "BatchCeleryWorkerImage",
            directory=path.join(
                path.dirname(__file__), "batch_celery_container"
            ),
            platform=ecr_assets.Platform.LINUX_AMD64
        )

        # Create the AWS Batch resources. This includes a Fargate compute environment, a job queue, and a job definition for the Celery worker with the appropriate task execution IAM role to access the ECR registry and SQS queues.
        # Batch CE will be created in the same AZ as the VPC, within the private subnet.
        # Batch CE will have a maximum of 48 vCPUs.
        batch_ce = batch.FargateComputeEnvironment(
            self,
            "batchCeleryComputeEnvironment",
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS),
            maxv_cpus=48
        )

        # Batch JQ will be attached to the Batch CE created above,
        # which caps total capacity at 48 vCPUs.
        batch_queue = batch.JobQueue(
            self,
            "JobQueue",
            compute_environments=[
                batch.OrderedComputeEnvironment(compute_environment=batch_ce, order=1)
            ],
            job_queue_name="batch-celery-job-queue"
        )

        # IAM roles for:
        # * allowing the ECS agent to pull from the private ECR repository
        # * allowing the Batch job to have full access to SQS
        ecs_task_execution_role = iam.Role(
            self,
            "EcsTaskExecutionRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonECSTaskExecutionRolePolicy")
            ]
        )
        batch_celery_job_role = iam.Role(
            self,
            "BatchCeleryJobRole",
            assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSQSFullAccess")
            ]
        )

        # Create a Batch job definition for the Celery worker.
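        # The job definition below points at the container image built above and runs
        # run_celery_worker.sh, which starts a single Celery worker against the SQS
        # queue named by the CELERY_QUEUE_NAME/CELERY_QUEUE_URL environment variables
        # and exits once the queue has drained (see
        # batch_celery_container/celeryapp/run_celery_worker.sh).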
        batch_celery_worker_job_def = batch.EcsJobDefinition(
            self,
            "BatchCeleryWorkerJobDef",
            job_definition_name="runCeleryWorker",
            propagate_tags=True,
            container=batch.EcsFargateContainerDefinition(
                self,
                "BatchCeleryWorkerContainerDef",
                image=ecs.ContainerImage.from_registry(
                    celery_worker_image.image_uri
                ),
                command=["/opt/celeryapp/run_celery_worker.sh"],
                memory=Size.mebibytes(2048),
                cpu=1,
                execution_role=ecs_task_execution_role,
                job_role=batch_celery_job_role,
                environment={
                    "CELERY_QUEUE_NAME": batch_celery_batch_sqs_queue.queue_name,
                    "CELERY_QUEUE_URL": batch_celery_batch_sqs_queue.queue_url
                }
            )
        )
        Tags.of(batch_celery_worker_job_def).add("project", "batch-celery")

        # Create a Batch job definition to fill the Batch SQS queue with tasks.
        batch_celery_fill_queue_job_def = batch.EcsJobDefinition(
            self,
            "BatchCeleryFillQueueJobDef",
            job_definition_name="fillCeleryQueue",
            propagate_tags=True,
            container=batch.EcsFargateContainerDefinition(
                self,
                "BatchFillCeleryQueueContainerDef",
                image=ecs.ContainerImage.from_registry(
                    celery_worker_image.image_uri
                ),
                command=["/opt/celeryapp/fill_batch_queue.sh", "Ref::numMessages"],
                memory=Size.mebibytes(2048),
                cpu=1,
                execution_role=ecs_task_execution_role,
                job_role=batch_celery_job_role,
                environment={
                    "CELERY_QUEUE_NAME": batch_celery_batch_sqs_queue.queue_name,
                    "CELERY_QUEUE_URL": batch_celery_batch_sqs_queue.queue_url
                },
            ),
            parameters={"numMessages": 5}
        )
        Tags.of(batch_celery_fill_queue_job_def).add("project", "batch-celery")


        # Create the CloudWatch alarms for monitoring the SQS queue for compute intensive Celery tasks.
        # The low alarm will enter ALARM state once the queue is >=5 messages.
        cw_alarm_low = cw.Alarm(
            self,
            "CeleryQueueLowAlarm",
            alarm_description="SQS queue for compute intensive Celery tasks is >=5 messages.",
            metric=batch_celery_batch_sqs_queue.metric_approximate_number_of_messages_visible(
                period=Duration.seconds(60)
            ),
            threshold=5,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            treat_missing_data=cw.TreatMissingData.MISSING
        )
        # The high alarm will enter ALARM state once the queue is >=50 messages.
        cw_alarm_high = cw.Alarm(
            self,
            "CeleryQueueHighAlarm",
            alarm_description="SQS queue for compute intensive Celery tasks is >=50 messages.",
            metric=batch_celery_batch_sqs_queue.metric_approximate_number_of_messages_visible(
                period=Duration.seconds(60)
            ),
            threshold=50,
            comparison_operator=cw.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            evaluation_periods=1,
            datapoints_to_alarm=1,
            treat_missing_data=cw.TreatMissingData.MISSING
        )

        # Create EventBridge rule and target to respond to the low alarm. This rule submits a Batch job to start a single Celery worker to drain the SQS queue.
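        # CloudWatch publishes a "CloudWatch Alarm State Change" event to EventBridge
        # on every alarm transition; the pattern below matches only transitions into
        # the ALARM state for this specific alarm ARN, so OK -> ALARM starts a worker
        # while ALARM -> OK is ignored.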
        eb_rule_low = events.Rule(
            self,
            "CeleryQueueLowAlarmRule",
            description="SQS queue for compute intensive Celery tasks is >=5 messages.",
            event_pattern=events.EventPattern(
                source=["aws.cloudwatch"],
                detail_type=["CloudWatch Alarm State Change"],
                resources=[cw_alarm_low.alarm_arn],
                detail={
                    "state": {
                        "value": ["ALARM"]
                    }
                }
            )
        )
        eb_rule_low.add_target(
            targets.BatchJob(
                batch_queue.job_queue_arn,
                batch_queue,
                batch_celery_worker_job_def.job_definition_arn,
                batch_celery_worker_job_def,
                job_name="runSingleCeleryWorker"
            )
        )
        # Create EventBridge rule and target to respond to the high alarm. This rule submits a Batch array job to start 10 Celery workers to drain the SQS queue.
        eb_rule_high = events.Rule(
            self,
            "CeleryQueueHighAlarmRule",
            description="SQS queue for compute intensive Celery tasks is >=50 messages.",
            event_pattern=events.EventPattern(
                source=["aws.cloudwatch"],
                detail_type=["CloudWatch Alarm State Change"],
                resources=[cw_alarm_high.alarm_arn],
                detail={
                    "state": {
                        "value": ["ALARM"]
                    }
                }
            )
        )
        eb_rule_high.add_target(
            targets.BatchJob(
                batch_queue.job_queue_arn,
                batch_queue,
                batch_celery_worker_job_def.job_definition_arn,
                batch_celery_worker_job_def,
                job_name="runMultipleCeleryWorkers",
                size=10
            )
        )

        # Output CloudFormation resources for use in the example
        CfnOutput(self, "BatchJobQueueArn", value=batch_queue.job_queue_arn)
        CfnOutput(self, "BatchCeleryFillQueueJobDefArn", value=batch_celery_fill_queue_job_def.job_definition_arn)

app = App()
BatchFargateStack(app, "BatchFargateStack")
app.synth()
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/Dockerfile:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

FROM public.ecr.aws/amazonlinux/amazonlinux:2

# RUN yum group install -y "AWS Tools" "Development Tools"
RUN yum install -y tar bzip2 curl unzip

# Install Micromamba
RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -C /usr/local -xvj bin/micromamba

RUN mkdir -p /opt/micromamba && \
    export MAMBA_ROOT_PREFIX="/opt/micromamba" && \
    eval "$(micromamba shell hook -s posix)" && \
    micromamba activate && \
    micromamba install -y python=3.10 celery boto3 pycurl -c conda-forge

# Install the AWS CLI
RUN cd /tmp && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && ./aws/install

COPY ./celeryapp /opt/celeryapp

CMD ["/opt/celeryapp/run_celery_worker.sh"]
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/app.py:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from celery import Celery
from sys import stderr
from os import environ

app = Celery("celerybatch",
             broker="sqs://")

# Update some of the configurations. Specifically:
# * Set the default queue name to be `celery-default`.
# * Set the task route for the `calculate_pi` task to be the queue named `celery-batch`.
app.conf.update(
    task_default_queue = 'celery-default',
    task_routes = {
        'app.calculate_pi': 'celery-batch'
    }
)

# A Celery task that will go into the `celery-batch` SQS queue.
# This task does a naive calculation of Pi multiple times to take up CPU cycles. A 1 vCPU Fargate resource should accomplish this task in about 5 seconds.
@app.task
def calculate_pi():
    s = 0
    for x in range(10):
        # Initialize denominator
        k = 1
        # Initialize sum
        s = 0
        for i in range(1000000):
            # even index elements are positive
            if i % 2 == 0:
                s += 4/k
            else:
                # odd index elements are negative
                s -= 4/k
            # denominator is odd
            k += 2
    stderr.write("Pi = " + str(s) + "\n")
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/fill-batch-queue.py:
--------------------------------------------------------------------------------
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from app import *
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument(
    '-n',
    '--num_messages',
    type=int,
    default=1,
    help='number of messages of each task to send'
)
args = parser.parse_args()

if __name__ == '__main__':
    for i in range(args.num_messages):
        calculate_pi.apply_async()
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/fill_batch_queue.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

if [ "$1" != "" ]; then
    NUM_MESSAGES=$1
else
    NUM_MESSAGES=5
fi

# Initialize micromamba and activate the base environment
export MAMBA_ROOT_PREFIX=/opt/micromamba
eval "$(micromamba shell hook --shell=bash)"
micromamba activate

# Change directory to the celeryapp
cd /opt/celeryapp

# Run the python script to populate the celery queue
python fill-batch-queue.py -n "$NUM_MESSAGES"
--------------------------------------------------------------------------------
/cdk-project/batch_celery_container/celeryapp/run_celery_worker.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# Initialize micromamba and activate the base environment
export MAMBA_ROOT_PREFIX=/opt/micromamba
eval "$(micromamba shell hook --shell=bash)"
micromamba activate

# Change directory to the celeryapp
cd /opt/celeryapp

# Start the worker as a detached process
celery -A app worker -l ERROR -Q ${CELERY_QUEUE_NAME} --pidfile /var/run/batch-celery-worker.pid --detach

# Wait for the detached worker to write its pidfile, then capture the PID
while true; do
    if [ ! -f "/var/run/batch-celery-worker.pid" ]; then
        sleep 2
    else
        # Get the process ID for the Celery worker
        PID=$(cat /var/run/batch-celery-worker.pid)
        echo "Celery worker parent PID=$PID"
        break
    fi
done

# Enter a wait loop and periodically check the message queue size.
# If no messages, break
while true
do
    sleep 60
    n=$(aws sqs get-queue-attributes --queue-url ${CELERY_QUEUE_URL} --attribute-names ApproximateNumberOfMessages --query "Attributes.ApproximateNumberOfMessages" --output text)
    if [ "$n" -eq 0 ]; then
        break;
    else
        echo "NUM_MESSAGES=$n";
    fi
done

# No more messages, stop the worker and exit
echo "SIGTERM CELERY PID=$PID"
kill -15 $PID
# Wait for the worker to shut down and remove its pidfile before exiting
while true; do
    if [ ! -f "/var/run/batch-celery-worker.pid" ]; then
        break
    fi
    sleep 5
done
echo "EXIT(0)"
exit 0
--------------------------------------------------------------------------------
/cdk-project/cdk.json:
--------------------------------------------------------------------------------
{
  "app": "python3 app.py",
  "watch": {
    "include": [
      "**"
    ],
    "exclude": [
      "README.md",
      "cdk*.json",
      "requirements*.txt",
      "source.bat",
      "**/__init__.py",
      "python/__pycache__",
      "tests"
    ]
  },
  "context": {
    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
    "@aws-cdk/core:checkSecretUsage": true,
    "@aws-cdk/core:target-partitions": [
      "aws",
      "aws-cn"
    ],
    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
    "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
    "@aws-cdk/aws-iam:minimizePolicies": true,
    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
    "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
    "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
    "@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
    "@aws-cdk/core:enablePartitionLiterals": true,
    "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true,
    "@aws-cdk/aws-iam:standardizedServicePrincipals": true,
    "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true,
    "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true,
    "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true,
    "@aws-cdk/aws-route53-patters:useCertificate": true,
    "@aws-cdk/customresources:installLatestAwsSdkDefault": false,
    "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true,
    "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true,
    "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true,
    "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true,
    "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true,
    "@aws-cdk/aws-redshift:columnId": true,
    "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true
  }
}
--------------------------------------------------------------------------------
/cdk-project/requirements.txt:
--------------------------------------------------------------------------------
aws-cdk-lib>=2.200.1
constructs>=10.0.0,<11.0.0
--------------------------------------------------------------------------------
/images/batch-console-jq-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/batch-console-jq-1.png
--------------------------------------------------------------------------------
/images/batch-console-jq-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/batch-console-jq-2.png
--------------------------------------------------------------------------------
/images/cw-alarms-not-ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/cw-alarms-not-ok.png
--------------------------------------------------------------------------------
/images/cw-alarms-ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/cw-alarms-ok.png
--------------------------------------------------------------------------------
/images/figure-1-batch-celery-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/figure-1-batch-celery-architecture.png
--------------------------------------------------------------------------------
/images/figure-2-step-functions-sm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-batch-celery-worker-example/e986f105223b67bf50ffda51df9f8727d8bd24c3/images/figure-2-step-functions-sm.png
--------------------------------------------------------------------------------