├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── THIRD-PARTY-LICENSES_FYybBx1Dvz.txt ├── assets ├── Architecture.png ├── Project1.png ├── add-config.png ├── add-config2.png ├── debug.png ├── delete-stack.png ├── deploy-diag.png ├── deploy-pop.png ├── deploy-progress-1.png ├── deploy-progress-2.png ├── deploy-progress-3.png └── testfile.csv ├── events ├── event.json ├── event_call-step-functions.json ├── event_chunk-processing-status.json ├── event_get-data.json ├── event_merge-s3-files.json ├── event_read-file.json ├── event_s3-lambda-notification.json ├── event_split-ip-file.json ├── event_validate-data.json └── event_write-output-chunk.json ├── source ├── custom-resource │ ├── app.py │ ├── cfnresponse.py │ ├── requirements.txt │ └── testfile_financial_data.csv ├── get-data │ ├── __init__.py │ ├── app.py │ ├── requirements.txt │ └── schemas.py ├── merge-s3-files │ ├── __init__.py │ ├── app.py │ └── requirements.txt ├── read-file │ ├── __init__.py │ ├── app.py │ └── requirements.txt ├── s3-lambda-notification │ ├── __init__.py │ ├── app.py │ └── requirements.txt ├── send-email │ ├── __init__.py │ ├── app.py │ └── requirements.txt ├── split-ip-file │ ├── __init__.py │ ├── app.py │ └── requirements.txt ├── statemachine │ ├── blog-sfn-main-orchestrator.json │ └── blog-sfn-process-chunk.json ├── validate-data │ ├── __init__.py │ ├── app.py │ ├── requirements.txt │ └── schemas.py └── write-output-chunk │ ├── __init__.py │ ├── app.py │ └── requirements.txt └── template.yaml /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Creating AWS Serverless batch processing architectures 2 | 3 | This project shows how to use [AWS Step Functions](https://aws.amazon.com/step-functions/) features and integrations to orchestrate a batch processing solution. We use two Step Functions workflows to implement batch processing, with one workflow splitting the original file and a second workflow processing each chunk file. 4 | 5 | 6 | ## Supporting Blog Posts 7 | 8 | [Creating AWS Serverless batch processing architectures](https://aws.amazon.com/blogs/compute/creating-aws-serverless-batch-processing-architectures/) 9 | 10 | ## Architecture 11 | 12 | ![Batch processing With Step Functions - Architecture](assets/Architecture.png "Batch processing With Step Functions - Architecture") 13 | 14 | 1. Uploading a file to an [Amazon S3](https://aws.amazon.com/s3/) bucket triggers an S3 event notification, which invokes the [AWS Lambda](https://aws.amazon.com/lambda/) function asynchronously with an event that contains details about the object. 15 | 2. The Lambda function starts the Main batch orchestrator workflow to begin processing the file. 16 | 3. The Main batch orchestrator workflow reads the input file, splits it into multiple chunks, and stores them in an S3 bucket. 17 | 4. The Main batch orchestrator then invokes the Chunk processor workflow for each split file chunk. 18 | 5. Each Chunk processor workflow execution reads and processes a single split chunk file. 19 | 6. The Chunk processor workflow writes the processed chunk file back to the S3 bucket. 20 | 7. The Chunk processor workflow writes the details of any validation errors to an [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) table. 21 | 8. The Main batch orchestrator workflow then merges all the processed chunk files and saves the result to an S3 bucket. 22 | 9. The Main batch orchestrator workflow then emails the consolidated file to the intended recipients using [Amazon Simple Email Service](https://aws.amazon.com/ses/). 23 | 24 | ### AWS services used in the solution 25 | * [AWS Step Functions](https://aws.amazon.com/step-functions/) 26 | * [AWS Lambda](https://aws.amazon.com/lambda/) 27 | * [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) 28 | * [Amazon API Gateway](https://aws.amazon.com/api-gateway/) 29 | * [Amazon S3](https://aws.amazon.com/s3/) 30 | * [Amazon Simple Email Service](https://aws.amazon.com/ses/) 31 | 32 | ## Prerequisites 33 | * [AWS account](https://aws.amazon.com/free/?trk=ps_a134p000003yBfsAAE&trkCampaign=acq_paid_search_brand&sc_channel=ps&sc_campaign=acquisition_US&sc_publisher=google&sc_category=core&sc_country=US&sc_geo=NAMER&sc_outcome=acq&sc_detail=%2Baws%20%2Baccount&sc_content=Account_bmm&sc_segment=438195700994&sc_medium=ACQ-P|PS-GO|Brand|Desktop|SU|AWS|Core|US|EN|Text&s_kwcid=AL!4422!3!438195700994!b!!g!!%2Baws%20%2Baccount&ef_id=Cj0KCQjwsuP5BRCoARIsAPtX_wEmxImXtbdvL3n4ntAafj32KMc_sXL9Z-o8FyXVQzPk7w__h2FMje0aAhOFEALw_wcB:G:s&s_kwcid=AL!4422!3!438195700994!b!!g!!%2Baws%20%2Baccount&all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc) 34 | * [AWS SAM CLI](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-sam-cli-install.html) 35 | * [Python 3](https://www.python.org/downloads/) 36 | * An [AWS Identity and Access Management](http://aws.amazon.com/iam) (IAM) role with appropriate access. 
37 | * [Docker](https://docs.docker.com/get-docker/) (Docker is a prerequisite only for testing your application locally or for using the --use-container option.) 38 | 39 | ### Local development in an integrated development environment 40 | * PyCharm (you are free to use any IDE of your choice, or use the CLI) 41 | * [AWS Toolkit for PyCharm](https://aws.amazon.com/pycharm/) (Only needed if you are using PyCharm as your IDE. The AWS Toolkit for PyCharm is an open source plug-in for the PyCharm IDE that makes it easier to create, debug, and deploy Python applications on Amazon Web Services.) 42 | 43 | ## Project structure 44 | ``` 45 | aws-stepfunctions-batchprocessing/ 46 | ├── .aws-sam - Deployment package related files that the SAM CLI builds. 47 | ├── assets - Image files used in the README.md. 48 | ├── events - Invocation events that you can use to invoke the functions. You can create an event file for each function and populate it with the event that the Lambda function expects. 49 | ├── source 50 | ├── statemachine - AWS Step Functions state machines. 51 | ├── split-ip-file - Main batch orchestrator: Splits the input file into multiple chunks. 52 | ├── merge-s3-files - Main batch orchestrator: Takes each of the processed chunks and merges them together. 53 | ├── send-email - Main batch orchestrator: Creates an S3 presigned URL for the merged S3 file and emails it to the recipients. 54 | ├── read-file - Chunk processor: Reads each chunk and converts it into an array of JSON objects. 55 | ├── validate-data - Chunk processor: Validates each record from each row and reports whether the record is valid. 56 | ├── get-data - Chunk processor: The backing function for API Gateway; a test function that simulates enriching the data by fetching it from a sample DynamoDB table. 57 | ├── write-output-chunk - Chunk processor: Writes the processed output chunk to the S3 bucket. 58 | ├── s3-lambda-notification - Configured as an S3 notification handler; it is called when an object is created in S3 and is responsible for starting the Main batch orchestrator workflow. 59 | ├── custom-resource - 60 | ├── app.py - Adds an S3 event notification to the SourceBucket to trigger our Lambda function; this notification is not added in the SAM template itself because it would cause a circular dependency. This function also loads the FinancialTable DynamoDB table with the initial data that is needed by our API service (source/get-data/app.py). It is only used for the purpose of this example. 61 | ├── cfnresponse.py - Provides the cfnresponse module for our custom resource. The cfn-response module is available only when you use the ZipFile property to write your source code; it isn't available for source code that's stored in Amazon S3 buckets. Since we are not using the ZipFile property for our custom resource, we have to provide this module ourselves. 62 | └── template.yaml - A template that defines the application's AWS resources. 63 | ``` 64 | Refer to [cfnresponse](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-lambda-function-code-cfnresponsemodule.html) for more details on the cfnresponse module. 65 | 66 | Refer to [ZipFile](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-lambda-function-code.html) for more details. 
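If you want to exercise a single function locally before deploying, you can build the project with the SAM CLI and invoke that function with one of the sample events from the events folder. The following is only a minimal sketch; the logical function name used here is a placeholder, so substitute the corresponding resource name from your template.yaml (Docker is required for local invocation, as noted in the prerequisites):

```bash
# Build all functions defined in template.yaml
# (add --use-container to build inside a Lambda-like Docker container)
sam build

# Invoke one function locally with a sample event from the events/ folder.
# "SplitInputFileFunction" is a placeholder; use the logical ID from template.yaml.
sam local invoke SplitInputFileFunction --event events/event_split-ip-file.json
```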
67 | 68 | ## Solution Deployment Walkthrough 69 | At a high level, here are the steps you will follow to get this solution up and running. 70 | 71 | 1. Deploy the solution using PyCharm (or any other IDE) and an appropriate AWS Toolkit. 72 | 2. Deploy the solution using the SAM CLI (if step 1 is not used). 73 | 3. Test the solution. 74 | 75 | Detailed steps are provided below: 76 | ### 1. Deploy the solution using PyCharm and AWS Toolkit 77 | 78 | Download the code from the [GitHub](https://github.com/aws-samples/aws-serverless-batch-architecture) repository. 79 | 80 | ``` 81 | git clone https://github.com/aws-samples/aws-serverless-batch-architecture 82 | ``` 83 | 84 | You can follow the steps [here](https://www.jetbrains.com/help/pycharm/manage-projects-hosted-on-github.html) to clone the project from GitHub. 85 | 86 | You should see the project as shown below; take a moment to review the code structure: 87 | 88 | ![Project Clone view](assets/Project1.png "Project Clone view") 89 | 90 | If you have not already, please [set up your AWS credentials for the AWS Toolkit](https://docs.aws.amazon.com/toolkit-for-jetbrains/latest/userguide/setup-credentials.html). 91 | 92 | Right-click the project and select Deploy Serverless Application, as shown in the screenshot below: 93 | 94 | ![Deploy pop up](assets/deploy-pop.png "Deploy pop up") 95 | 96 | The Deploy Serverless Application dialog opens, as shown below: 97 | 98 | ![Deploy dialog](assets/deploy-diag.png "Deploy dialog") 99 | 100 | Enter the name of the stack in the "Create Stack" input. 101 | 102 | Make a note of the template parameters: 103 | 104 | - **SESSender**: The sender email address for the output file email. 105 | - **SESRecipient**: The recipient email address for the output file email. 106 | - **SESIdentityName**: An email address or domain that Amazon SES users use to send email. It is a best practice to authorize only specific email addresses; in this case you can keep it the same as SESSender (the email address used to send emails). If your SES account is in the sandbox, you have to verify both the sender and recipient email addresses; in that case, modify the template.yaml to add the permissions for the recipient email address as well. 107 | - **InputArchiveFolder**: Amazon S3 prefix in the SourceBucket where the input file will be archived after processing. 108 | - **FileChunkSize**: Size of each chunk split from the input file. For this use case the chunk size is around 600 rows, i.e. the input file is split into multiple files with each file having 600 rows; the desired chunk size is passed in the event payload to the Main orchestrator workflow. The split chunk file is then passed as a payload to the Chunk processor workflow; executions that pass large payloads of data between states can be terminated if the data grows over the maximum payload size of 262,144 bytes. You will have to adjust this value based on the data in the input file. 109 | - **FileDelimiter**: Delimiter of the CSV file (for example, a comma). 110 | 111 | Select an existing S3 bucket for storing the deployment package; alternatively, you can create a new S3 bucket. 112 | 113 | Keep the rest of the options as shown. The same template parameters can also be supplied from the command line if you deploy with the SAM CLI (see section 2), as sketched below. 
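For reference, a command-line deployment with the SAM CLI might look like the following. This is only a sketch: the stack name, deployment bucket, and parameter values are placeholders, and `sam deploy --guided` will prompt you for the same information interactively.

```bash
sam build
sam deploy \
  --stack-name batch-processing-stack \
  --s3-bucket <your-deployment-bucket> \
  --capabilities CAPABILITY_IAM \
  --parameter-overrides \
    SESSender=sender@example.com \
    SESRecipient=recipient@example.com \
    SESIdentityName=sender@example.com \
    InputArchiveFolder=input_archive \
    FileChunkSize=600 \
    FileDelimiter=','
```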
114 | 115 | You will see the progress of the deployment as shown in the screenshots below: 116 | 117 | ![Deploy Progress 1](assets/deploy-progress-1.png "Deploy Progress 1") 118 | 119 | ![Deploy Progress 2](assets/deploy-progress-2.png "Deploy Progress 2") 120 | 121 | After a few minutes you will see that the stack is deployed, as shown below: 122 | 123 | ![Deploy Progress 3](assets/deploy-progress-3.png "Deploy Progress 3") 124 | 125 | Note that after the stack is created, a [custom resource](source/custom-resource/app.py) is used to do two things: 126 | - Add an S3 event notification to the SourceBucket to trigger our Lambda function; this is not done in the template itself because it would cause a circular dependency, since our notification [Lambda function](source/s3-lambda-notification/app.py) depends on the BlogBatchMainOrchestrator as well as the S3 bucket. 127 | - Load the DynamoDB table (FinancialTable) with the initial data that is needed by our [API service](source/get-data/app.py). This is only used for the purpose of this example. 128 | 129 | ### 2. Deploy the solution using AWS SAM CLI (if step 1 is not used) 130 | Refer to the [Supporting Blog](https://aws.amazon.com/blogs/compute/creating-aws-serverless-batch-processing-architectures/) post for instructions. 131 | 132 | ### 3. Testing the solution 133 | Refer to the [Supporting Blog](https://aws.amazon.com/blogs/compute/creating-aws-serverless-batch-processing-architectures/) post for instructions. 134 | 135 | #### Checking the output 136 | Refer to the [Supporting Blog](https://aws.amazon.com/blogs/compute/creating-aws-serverless-batch-processing-architectures/) post for instructions. 137 | 138 | ## Debug your AWS Lambda Function 139 | 140 | Start by adding the configuration as shown in the screenshot below: 141 | 142 | ![Add config](assets/add-config.png "Add config") 143 | 144 | Make the selections as shown in the screenshot below: 145 | 146 | ![Add config 2](assets/add-config2.png "Add config 2") 147 | 148 | 1. Select the **From template** option. 149 | 2. Browse to the template.yaml within the aws-stepfunctions-batchprocessing directory. 150 | 3. Select the function that you want to debug; this drop-down lists all the functions that are present in the template.yaml. 151 | 4. Select the input to the Lambda function; here you can pass the event for the Lambda function as text or in an input file. For the purpose of this example we use an event file. You can populate this file with whatever event is expected by your Lambda function. 152 | 5. Click OK to save the configuration. 153 | 154 | ![Debug](assets/debug.png "Debug") 155 | 156 | 1. Set a breakpoint by clicking on the area between the line number and the code. 157 | 2. Click on the Debug icon. 158 | 3. In a few minutes the debugging session should start; you can also perform additional debugger actions through the Debug tool window. 159 | 160 | ## Cleanup 161 | 162 | To delete the application stack using the AWS CLI, you can run the `delete-stack` command shown below (replace `<stack-name>` with your actual stack name). 163 | Note: Deletion of the stack could fail if the SourceBucket is not empty; clean up the files in the SourceBucket first if they are no longer needed, for example: 
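A minimal sketch of emptying the source bucket from the command line; the bucket name is a placeholder for the SourceBucket created by your stack:

```bash
# Remove all objects from the source bucket (replace the bucket name with your SourceBucket)
aws s3 rm s3://<source-bucket-name> --recursive
```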
164 | Once the bucket is empty, delete the stack: 165 | ```bash 166 | aws cloudformation delete-stack --stack-name <stack-name> 167 | ``` 168 | You can also delete the CloudFormation stack through the AWS Explorer pane of the PyCharm IDE, as shown below: 169 | 170 | ![Delete Stack](assets/delete-stack.png "Delete Stack") 171 | 172 | Right-click your stack name and select Delete stack. 173 | 174 | 175 | -------------------------------------------------------------------------------- /THIRD-PARTY-LICENSES_FYybBx1Dvz.txt: -------------------------------------------------------------------------------- 1 | ** amazon-pinpoint-developer-guide-samplecode; version v1 -- 2 | https://github.com/awsdocs/amazon-pinpoint-developer-guide 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this 6 | software and associated documentation files (the "Software"), to deal in the 7 | Software 8 | without restriction, including without limitation the rights to use, copy, 9 | modify, 10 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and 11 | to 12 | permit persons to whom the Software is furnished to do so. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, 16 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 17 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 | ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | * For amazon-pinpoint-developer-guide-samplecode see also this required NOTICE: 25 | Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. 26 | 27 | ------ 28 | 29 | ** Amazon SES Sample code; version v1 -- 30 | https://github.com/awsdocs/amazon-ses-developer-guide/blob/master/doc-source/send-using-sdk-python.md 31 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 32 | ** aws-lambda-powertools-python-samplecode; version v1 -- 33 | https://github.com/awslabs/aws-lambda-powertools-python/ 34 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy of 37 | this 38 | software and associated documentation files (the "Software"), to deal in the 39 | Software 40 | without restriction, including without limitation the rights to use, copy, 41 | modify, 42 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and 43 | to 44 | permit persons to whom the Software is furnished to do so. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, 48 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 49 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 50 | COPYRIGHT 51 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 | ACTION 53 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 54 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /assets/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/Architecture.png -------------------------------------------------------------------------------- /assets/Project1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/Project1.png -------------------------------------------------------------------------------- /assets/add-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/add-config.png -------------------------------------------------------------------------------- /assets/add-config2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/add-config2.png -------------------------------------------------------------------------------- /assets/debug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/debug.png -------------------------------------------------------------------------------- /assets/delete-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/delete-stack.png -------------------------------------------------------------------------------- /assets/deploy-diag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/deploy-diag.png -------------------------------------------------------------------------------- /assets/deploy-pop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/deploy-pop.png -------------------------------------------------------------------------------- /assets/deploy-progress-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/deploy-progress-1.png -------------------------------------------------------------------------------- /assets/deploy-progress-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/deploy-progress-2.png -------------------------------------------------------------------------------- /assets/deploy-progress-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/assets/deploy-progress-3.png -------------------------------------------------------------------------------- /events/event.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event.json -------------------------------------------------------------------------------- /events/event_call-step-functions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_call-step-functions.json -------------------------------------------------------------------------------- /events/event_chunk-processing-status.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_chunk-processing-status.json -------------------------------------------------------------------------------- /events/event_get-data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_get-data.json -------------------------------------------------------------------------------- /events/event_merge-s3-files.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_merge-s3-files.json -------------------------------------------------------------------------------- /events/event_read-file.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_read-file.json -------------------------------------------------------------------------------- /events/event_s3-lambda-notification.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_s3-lambda-notification.json -------------------------------------------------------------------------------- /events/event_split-ip-file.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_split-ip-file.json -------------------------------------------------------------------------------- /events/event_validate-data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_validate-data.json -------------------------------------------------------------------------------- /events/event_write-output-chunk.json: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/events/event_write-output-chunk.json -------------------------------------------------------------------------------- /source/custom-resource/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import boto3 4 | import logging 5 | import json 6 | import cfnresponse 7 | import csv 8 | 9 | s3Client = boto3.client('s3') 10 | dynamodb = boto3.resource('dynamodb') 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.DEBUG) 13 | 14 | 15 | def add_bucket_notification(bucket_name, notification_id, function_arn): 16 | notification_response = s3Client.put_bucket_notification_configuration( 17 | Bucket=bucket_name, 18 | NotificationConfiguration={ 19 | 'LambdaFunctionConfigurations': [ 20 | { 21 | 'Id': notification_id, 22 | 'LambdaFunctionArn': function_arn, 23 | 'Events': [ 24 | 's3:ObjectCreated:*' 25 | ], 26 | 'Filter': { 27 | 'Key': { 28 | 'FilterRules': [ 29 | { 30 | 'Name': 'prefix', 31 | 'Value': 'input/' 32 | }, 33 | { 34 | 'Name': 'suffix', 35 | 'Value': 'csv' 36 | }, 37 | ] 38 | } 39 | } 40 | }, 41 | ] 42 | } 43 | ) 44 | return notification_response 45 | 46 | 47 | def load_csv_data(table_name): 48 | csv_file = "testfile_financial_data.csv" 49 | 50 | batch_size = 100 51 | batch = [] 52 | 53 | for row in csv.DictReader(open(csv_file)): 54 | if len(batch) >= batch_size: 55 | write_to_dynamo(batch, table_name) 56 | batch.clear() 57 | batch.append(row) 58 | 59 | if batch: 60 | write_to_dynamo(batch, table_name) 61 | 62 | return { 63 | 'statusCode': 200, 64 | 'body': json.dumps('CSV file loaded into the DYnamoDB table') 65 | } 66 | 67 | 68 | def write_to_dynamo(rows, table_name): 69 | try: 70 | table = dynamodb.Table(table_name) 71 | except: 72 | print("Error loading DynamoDB table. 
Check if table was created correctly and environment variable.") 73 | 74 | try: 75 | with table.batch_writer() as batch: 76 | for i in range(len(rows)): 77 | batch.put_item( 78 | Item=rows[i] 79 | ) 80 | except Exception as e: 81 | print(e.response['Error']['Message']) 82 | 83 | 84 | def create(properties, physical_id): 85 | bucket_name = properties['S3Bucket'] 86 | notification_id = properties['NotificationId'] 87 | function_arn = properties['FunctionARN'] 88 | table_name = properties['FinancialTableName'] 89 | response = add_bucket_notification(bucket_name, notification_id, function_arn) 90 | logger.info('AddBucketNotification response: %s' % json.dumps(response)) 91 | logger.info('Loading table: %s' % table_name) 92 | response = load_csv_data(table_name) 93 | logger.info('AddBucketNotification response: %s' % json.dumps(response)) 94 | 95 | return cfnresponse.SUCCESS, physical_id 96 | 97 | 98 | def update(properties, physical_id): 99 | return cfnresponse.SUCCESS, None 100 | 101 | 102 | def delete(properties, physical_id): 103 | return cfnresponse.SUCCESS, None 104 | 105 | 106 | def lambda_handler(event, context): 107 | logger.info('Received event: %s' % json.dumps(event)) 108 | 109 | status = cfnresponse.FAILED 110 | new_physical_id = None 111 | 112 | try: 113 | properties = event.get('ResourceProperties') 114 | physical_id = event.get('PhysicalResourceId') 115 | 116 | status, new_physical_id = { 117 | 'Create': create, 118 | 'Update': update, 119 | 'Delete': delete 120 | }.get(event['RequestType'], lambda x, y: (cfnresponse.FAILED, None))(properties, physical_id) 121 | except Exception as e: 122 | logger.error('Exception: %s' % e) 123 | status = cfnresponse.FAILED 124 | finally: 125 | cfnresponse.send(event, context, status, {}, new_physical_id) -------------------------------------------------------------------------------- /source/custom-resource/cfnresponse.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from __future__ import print_function 5 | import urllib3 6 | import json 7 | 8 | SUCCESS = "SUCCESS" 9 | FAILED = "FAILED" 10 | 11 | http = urllib3.PoolManager() 12 | 13 | 14 | def send(event, context, responseStatus, responseData, physicalResourceId=None, noEcho=False, reason=None): 15 | responseUrl = event['ResponseURL'] 16 | 17 | print(responseUrl) 18 | 19 | responseBody = { 20 | 'Status': responseStatus, 21 | 'Reason': reason or "See the details in CloudWatch Log Stream: {}".format(context.log_stream_name), 22 | 'PhysicalResourceId': physicalResourceId or context.log_stream_name, 23 | 'StackId': event['StackId'], 24 | 'RequestId': event['RequestId'], 25 | 'LogicalResourceId': event['LogicalResourceId'], 26 | 'NoEcho': noEcho, 27 | 'Data': responseData 28 | } 29 | 30 | json_responseBody = json.dumps(responseBody) 31 | 32 | print("Response body:") 33 | print(json_responseBody) 34 | 35 | headers = { 36 | 'content-type': '', 37 | 'content-length': str(len(json_responseBody)) 38 | } 39 | 40 | try: 41 | response = http.request('PUT', responseUrl, headers=headers, body=json_responseBody) 42 | print("Status code:", response.status) 43 | 44 | 45 | except Exception as e: 46 | 47 | print("send(..) 
failed executing http.request(..):", e) -------------------------------------------------------------------------------- /source/custom-resource/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/custom-resource/requirements.txt -------------------------------------------------------------------------------- /source/get-data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/get-data/__init__.py -------------------------------------------------------------------------------- /source/get-data/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import boto3 4 | import os 5 | import json 6 | from botocore.exceptions import ClientError 7 | from aws_lambda_powertools.utilities.validation import validate 8 | from aws_lambda_powertools.utilities.validation.exceptions import SchemaValidationError 9 | import schemas 10 | 11 | dynamodb = boto3.resource('dynamodb') 12 | 13 | 14 | def lambda_handler(event, context): 15 | request = event.get('pathParameters') 16 | 17 | uuid = request.get('uuid') 18 | 19 | input_object = {"uuid": uuid} 20 | 21 | try: 22 | validate(event=input_object, schema=schemas.INPUT) 23 | except SchemaValidationError as e: 24 | return {"response": "failure", "error": e} 25 | 26 | table_name = os.environ['TABLE_NAME'] 27 | 28 | table = dynamodb.Table(table_name) 29 | 30 | try: 31 | response = table.get_item( 32 | Key={ 33 | 'uuid': request.get('uuid') 34 | } 35 | ) 36 | except ClientError as e: 37 | print(e.response['Error']['Message']) 38 | else: 39 | item = response['Item'] 40 | 41 | return { 42 | 'statusCode': 200, 43 | 'body': json.dumps({"item": item}) 44 | } 45 | -------------------------------------------------------------------------------- /source/get-data/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-lambda-powertools -------------------------------------------------------------------------------- /source/get-data/schemas.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | INPUT = { 4 | "$schema": "http://json-schema.org/draft-07/schema", 5 | "$id": "http://example.com/example.json", 6 | "type": "object", 7 | "title": "Batch processing sample schema for the use case", 8 | "description": "The root schema comprises the entire JSON document.", 9 | "required": ["uuid"], 10 | "properties": { 11 | "uuid": { 12 | "type": "string", 13 | "maxLength": 9, 14 | "pattern": "[0-9]{9}" 15 | } 16 | }, 17 | } -------------------------------------------------------------------------------- /source/merge-s3-files/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/merge-s3-files/__init__.py -------------------------------------------------------------------------------- /source/merge-s3-files/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import boto3 4 | 5 | s3_client = boto3.client('s3') 6 | 7 | 8 | def lambda_handler(event, context): 9 | bucket = event['bucket'] 10 | key = event['key'] 11 | to_process_folder = event['toProcessFolder'] 12 | 13 | output_path = to_process_folder.replace("to_process", "output") 14 | 15 | output = [] 16 | 17 | header_text = [ 18 | 'uuid', 19 | 'Country', 20 | 'Item Type', 21 | 'Sales Channel', 22 | 'Order Priority', 23 | 'Order Date', 24 | 'Region', 25 | 'Ship Date', 26 | 'Units Sold', 27 | 'Unit Price', 28 | 'Unit Cost', 29 | 'Total Revenue', 30 | 'Total Cost', 31 | 'Total Profit' 32 | 33 | ] 34 | 35 | output.append(",".join(header_text)+"\n") 36 | 37 | try: 38 | for item in s3_client.list_objects_v2(Bucket=bucket, Prefix=output_path)['Contents']: 39 | if item['Key'].endswith('.csv'): 40 | resp = s3_client.select_object_content( 41 | Bucket=bucket, 42 | Key=item['Key'], 43 | ExpressionType='SQL', 44 | Expression="select * from s3object", 45 | InputSerialization={'CSV': {"FileHeaderInfo": "NONE"}, 'CompressionType': 'NONE'}, 46 | OutputSerialization={'CSV': {}}, 47 | ) 48 | 49 | for event in resp['Payload']: 50 | if 'Records' in event: 51 | records = event['Records']['Payload'].decode('utf-8') 52 | payloads = (''.join(response for response in records)) 53 | output.append(payloads) 54 | 55 | output_body = "".join(output) 56 | s3_target_key = output_path + "/" + get_output_filename(key) 57 | response = s3_client.put_object(Bucket=bucket, 58 | Key=s3_target_key, 59 | Body=output_body) 60 | 61 | return {"response": response, "S3OutputFileName": s3_target_key} 62 | 63 | except Exception as e: 64 | raise Exception(str(e)) 65 | 66 | 67 | def get_output_filename(key): 68 | last_part_pos = key.rfind("/") 69 | if last_part_pos == -1: 70 | return "" 71 | last_part_pos += 1 72 | input_file_name = key[last_part_pos:] 73 | 74 | return "completed/" + input_file_name 75 | -------------------------------------------------------------------------------- /source/merge-s3-files/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/merge-s3-files/requirements.txt -------------------------------------------------------------------------------- /source/read-file/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/read-file/__init__.py -------------------------------------------------------------------------------- /source/read-file/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import csv 4 | import s3fs 5 | import os 6 | 7 | s3 = s3fs.S3FileSystem(anon=False) 8 | 9 | header = [ 10 | 'uuid', 11 | 'country', 12 | 'itemType', 13 | 'salesChannel', 14 | 'orderPriority', 15 | 'orderDate', 16 | 'region', 17 | 'shipDate' 18 | ] 19 | 20 | 21 | def lambda_handler(event, context): 22 | input_file = event['input']['FilePath'] 23 | output_data = [] 24 | skip_first = 0 25 | with s3.open(input_file, 'r', newline='', encoding='utf-8-sig') as inFile: 26 | file_reader = csv.reader(inFile) 27 | for row in file_reader: 28 | if skip_first == 0: 29 | skip_first = skip_first + 1 30 | continue 31 | new_object = {} 32 | for i in range(len(header)): 33 | new_object[header[i]] = row[i] 34 | 35 | output_data.append(new_object) 36 | 37 | return output_data 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /source/read-file/requirements.txt: -------------------------------------------------------------------------------- 1 | s3fs -------------------------------------------------------------------------------- /source/s3-lambda-notification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/s3-lambda-notification/__init__.py -------------------------------------------------------------------------------- /source/s3-lambda-notification/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | import json 4 | import os 5 | import boto3 6 | import time 7 | 8 | state_machine_client = boto3.client('stepfunctions') 9 | 10 | 11 | def lambda_handler(event, context): 12 | 13 | for record in event['Records']: 14 | param = { 15 | "Records": record, 16 | "inputArchiveFolder": os.environ['INPUT_ARCHIVE_FOLDER'], 17 | "fileChunkSize": int(os.environ['FILE_CHUNK_SIZE']), 18 | "fileDelimiter": os.environ['FILE_DELIMITER'] 19 | 20 | } 21 | state_machine_arn = os.environ['STATE_MACHINE_ARN'] 22 | state_machine_execution_name = os.environ['STATE_MACHINE_EXECUTION_NAME'] + str(time.time()) 23 | 24 | response = state_machine_client.start_execution( 25 | stateMachineArn=state_machine_arn, 26 | name=state_machine_execution_name, 27 | input=json.dumps(param) 28 | ) 29 | 30 | print(response) 31 | -------------------------------------------------------------------------------- /source/s3-lambda-notification/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/s3-lambda-notification/requirements.txt -------------------------------------------------------------------------------- /source/send-email/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/send-email/__init__.py -------------------------------------------------------------------------------- /source/send-email/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | 6 | s3_client = boto3.client('s3') 7 | 8 | 9 | def lambda_handler(event, context): 10 | sender = event['sender'] 11 | recipient = event['recipient'] 12 | 13 | bucket = event['bucket'] 14 | s3_output_file = event['s3OutputFileName'] 15 | 16 | pre_signed_url = generate_s3_signed_url(bucket, s3_output_file) 17 | 18 | send_email(sender, recipient, pre_signed_url) 19 | 20 | return {"response": "success"} 21 | 22 | 23 | def generate_s3_signed_url(bucket, s3_target_key): 24 | return s3_client.generate_presigned_url('get_object', 25 | Params={'Bucket': bucket, 26 | 'Key': s3_target_key}, 27 | ExpiresIn=3600) 28 | 29 | 30 | def send_email(sender, recipient, pre_signed_url): 31 | # The subject line for the email. 32 | subject = "Batch Processing complete: Output file information" 33 | 34 | # The email body for recipients with non-HTML email clients. 35 | body_text = ("The file has been processed successfully\r\n" 36 | "Click the pre-signed S3 URL to access the output file " 37 | + pre_signed_url + ", The link will expire in 60 minutes." 38 | ) 39 | 40 | # The HTML body of the email. 41 | body_html = """ 42 | 43 | 44 |

<html> <head></head> <body> <h1>The file has been processed successfully</h1> 45 | <p>Click the pre-signed S3 URL to access the output file: 46 | <a href="{url}">Output File</a></p> 47 | <p>The link will expire in 60 minutes.</p> </body> </html>

48 | 49 | """.format(url=pre_signed_url) 50 | 51 | # The character encoding for the email. 52 | charset = "UTF-8" 53 | 54 | # Create a new SES resource and specify a region. 55 | client = boto3.client('ses') 56 | 57 | # Try to send the email. 58 | try: 59 | # Provide the contents of the email. 60 | response = client.send_email( 61 | Destination={ 62 | 'ToAddresses': [ 63 | recipient, 64 | ], 65 | }, 66 | Message={ 67 | 'Body': { 68 | 'Html': { 69 | 'Charset': charset, 70 | 'Data': body_html, 71 | }, 72 | 'Text': { 73 | 'Charset': charset, 74 | 'Data': body_text, 75 | }, 76 | }, 77 | 'Subject': { 78 | 'Charset': charset, 79 | 'Data': subject, 80 | }, 81 | }, 82 | Source=sender, 83 | 84 | ) 85 | # Display an error if something goes wrong. 86 | except ClientError as e: 87 | print(e.response['Error']['Message']) 88 | else: 89 | print("Email sent! Message ID:"), 90 | print(response['MessageId']) 91 | -------------------------------------------------------------------------------- /source/send-email/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/send-email/requirements.txt -------------------------------------------------------------------------------- /source/split-ip-file/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/split-ip-file/__init__.py -------------------------------------------------------------------------------- /source/split-ip-file/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import os 4 | import s3fs 5 | import uuid 6 | 7 | # S3 bucket info 8 | s3 = s3fs.S3FileSystem(anon=False) 9 | 10 | 11 | def lambda_handler(event, context): 12 | input_archive_folder = event['inputArchiveFolder'] 13 | to_process_folder = str(uuid.uuid4()) + "/" + "to_process" 14 | file_row_limit = event['fileChunkSize'] 15 | file_delimiter = event['fileDelimiter'] 16 | output_path = to_process_folder.replace("to_process", "output") 17 | 18 | record = event['Records'] 19 | 20 | bucket = record['s3']['bucket']['name'] 21 | key = record['s3']['object']['key'] 22 | create_start_indicator(bucket, output_path) 23 | input_file = os.path.join(bucket, key) 24 | archive_path = os.path.join(bucket, input_archive_folder, os.path.basename(key)) 25 | folder = os.path.split(key)[0] 26 | s3_url = os.path.join(bucket, folder) 27 | output_file_template = os.path.splitext(os.path.basename(key))[0] + "__part" 28 | output_path = os.path.join(bucket, to_process_folder) 29 | 30 | # Number of files to be created 31 | num_files = file_count(s3.open(input_file, 'r'), file_delimiter, file_row_limit) 32 | 33 | # Split the input file into several files, each with the number of records mentioned in the fileChunkSize parameter. 34 | splitFileNames = split(s3.open(input_file, 'r'), file_delimiter, file_row_limit, output_file_template, 35 | output_path, True, 36 | num_files) 37 | 38 | # Archive the input file. 
39 | archive(input_file, archive_path) 40 | 41 | response = {"bucket": bucket, "key": key, "splitFileNames": splitFileNames, 42 | "toProcessFolder": to_process_folder} 43 | return response 44 | 45 | 46 | # Determine the number of files that this Lambda function will create. 47 | def file_count(file_handler, delimiter, row_limit): 48 | import csv 49 | reader = csv.reader(file_handler, delimiter=delimiter) 50 | # Figure out the number of files this function will generate. 51 | row_count = sum(1 for row in reader) - 1 52 | # If there's a remainder, always round up. 53 | file_count = int(row_count // row_limit) + (row_count % row_limit > 0) 54 | return file_count 55 | 56 | 57 | # Split the input into several smaller files. 58 | def split(filehandler, delimiter, row_limit, output_name_template, output_path, keep_headers, num_files): 59 | import csv 60 | reader = csv.reader(filehandler, delimiter=delimiter) 61 | split_file_path = [] 62 | 63 | current_piece = 1 64 | current_out_path = os.path.join( 65 | output_path, 66 | output_name_template + str(current_piece) + "__of" + str(num_files) + ".csv" 67 | ) 68 | split_file_path.append(current_out_path) 69 | current_out_writer = csv.writer(s3.open(current_out_path, 'w'), delimiter=delimiter, quoting=csv.QUOTE_ALL) 70 | current_limit = row_limit 71 | if keep_headers: 72 | headers = next(reader) 73 | current_out_writer.writerow(headers) 74 | for i, row in enumerate(reader): 75 | if i + 1 > current_limit: 76 | current_piece += 1 77 | current_limit = row_limit * current_piece 78 | current_out_path = os.path.join( 79 | output_path, 80 | output_name_template + str(current_piece) + "__of" + str(num_files) + ".csv" 81 | ) 82 | split_file_path.append(current_out_path) 83 | current_out_writer = csv.writer(s3.open(current_out_path, 'w'), delimiter=delimiter, quoting=csv.QUOTE_ALL) 84 | if keep_headers: 85 | current_out_writer.writerow(headers) 86 | current_out_writer.writerow(row) 87 | return split_file_path 88 | 89 | 90 | # Move the original input file into an archive folder. 
91 | def archive(input_file, archive_path): 92 | s3.copy(input_file, archive_path) 93 | s3.rm(input_file) 94 | 95 | 96 | def create_start_indicator(bucket, folder_name): 97 | response = s3.touch(bucket + "/" + folder_name + "/_started") 98 | -------------------------------------------------------------------------------- /source/split-ip-file/requirements.txt: -------------------------------------------------------------------------------- 1 | s3fs -------------------------------------------------------------------------------- /source/statemachine/blog-sfn-main-orchestrator.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "State machine for batch processing", 3 | "StartAt": "Split Input File into chunks", 4 | "States": { 5 | "Split Input File into chunks": { 6 | "Type": "Task", 7 | "ResultPath": "$.splitOutput", 8 | "Resource": "${SplitInputFileFunctionArn}", 9 | "Next": "Call Step function for each chunk" 10 | }, 11 | "Call Step function for each chunk": { 12 | "Type": "Map", 13 | "Next": "Merge all Files", 14 | "ItemsPath": "$.splitOutput.splitFileNames", 15 | "ResultPath": null, 16 | "Parameters": { 17 | "FilePath.$": "$$.Map.Item.Value", 18 | "FileIndex.$": "$$.Map.Item.Index" 19 | }, 20 | "Iterator": { 21 | "StartAt": "Call Chunk Processor Workflow", 22 | "States": { 23 | "Call Chunk Processor Workflow": { 24 | "Type":"Task", 25 | "Resource":"arn:aws:states:::states:startExecution.sync:2", 26 | "Parameters":{ 27 | "Input":{ 28 | "input": { 29 | "FilePath.$": "$.FilePath" 30 | } 31 | }, 32 | "StateMachineArn":"${BlogBatchProcessChunkArn}" 33 | }, 34 | "End":true 35 | } 36 | } 37 | } 38 | }, 39 | "Merge all Files": { 40 | "Type": "Task", 41 | "Resource": "${MergeS3FilesFunctionArn}", 42 | "Parameters": { 43 | "toProcessFolder.$": "$.splitOutput.toProcessFolder", 44 | "bucket.$": "$.splitOutput.bucket", 45 | "key.$" : "$.splitOutput.key" 46 | }, 47 | "ResultPath": "$.mergeResponse", 48 | "Next": "Email the file", 49 | "Retry": [ 50 | { 51 | "ErrorEquals": [ 52 | "States.ALL" 53 | ], 54 | "IntervalSeconds": 1, 55 | "MaxAttempts": 3, 56 | "BackoffRate": 2 57 | } 58 | ] 59 | }, 60 | "Email the file": { 61 | "Type": "Task", 62 | "Resource": "${SendEmailFunctionArn}", 63 | "Parameters": { 64 | "sender": "${SESSender}", 65 | "recipient": "${SESRecipient}", 66 | "bucket.$": "$.splitOutput.bucket", 67 | "s3OutputFileName.$": "$.mergeResponse.S3OutputFileName" 68 | }, 69 | "Retry": [ 70 | { 71 | "ErrorEquals": [ 72 | "States.ALL" 73 | ], 74 | "IntervalSeconds": 1, 75 | "MaxAttempts": 3, 76 | "BackoffRate": 2 77 | } 78 | ], 79 | "End": true 80 | } 81 | } 82 | } -------------------------------------------------------------------------------- /source/statemachine/blog-sfn-process-chunk.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "AWS Step Functions example for batch processing", 3 | "StartAt": "Read File", 4 | "States": { 5 | "Read File": { 6 | "Type": "Task", 7 | "ResultPath": "$.fileContents", 8 | "Resource": "${ReadFileFunctionArn}", 9 | "Next": "Process messages" 10 | }, 11 | "Process messages": { 12 | "Type": "Map", 13 | "Next": "Write output file", 14 | "ItemsPath": "$.fileContents", 15 | "ResultPath": "$.input.enrichedData", 16 | "OutputPath": "$.input", 17 | "Parameters": { 18 | "MessageNumber.$": "$$.Map.Item.Index", 19 | "MessageDetails.$": "$$.Map.Item.Value" 20 | }, 21 | "Iterator": { 22 | "StartAt": "Validate Data", 23 | "States": { 24 | "Validate Data": { 25 | 
"Type": "Task", 26 | "Resource": "${ValidateDataFunctionArn}", 27 | "InputPath": "$.MessageDetails", 28 | "ResultPath": "$.MessageDetails.validatedresult", 29 | "Next": "Get Financial Data", 30 | "Catch": [ 31 | { 32 | "ErrorEquals": [ 33 | "States.ALL" 34 | ], 35 | "ResultPath": "$.MessageDetails.error-info", 36 | "Next": "Store Error Record" 37 | } 38 | ] 39 | }, 40 | "Store Error Record": { 41 | "Type": "Task", 42 | "Resource": "arn:aws:states:::dynamodb:putItem", 43 | "InputPath": "$.MessageDetails", 44 | "OutputPath": "$.MessageDetails", 45 | "ResultPath": null, 46 | "Parameters": { 47 | "TableName": "${ErrorTableName}", 48 | "Item": { 49 | "uuid": { 50 | "S.$": "$.uuid" 51 | }, 52 | "country": { 53 | "S.$": "$.country" 54 | }, 55 | "itemType": { 56 | "S.$": "$.itemType" 57 | }, 58 | "salesChannel": { 59 | "S.$": "$.salesChannel" 60 | }, 61 | "orderPriority": { 62 | "S.$": "$.orderPriority" 63 | }, 64 | "orderDate": { 65 | "S.$": "$.orderDate" 66 | }, 67 | "region": { 68 | "S.$": "$.region" 69 | }, 70 | "shipDate": { 71 | "S.$": "$.shipDate" 72 | }, 73 | "error": { 74 | "S.$": "$.error-info.Error" 75 | }, 76 | "cause": { 77 | "S.$": "$.error-info.Cause" 78 | } 79 | } 80 | }, 81 | "Retry": [ 82 | { 83 | "ErrorEquals": [ 84 | "States.TaskFailed" 85 | ], 86 | "IntervalSeconds": 20, 87 | "MaxAttempts": 5, 88 | "BackoffRate": 10 89 | } 90 | ], 91 | "End": true 92 | }, 93 | "Get Financial Data": { 94 | "Type": "Task", 95 | "Resource": "arn:aws:states:::apigateway:invoke", 96 | "ResultPath": "$.MessageDetails.financialdata", 97 | "OutputPath": "$.MessageDetails", 98 | "ResultSelector": { 99 | "item.$": "$.ResponseBody.item" 100 | }, 101 | "Parameters": { 102 | "ApiEndpoint": "${ApiEndpoint}", 103 | "Method": "GET", 104 | "Stage": "Prod", 105 | "Path.$": "States.Format('financials/{}', $.MessageDetails.uuid)", 106 | "AuthType": "IAM_ROLE" 107 | }, 108 | "Retry": [ 109 | { 110 | "ErrorEquals": [ 111 | "States.TaskFailed" 112 | ], 113 | "IntervalSeconds": 2, 114 | "MaxAttempts": 3, 115 | "BackoffRate": 1 116 | } 117 | ], 118 | "End": true 119 | } 120 | } 121 | } 122 | }, 123 | "Write output file": { 124 | "Type": "Task", 125 | "Resource": "${WriteOutputChunkFunctionArn}", 126 | "ResultPath": "$.writeOutputFileResponse", 127 | "End": true, 128 | "Retry": [ 129 | { 130 | "ErrorEquals": [ 131 | "States.ALL" 132 | ], 133 | "IntervalSeconds": 1, 134 | "MaxAttempts": 3, 135 | "BackoffRate": 2 136 | } 137 | ] 138 | } 139 | } 140 | } -------------------------------------------------------------------------------- /source/validate-data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/validate-data/__init__.py -------------------------------------------------------------------------------- /source/validate-data/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | from aws_lambda_powertools.utilities.validation import validate 4 | from aws_lambda_powertools.utilities.validation.exceptions import SchemaValidationError 5 | import schemas 6 | 7 | 8 | def lambda_handler(event, context): 9 | try: 10 | validate(event=event, schema=schemas.INPUT) 11 | except SchemaValidationError as e: 12 | # Re-raise so the state machine's Catch on "Validate Data" can store the invalid record in the error table 13 | raise e 14 | 15 | return {"response": "success"} 16 | -------------------------------------------------------------------------------- /source/validate-data/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-lambda-powertools -------------------------------------------------------------------------------- /source/validate-data/schemas.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | INPUT = { 4 | "$schema": "http://json-schema.org/draft-07/schema", 5 | "$id": "http://example.com/example.json", 6 | "type": "object", 7 | "title": "Batch processing sample schema for the use case", 8 | "description": "The root schema comprises the entire JSON document.", 9 | "required": ["uuid", "country", "itemType", "salesChannel", "orderPriority", "orderDate", "region", "shipDate"], 10 | "properties": { 11 | "uuid": { 12 | "type": "string", 13 | "maxLength": 9, 14 | }, 15 | "country": { 16 | "type": "string", 17 | "maxLength": 50, 18 | }, 19 | "itemType": { 20 | "type": "string", 21 | "maxLength": 30, 22 | }, 23 | "salesChannel": { 24 | "type": "string", 25 | "maxLength": 10, 26 | }, 27 | "orderPriority": { 28 | "type": "string", 29 | "maxLength": 5, 30 | }, 31 | "orderDate": { 32 | "type": "string", 33 | "maxLength": 10, 34 | }, 35 | "region": { 36 | "type": "string", 37 | "maxLength": 100, 38 | }, 39 | "shipDate": { 40 | "type": "string", 41 | "maxLength": 10, 42 | } 43 | }, 44 | } -------------------------------------------------------------------------------- /source/write-output-chunk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/write-output-chunk/__init__.py -------------------------------------------------------------------------------- /source/write-output-chunk/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0 3 | import csv 4 | import boto3 5 | 6 | import json 7 | from io import StringIO 8 | 9 | s3_client = boto3.client('s3') 10 | 11 | header = [ 12 | 'uuid', 13 | 'country', 14 | 'itemType', 15 | 'salesChannel', 16 | 'orderPriority', 17 | 'orderDate', 18 | 'region', 19 | 'shipDate', 20 | 'unitsSold', 21 | 'unitPrice', 22 | 'unitCost', 23 | 'totalRevenue', 24 | 'totalCost', 25 | 'totalProfit' 26 | 27 | ] 28 | 29 | 30 | def lambda_handler(event, context): 31 | dataset = event['enrichedData'] 32 | input_file_key = event['FilePath'] 33 | output_file_key = input_file_key.replace("to_process", "output") 34 | bucket_info = get_bucket_info(output_file_key) 35 | print(bucket_info) 36 | 37 | out_file = StringIO() 38 | file_writer = csv.writer(out_file, quoting=csv.QUOTE_ALL) 39 | 40 | for data in dataset: 41 | if 'error-info' in data: 42 | continue 43 | data_list = convert_to_list(data) 44 | file_writer.writerow(data_list) 45 | 46 | response = s3_client.put_object(Bucket=bucket_info['bucket'], 47 | Key=bucket_info['key'], 48 | Body=out_file.getvalue()) 49 | 50 | if response['ResponseMetadata']['HTTPStatusCode'] != 200: 51 | message = 'Writing chunk to S3 failed' + json.dumps(response, indent=2) 52 | raise Exception(message) 53 | 54 | return {"response": "success"} 55 | 56 | 57 | def convert_to_list(data): 58 | data_list = [data['uuid'], data['country'], data['itemType'], data['salesChannel'], data['orderPriority'], 59 | data['orderDate'], data['region'], data['shipDate'], 60 | data['financialdata']['item']['unitsSold'], 61 | data['financialdata']['item']['unitPrice'], 62 | data['financialdata']['item']['unitCost'], 63 | data['financialdata']['item']['totalRevenue'], 64 | data['financialdata']['item']['totalCost'], 65 | data['financialdata']['item']['totalProfit']] 66 | 67 | return data_list 68 | 69 | def get_bucket_info(filename): 70 | first_part_pos = filename.find("/") 71 | if first_part_pos == -1: 72 | return "" 73 | bucket_name = filename[:first_part_pos] 74 | file_prefix = filename[(first_part_pos + 1):] 75 | 76 | return {"bucket": bucket_name, "key": file_prefix} 77 | 78 | -------------------------------------------------------------------------------- /source/write-output-chunk/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-serverless-batch-architecture/1672d7623c2a0b6141bf83d019efe3c6c70efd00/source/write-output-chunk/requirements.txt -------------------------------------------------------------------------------- /template.yaml: -------------------------------------------------------------------------------- 1 | #Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | #SPDX-License-Identifier: MIT-0 3 | AWSTemplateFormatVersion: '2010-09-09' 4 | Transform: AWS::Serverless-2016-10-31 5 | Description: > 6 | SAM Template for AWS Step Functions batch processing example 7 | 8 | Globals: 9 | Function: 10 | Timeout: 900 11 | 12 | Parameters: 13 | SESSender: 14 | Type: String 15 | Default: "sender@example.com" 16 | Description: Specify the sender email address. 17 | SESRecipient: 18 | Type: String 19 | Default: "recipient@example.com" 20 | Description: Specify the recipient email address. 21 | SESIdentityName: 22 | Type: String 23 | Default: "sender@example.com" 24 | Description: An email address or domain that Amazon SES users use to send email. 
It is a best practice to authorize only specific email addresses such as in this case sender@example.com to send emails. If your SES Accounts are in sandbox you have to specify both the sender and recipient emails, in that case modify the template.yaml to add the permissions for recipient email address. 25 | InputArchiveFolder: 26 | Type: String 27 | Default: "input_archive" 28 | Description: Amazon S3 prefix in the SourceBucket where the input file will be archived after processing. 29 | FileChunkSize: 30 | Type: String 31 | Default: 600 32 | Description: Size of each of the chunks, which is split from the input file. 33 | FileDelimiter: 34 | Type: String 35 | Default: "," 36 | Description: Delimiter of the CSV file (for example, a comma). 37 | 38 | 39 | Resources: 40 | BlogBatchProcessChunk: 41 | Type: AWS::Serverless::StateMachine 42 | Properties: 43 | DefinitionUri: source/statemachine/blog-sfn-process-chunk.json 44 | DefinitionSubstitutions: 45 | ReadFileFunctionArn: !GetAtt ReadFileFunction.Arn 46 | WriteOutputChunkFunctionArn: !GetAtt WriteOutputChunkFunction.Arn 47 | ValidateDataFunctionArn: !GetAtt ValidateDataFunction.Arn 48 | ApiEndpoint: !Sub "${Api}.execute-api.${AWS::Region}.amazonaws.com" 49 | ErrorTableName: !Ref ErrorTable 50 | Policies: 51 | - LambdaInvokePolicy: 52 | FunctionName: !Ref GetDataFunction 53 | - LambdaInvokePolicy: 54 | FunctionName: !Ref ReadFileFunction 55 | - LambdaInvokePolicy: 56 | FunctionName: !Ref WriteOutputChunkFunction 57 | - LambdaInvokePolicy: 58 | FunctionName: !Ref ValidateDataFunction 59 | - DynamoDBWritePolicy: 60 | TableName: !Ref ErrorTable 61 | - Statement: 62 | - Sid: AllowApiGatewayInvoke 63 | Effect: Allow 64 | Action: 65 | - execute-api:Invoke 66 | Resource: !Sub "arn:${AWS::Partition}:execute-api:${AWS::Region}:${AWS::AccountId}:${Api}/*/GET/financials/*" 67 | 68 | BlogBatchMainOrchestrator: 69 | Type: AWS::Serverless::StateMachine 70 | Properties: 71 | DefinitionUri: source/statemachine/blog-sfn-main-orchestrator.json 72 | DefinitionSubstitutions: 73 | SplitInputFileFunctionArn: !GetAtt SplitInputFileFunction.Arn 74 | MergeS3FilesFunctionArn: !GetAtt MergeS3FilesFunction.Arn 75 | SendEmailFunctionArn: !GetAtt SendEmailFunction.Arn 76 | SNSArn: !Ref SNSTopic 77 | SESSender: !Ref SESSender 78 | SESRecipient: !Ref SESRecipient 79 | BlogBatchProcessChunkArn: !GetAtt BlogBatchProcessChunk.Arn 80 | Policies: 81 | - LambdaInvokePolicy: 82 | FunctionName: !Ref SplitInputFileFunction 83 | - LambdaInvokePolicy: 84 | FunctionName: !Ref MergeS3FilesFunction 85 | - LambdaInvokePolicy: 86 | FunctionName: !Ref SendEmailFunction 87 | - SNSCrudPolicy: 88 | TopicName: !GetAtt SNSTopic.TopicName 89 | - StepFunctionsExecutionPolicy: 90 | StateMachineName: !GetAtt BlogBatchProcessChunk.Name 91 | - Statement: 92 | - Sid: AllowPutTargets 93 | Effect: Allow 94 | Action: 95 | - events:PutTargets 96 | - events:PutRule 97 | - events:DescribeRule 98 | Resource: !Sub "arn:${AWS::Partition}:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule" 99 | - Sid: AllowStatesDescribeStop 100 | Effect: Allow 101 | Action: 102 | - states:DescribeExecution 103 | - states:StopExecution 104 | Resource: !Sub "arn:aws:states:${AWS::Region}:${AWS::AccountId}:execution:${BlogBatchProcessChunk.Name}:*" 105 | 106 | 107 | 108 | SplitInputFileFunction: 109 | Type: AWS::Serverless::Function 110 | Properties: 111 | CodeUri: source/split-ip-file/ 112 | Handler: app.lambda_handler 113 | Runtime: python3.8 114 | Policies: 115 | - S3CrudPolicy: 
116 | BucketName: !Ref SourceBucket 117 | 118 | 119 | 120 | SplitInputFileFunctionLogGroup: 121 | DependsOn: SplitInputFileFunction 122 | Type: AWS::Logs::LogGroup 123 | Properties: 124 | KmsKeyId: !GetAtt LogGroupKey.Arn 125 | LogGroupName: !Sub /aws/lambda/${SplitInputFileFunction} 126 | RetentionInDays: 7 127 | 128 | 129 | MergeS3FilesFunction: 130 | Type: AWS::Serverless::Function 131 | Properties: 132 | CodeUri: source/merge-s3-files/ 133 | Handler: app.lambda_handler 134 | Runtime: python3.8 135 | Policies: 136 | - S3ReadPolicy: 137 | BucketName: !Ref SourceBucket 138 | - S3WritePolicy: 139 | BucketName: !Ref SourceBucket 140 | 141 | 142 | MergeS3FilesFunctionLogGroup: 143 | DependsOn: MergeS3FilesFunction 144 | Type: AWS::Logs::LogGroup 145 | Properties: 146 | KmsKeyId: !GetAtt LogGroupKey.Arn 147 | LogGroupName: !Sub /aws/lambda/${MergeS3FilesFunction} 148 | RetentionInDays: 7 149 | 150 | SendEmailFunction: 151 | Type: AWS::Serverless::Function 152 | Properties: 153 | CodeUri: source/send-email/ 154 | Handler: app.lambda_handler 155 | Runtime: python3.8 156 | Policies: 157 | - SESCrudPolicy: 158 | IdentityName: !Ref SESIdentityName 159 | - S3ReadPolicy: 160 | BucketName: !Ref SourceBucket 161 | 162 | 163 | 164 | SendEmailFunctionLogGroup: 165 | DependsOn: SendEmailFunction 166 | Type: AWS::Logs::LogGroup 167 | Properties: 168 | KmsKeyId: !GetAtt LogGroupKey.Arn 169 | LogGroupName: !Sub /aws/lambda/${SendEmailFunction} 170 | RetentionInDays: 7 171 | 172 | Api: 173 | Type: AWS::Serverless::Api 174 | DependsOn: ApiCWLRoleArn 175 | Properties: 176 | StageName: Prod 177 | Auth: 178 | DefaultAuthorizer: AWS_IAM 179 | UsagePlan: 180 | CreateUsagePlan: PER_API 181 | UsagePlanName: "blog-api-usage-plan" 182 | Quota: 183 | Limit: 100 184 | Period: DAY 185 | Throttle: 186 | BurstLimit: 50 187 | RateLimit: 100 188 | Description: "Blog API Usage Plan" 189 | AccessLogSetting: 190 | DestinationArn: !Sub ${ApiAccessLogGroup.Arn} 191 | Format: "{ 'requestId':'$context.requestId', 'ip': '$context.identity.sourceIp', 'caller':'$context.identity.caller', 'user':'$context.identity.user','requestTime':'$context.requestTime', 'xrayTraceId':'$context.xrayTraceId', 'wafResponseCode':'$context.wafResponseCode', 'httpMethod':'$context.httpMethod','resourcePath':'$context.resourcePath', 'status':'$context.status','protocol':'$context.protocol', 'responseLength':'$context.responseLength' }" 192 | 193 | ApiAccessLogGroup: 194 | Type: AWS::Logs::LogGroup 195 | DependsOn: Api 196 | Properties: 197 | LogGroupName: !Sub /aws/apigateway/${Api} 198 | RetentionInDays: 7 199 | KmsKeyId: !GetAtt LogGroupKey.Arn 200 | 201 | LogGroupKey: 202 | Type: AWS::KMS::Key 203 | Properties: 204 | Enabled: true 205 | EnableKeyRotation: true 206 | KeyPolicy: 207 | Version: 2012-10-17 208 | Id: key-loggroup 209 | Statement: 210 | - Sid: Enable IAM User Permissions 211 | Effect: Allow 212 | Principal: 213 | AWS: !Join 214 | - '' 215 | - - !Sub 'arn:${AWS::Partition}:iam::' 216 | - !Ref 'AWS::AccountId' 217 | - ':root' 218 | Action: 'kms:*' 219 | Resource: '*' 220 | - Sid: Enable Cloudwatch access 221 | Effect: Allow 222 | Principal: 223 | Service: !Sub "logs.${AWS::Region}.amazonaws.com" 224 | Action: 225 | - kms:Encrypt* 226 | - kms:Decrypt* 227 | - kms:ReEncrypt* 228 | - kms:GenerateDataKey* 229 | - kms:Describe* 230 | Resource: '*' 231 | 232 | 233 | ApiCWLRoleArn: 234 | Type: AWS::ApiGateway::Account 235 | Properties: 236 | CloudWatchRoleArn: !GetAtt CloudWatchRole.Arn 237 | 238 | 239 | CloudWatchRole: 240 | Type: AWS::IAM::Role 241 
| Properties: 242 | AssumeRolePolicyDocument: 243 | Version: '2012-10-17' 244 | Statement: 245 | Action: 'sts:AssumeRole' 246 | Effect: Allow 247 | Principal: 248 | Service: apigateway.amazonaws.com 249 | Path: / 250 | ManagedPolicyArns: 251 | - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonAPIGatewayPushToCloudWatchLogs' 252 | 253 | GetDataFunction: 254 | Type: AWS::Serverless::Function 255 | Properties: 256 | CodeUri: source/get-data/ 257 | Handler: app.lambda_handler 258 | Runtime: python3.8 259 | Environment: 260 | Variables: 261 | TABLE_NAME: !Ref FinancialTable 262 | Policies: 263 | - AWSLambdaExecute 264 | - DynamoDBReadPolicy: 265 | TableName: !Ref FinancialTable 266 | Events: 267 | GetData: 268 | Type: Api 269 | Properties: 270 | RestApiId: !Ref Api 271 | Path: /financials/{uuid} 272 | Method: get 273 | 274 | GetDataFunctionLogGroup: 275 | DependsOn: GetDataFunction 276 | Type: AWS::Logs::LogGroup 277 | Properties: 278 | KmsKeyId: !GetAtt LogGroupKey.Arn 279 | LogGroupName: !Sub /aws/lambda/${GetDataFunction} 280 | RetentionInDays: 7 281 | 282 | ReadFileFunction: 283 | Type: AWS::Serverless::Function 284 | Properties: 285 | CodeUri: source/read-file/ 286 | Handler: app.lambda_handler 287 | Runtime: python3.8 288 | Policies: 289 | - S3ReadPolicy: 290 | BucketName: !Ref SourceBucket 291 | 292 | 293 | ReadFileFunctionLogGroup: 294 | DependsOn: ReadFileFunction 295 | Type: AWS::Logs::LogGroup 296 | Properties: 297 | KmsKeyId: !GetAtt LogGroupKey.Arn 298 | LogGroupName: !Sub /aws/lambda/${ReadFileFunction} 299 | RetentionInDays: 7 300 | 301 | 302 | FinancialTable: 303 | Type: AWS::DynamoDB::Table 304 | Properties: 305 | PointInTimeRecoverySpecification: 306 | PointInTimeRecoveryEnabled: true 307 | SSESpecification: 308 | SSEEnabled: true 309 | AttributeDefinitions: 310 | - AttributeName: uuid 311 | AttributeType: S 312 | KeySchema: 313 | - AttributeName: uuid 314 | KeyType: HASH 315 | BillingMode: PAY_PER_REQUEST 316 | 317 | ErrorTable: 318 | Type: AWS::DynamoDB::Table 319 | Properties: 320 | PointInTimeRecoverySpecification: 321 | PointInTimeRecoveryEnabled: true 322 | SSESpecification: 323 | SSEEnabled: true 324 | AttributeDefinitions: 325 | - AttributeName: uuid 326 | AttributeType: S 327 | KeySchema: 328 | - AttributeName: uuid 329 | KeyType: HASH 330 | BillingMode: PAY_PER_REQUEST 331 | 332 | WriteOutputChunkFunction: 333 | Type: AWS::Serverless::Function 334 | Properties: 335 | CodeUri: source/write-output-chunk/ 336 | Handler: app.lambda_handler 337 | Runtime: python3.8 338 | Policies: 339 | - S3WritePolicy: 340 | BucketName: !Ref SourceBucket 341 | 342 | 343 | WriteOutputChunkFunctionLogGroup: 344 | DependsOn: WriteOutputChunkFunction 345 | Type: AWS::Logs::LogGroup 346 | Properties: 347 | KmsKeyId: !GetAtt LogGroupKey.Arn 348 | LogGroupName: !Sub /aws/lambda/${WriteOutputChunkFunction} 349 | RetentionInDays: 7 350 | 351 | ValidateDataFunction: 352 | Type: AWS::Serverless::Function 353 | Properties: 354 | CodeUri: source/validate-data/ 355 | Handler: app.lambda_handler 356 | Runtime: python3.8 357 | 358 | 359 | ValidateDataFunctionLogGroup: 360 | DependsOn: ValidateDataFunction 361 | Type: AWS::Logs::LogGroup 362 | Properties: 363 | KmsKeyId: !GetAtt LogGroupKey.Arn 364 | LogGroupName: !Sub /aws/lambda/${ValidateDataFunction} 365 | RetentionInDays: 7 366 | 367 | SourceBucket: 368 | Type: AWS::S3::Bucket 369 | Properties: 370 | BucketEncryption: 371 | ServerSideEncryptionConfiguration: 372 | - ServerSideEncryptionByDefault: 373 | SSEAlgorithm: AES256 374 | 
LoggingConfiguration: 375 | DestinationBucketName: !Ref LoggingBucket 376 | VersioningConfiguration: 377 | Status: Enabled 378 | 379 | LoggingBucket: 380 | Type: 'AWS::S3::Bucket' 381 | Properties: 382 | AccessControl: LogDeliveryWrite 383 | BucketEncryption: 384 | ServerSideEncryptionConfiguration: 385 | - ServerSideEncryptionByDefault: 386 | SSEAlgorithm: AES256 387 | VersioningConfiguration: 388 | Status: Enabled 389 | 390 | S3NotificationLambdaFunction: 391 | Type: AWS::Serverless::Function 392 | Properties: 393 | CodeUri: source/s3-lambda-notification/ 394 | Handler: app.lambda_handler 395 | Runtime: python3.8 396 | Policies: 397 | - StepFunctionsExecutionPolicy: 398 | StateMachineName: !GetAtt BlogBatchMainOrchestrator.Name 399 | Environment: 400 | Variables: 401 | STATE_MACHINE_EXECUTION_NAME: "BlogBatchMainOrchestrator" 402 | INPUT_ARCHIVE_FOLDER: !Ref InputArchiveFolder 403 | FILE_CHUNK_SIZE: !Ref FileChunkSize 404 | FILE_DELIMITER: !Ref FileDelimiter 405 | STATE_MACHINE_ARN: !GetAtt BlogBatchMainOrchestrator.Arn 406 | 407 | S3NotificationLambdaFunctionLogGroup: 408 | DependsOn: S3NotificationLambdaFunction 409 | Type: AWS::Logs::LogGroup 410 | Properties: 411 | KmsKeyId: !GetAtt LogGroupKey.Arn 412 | LogGroupName: !Sub /aws/lambda/${S3NotificationLambdaFunction} 413 | RetentionInDays: 7 414 | 415 | S3BucketEventPermission: 416 | Type: AWS::Lambda::Permission 417 | Properties: 418 | Action: lambda:invokeFunction 419 | SourceAccount: !Ref 'AWS::AccountId' 420 | FunctionName: !Ref S3NotificationLambdaFunction 421 | SourceArn: !GetAtt SourceBucket.Arn 422 | Principal: s3.amazonaws.com 423 | 424 | PostStackProcessingFunctionRole: 425 | Type: AWS::IAM::Role 426 | Properties: 427 | AssumeRolePolicyDocument: 428 | Version: '2012-10-17' 429 | Statement: 430 | - Effect: Allow 431 | Principal: 432 | Service: lambda.amazonaws.com 433 | Action: sts:AssumeRole 434 | Path: / 435 | Policies: 436 | - PolicyName: S3BucketNotificationDynamoDBInsertPolicy 437 | PolicyDocument: 438 | Version: '2012-10-17' 439 | Statement: 440 | - Sid: AllowBucketNotification 441 | Effect: Allow 442 | Action: s3:PutBucketNotification 443 | Resource: 444 | - !Sub 'arn:${AWS::Partition}:s3:::${SourceBucket}' 445 | - !Sub 'arn:${AWS::Partition}:s3:::${SourceBucket}/*' 446 | - Sid: DynamoDBInsert 447 | Effect: Allow 448 | Action: dynamodb:BatchWriteItem 449 | Resource: 450 | - !GetAtt FinancialTable.Arn 451 | 452 | 453 | PostStackProcessingFunction: 454 | Type: AWS::Serverless::Function 455 | Properties: 456 | Description: Function to apply notification to the S3 bucket 457 | CodeUri: source/custom-resource/ 458 | Handler: app.lambda_handler 459 | Runtime: python3.8 460 | Role: !GetAtt PostStackProcessingFunctionRole.Arn 461 | 462 | PostStackProcessingFunctionLogGroup: 463 | DependsOn: PostStackProcessingFunction 464 | Type: AWS::Logs::LogGroup 465 | Properties: 466 | KmsKeyId: !GetAtt LogGroupKey.Arn 467 | LogGroupName: !Sub /aws/lambda/${PostStackProcessingFunction} 468 | RetentionInDays: 7 469 | 470 | 471 | 472 | PostStackProcessing: 473 | Type: Custom::PostStackProcessing 474 | Properties: 475 | ServiceToken: !GetAtt PostStackProcessingFunction.Arn 476 | S3Bucket: !Ref SourceBucket 477 | FunctionARN: !GetAtt S3NotificationLambdaFunction.Arn 478 | NotificationId: S3ObjectCreatedEvent 479 | FinancialTableName: !Ref FinancialTable 480 | 481 | SNSTopic: 482 | Type: AWS::SNS::Topic 483 | Properties: 484 | KmsMasterKeyId: alias/aws/sns 485 | 486 | 487 | Outputs: 488 | 489 | SourceBucketARN: 490 | Description: "ARN for the 
Source Bucket" 491 | Value: !GetAtt SourceBucket.Arn 492 | 493 | --------------------------------------------------------------------------------