├── .classpath
├── .gitignore
├── .project
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── pom.xml
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │       └── amazonaws
    │   │           └── kda
    │   │               └── flink
    │   │                   └── benchmarking
    │   │                       ├── BenchmarkScheduler.java
    │   │                       ├── KinesisProducerForFlinkSessionWindow.java
    │   │                       ├── model
    │   │                       │   ├── BenchmarkingSpecs.java
    │   │                       │   ├── ChildJob.java
    │   │                       │   ├── Event.java
    │   │                       │   └── JobSchedule.java
    │   │                       └── util
    │   │                           ├── DDBUtil.java
    │   │                           ├── KDSProducerUtil.java
    │   │                           └── KinesisStreamUtil.java
    │   └── resources
    │       ├── Amazon-kda-flink-benchmarking-utility-Architecture.png
    │       ├── Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png
    │       ├── amazon-kda-flink-benchmarking-utility.sh
    │       ├── benchmarking_specs.json
    │       ├── create_table_child_job_summary.json
    │       ├── create_table_kinesis_stream.json
    │       ├── create_table_parent_job_summary.json
    │       └── event_sample.json
    └── text
        └── java
            └── com
                └── amazonaws
                    └── kda
                        └── benchmarking
                            └── util
                                └── GetSampleData.java
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
[Eclipse .classpath: the XML content did not survive extraction; only bare line numbers (1-50) remained.]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target/
.settings/
build/
dependency-reduced-pom.xml
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>amazon-kinesis-data-analytics-flink-benchmarking-utility</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.eclipse.jdt.core.javabuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
		<buildCommand>
			<name>org.eclipse.m2e.core.maven2Builder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.jdt.core.javanature</nature>
		<nature>org.eclipse.m2e.core.maven2Nature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can.
Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *master* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.

We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Amazon Managed Service for Apache Flink (formerly Amazon Kinesis Data Analytics) – Benchmarking Utility

--------

> #### 🚨 August 30, 2023: Amazon Kinesis Data Analytics has been renamed to [Amazon Managed Service for Apache Flink](https://aws.amazon.com/managed-service-apache-flink).

--------

The Amazon Kinesis Data Analytics Flink Benchmarking Utility helps with capacity planning, integration testing, and benchmarking of [Kinesis Data Analytics for Apache Flink](https://docs.aws.amazon.com/kinesisanalytics/latest/java/what-is.html) applications. Using this utility, you can generate sample data and write it to one or more Kinesis Data Streams based on the requirements of your Flink applications. This utility is used in conjunction with a Flink application that has a Kinesis Data Stream as its source and one of the supported [Sinks](https://docs.aws.amazon.com/kinesisanalytics/latest/java/how-sinks.html), for example Amazon S3.

Capacity planning, integration testing, and benchmarking of Flink applications generally involve a lot of work. This utility provides a solution: you define the data format, then generate and write sample data to a Kinesis Data Stream. Using this Kinesis Data Stream as a source, you create a Kinesis Data Analytics Flink application and perform the necessary testing. The format used and the data generated are compatible with the Flink application's business logic. You define benchmarking specifications based on your capacity or load-testing requirements.

This utility, along with the [Amazon Kinesis Data Analytics Flink Starter Kit](https://github.com/aws-samples/amazon-kinesis-data-analytics-flink-starter-kit), provides a complete example.

**Contents:**

* [Architecture](#architecture)
* [Detailed Architecture](#detailed-architecture)
* [Application Overview](#application-overview)
* [Build Instructions](#build-instructions)
* [Deployment Instructions](#deployment-instructions)
* [Appendix](#appendix)

- - -

## Architecture

The diagram below represents the architecture of this utility.

![Alt](./src/main/resources/Amazon-kda-flink-benchmarking-utility-Architecture.png)

- - -

## Detailed Architecture

The diagram below represents the detailed architecture.

![Alt](./src/main/resources/Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png)

In the diagram, each time the Kinesis data producer job is invoked, it runs one or more child jobs based on the specifications provided in a JSON file. Each child job has the following characteristics:

1. It generates sample records and writes them to a Kinesis Data Stream. These records will be consumed by a Flink application
1. Records are generated based on a pre-defined record format
1. Records are randomized based on a number of unique identifiers
1. Records are generated in batches of a configurable size
1. Batches are written to the Kinesis stream at a configurable cadence in seconds
1. Each child job terminates gracefully once it completes writing data to the Kinesis stream

The [Benchmarking Specifications](#benchmarking-specifications) section explains this process in detail; the sketch below shows the underlying scheduling mechanism.

- - -
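Internally, the utility schedules each child job with the open-source Quartz library: ```batchCadence``` and ```numberofBatches``` map directly onto a Quartz simple schedule (this is what ```BenchmarkScheduler``` does; see its source later in this repository). The following is a minimal, self-contained sketch of that mechanism only; the class name, cadence, and batch count are illustrative, and the real job writes to Kinesis instead of printing:

```java
import static org.quartz.JobBuilder.newJob;
import static org.quartz.SimpleScheduleBuilder.simpleSchedule;
import static org.quartz.TriggerBuilder.newTrigger;

import org.quartz.Job;
import org.quartz.JobDetail;
import org.quartz.JobExecutionContext;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.Trigger;
import org.quartz.impl.StdSchedulerFactory;

public class ChildJobSketch implements Job {

    // One trigger firing = one batch. The real utility generates sample events
    // here and writes them to the target Kinesis Data Stream(s).
    @Override
    public void execute(JobExecutionContext context) {
        System.out.println("Writing one batch of sample records ...");
    }

    public static void main(String[] args) throws SchedulerException, InterruptedException {
        int batchCadence = 5;    // illustrative: seconds between batches
        int numberofBatches = 3; // illustrative: repeats after the first firing

        JobDetail jobDetail = newJob(ChildJobSketch.class).withIdentity("child-job-1").build();

        // Fires once immediately, then repeats 'numberofBatches' times,
        // 'batchCadence' seconds apart.
        Trigger trigger = newTrigger().startNow()
                .withSchedule(simpleSchedule()
                        .withIntervalInSeconds(batchCadence)
                        .withRepeatCount(numberofBatches))
                .build();

        Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler();
        scheduler.start();
        scheduler.scheduleJob(jobDetail, trigger);

        // Keep the JVM alive long enough for all firings, then shut down.
        Thread.sleep((numberofBatches + 1) * batchCadence * 1000L);
        scheduler.shutdown();
    }
}
```

- - -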
## Application Overview

### Pre-requisites

1. JDK 8
1. An IDE, for example [Eclipse](https://www.eclipse.org/), [Spring Tools](https://spring.io/tools), or [IntelliJ IDEA](https://www.jetbrains.com/idea/)
1. [Apache Maven](https://maven.apache.org/)
1. [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)

- - -

### AWS Service Requirements

The following AWS services are required for this utility:

1. 1 [Amazon EC2](https://aws.amazon.com/ec2/) instance
1. [DynamoDB Local](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html)
1. 1 [Amazon Kinesis Data Stream](https://aws.amazon.com/kinesis/data-streams/)
1. 1 IAM role for the EC2 instance
1. 1 EC2 key pair to log on to the EC2 instance

- - -

### Benchmarking Specifications

This utility requires you to pass benchmarking specifications (in other words, your load-testing requirements) in a JSON file that follows the format defined in the sample [benchmarking_specs.json](./src/main/resources/benchmarking_specs.json).

#### Schema Definition

The table below will help you define the specifications:

| Property | Type | Purpose |
|--------- | -----| --------|
| ```jobName``` | String | The name of the benchmarking job |
| ```jobDurationInMinutes``` | Integer | The duration of the job in minutes, e.g. 65 |
| ```region``` | String | The AWS region where the target Kinesis Stream(s) exist |
| ```targetKinesisStreams``` | Array | Names of the target Kinesis Streams. This utility writes sample data to one or more configured streams. |
| ```isUsingDynamoDBLocal``` | boolean | Set to ```true``` when DynamoDB Local is used for status tracking. When set to ```false```, the utility uses the Amazon DynamoDB web service. |
| ```dynamoDBLocalURI``` | String | The URI for DynamoDB Local |
| ```parentJobSummaryDDBTableName``` | String | The name of the DynamoDB table for the parent job summary |
| ```childJobSummaryDDBTableName``` | String | The name of the DynamoDB table for the child job summary |
| ```childJobs``` | Array | The list of child jobs to run as part of the utility |

- - -

#### Schema Definition for childJobs

| Property | Type | Purpose |
|--------- | -----| --------|
| ```jobName``` | String | The name of the child job |
| ```numberofInteractions``` | Integer | The number of unique session ids |
| ```batchSize``` | Integer | The size of each batch |
| ```batchCadence``` | Integer | The batch frequency in seconds |
| ```numberofBatches``` | Integer | The number of batches |

- - -
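To make the schema concrete, the snippet below assembles a minimal specification and deserializes it the same way the utility does (```BenchmarkScheduler``` reads the spec file and parses it with Gson into the ```BenchmarkingSpecs``` model). All stream, table, and job names here are illustrative placeholders:

```java
import com.amazonaws.kda.flink.benchmarking.model.BenchmarkingSpecs;
import com.google.gson.Gson;

public class SpecParsingSketch {
    public static void main(String[] args) {
        // Minimal, illustrative specification following the two schema tables above.
        String json = "{"
                + "\"jobName\": \"benchmark-1\","
                + "\"jobDurationInMinutes\": 65,"
                + "\"region\": \"us-east-1\","
                + "\"targetKinesisStreams\": [\"my_stream\"],"
                + "\"isUsingDynamoDBLocal\": true,"
                + "\"dynamoDBLocalURI\": \"http://localhost:8000\","
                + "\"parentJobSummaryDDBTableName\": \"kda_flink_benchmarking_parent_job_summary\","
                + "\"childJobSummaryDDBTableName\": \"kda_flink_benchmarking_child_job_summary\","
                + "\"childJobs\": [{"
                + "\"jobName\": \"child-1\","
                + "\"numberofInteractions\": 10,"
                + "\"batchSize\": 100,"
                + "\"batchCadence\": 5,"
                + "\"numberofBatches\": 12"
                + "}]}";

        // Same deserialization the utility performs on the spec file.
        BenchmarkingSpecs specs = new Gson().fromJson(json, BenchmarkingSpecs.class);
        System.out.println(specs.getJobName() + " defines "
                + specs.getChildJobs().size() + " child job(s)");
    }
}
```

- - -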
### Class Overview

| Class | Purpose |
|-------------------------------------------------------------- | --------|
| [BenchmarkScheduler](./src/main/java/com/amazonaws/kda/flink/benchmarking/BenchmarkScheduler.java) | The entry point and the heart of the benchmarking utility. Its main algorithm was developed based on the open-source [Quartz Job Scheduling Library](http://www.quartz-scheduler.org/overview/). It schedules one or more Kinesis producer jobs based on the benchmarking specifications. |
| [KinesisProducerForFlinkSessionWindow](./src/main/java/com/amazonaws/kda/flink/benchmarking/KinesisProducerForFlinkSessionWindow.java) | This class has the business logic to write sample records to a Kinesis stream. The sample records are compatible with a Flink application that implements a [Session Window](https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/windows.html#session-windows). This class also has the logic to track its own progress in DynamoDB tables, and it implements the [Job interface](http://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/tutorial-lesson-03.html) from **Quartz Scheduler**. |
| [KDSProducerUtil](./src/main/java/com/amazonaws/kda/flink/benchmarking/util/KDSProducerUtil.java) | Utility class with methods used by the **KinesisProducerForFlinkSessionWindow** class. |
| [KinesisStreamUtil](./src/main/java/com/amazonaws/kda/flink/benchmarking/util/KinesisStreamUtil.java) | Utility class with business logic to work with Kinesis Data Streams. |
| [DDBUtil](./src/main/java/com/amazonaws/kda/flink/benchmarking/util/DDBUtil.java) | Utility class with business logic to write / update items (records) in DynamoDB tables. |

- - -
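One detail worth calling out: the producer classes spread records evenly across shards by setting an explicit hash key per record, cycling through the starting hash keys of the stream's open shards (see **KinesisStreamUtil** and **KDSProducerUtil**). Below is a minimal sketch of that technique; it assumes ```getHashKeysForOpenShards``` returns a ```List<String>```, and the stream name and region are placeholders:

```java
import java.nio.ByteBuffer;
import java.util.List;

import com.amazonaws.kda.flink.benchmarking.util.KinesisStreamUtil;
import com.amazonaws.services.kinesis.AmazonKinesis;
import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder;
import com.amazonaws.services.kinesis.model.PutRecordRequest;

public class ExplicitHashKeySketch {
    public static void main(String[] args) {
        AmazonKinesis kinesis = AmazonKinesisClientBuilder.standard()
                .withRegion("us-east-1").build();

        // One starting hash key per open shard; cycling through these
        // spreads records evenly across all shards.
        List<String> hashKeys = KinesisStreamUtil.getHashKeysForOpenShards(kinesis, "my_stream");

        PutRecordRequest request = new PutRecordRequest();
        request.setStreamName("my_stream");
        request.setData(ByteBuffer.wrap("{\"session_id\":\"demo\"}".getBytes()));
        // When an explicit hash key is set, it takes precedence over the partition key.
        request.setExplicitHashKey(hashKeys.get(0));
        request.setPartitionKey("required-but-ignored-when-explicit-hash-key-is-set");
        kinesis.putRecord(request);
    }
}
```

- - -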
## Build Instructions

1. Clone this starter kit to your laptop / MacBook
1. It has a Maven nature, so you can import it into your IDE.
1. Build the jar file using one of the steps below:
   1. Using standalone Maven: go to the project home directory and run the command ```mvn -X clean install```
   1. From Eclipse or STS: run a Maven build with the goals ```-X clean install```. Navigation: right-click the project --> Run As --> Maven Build (Option 4)
1. The build process generates a jar file ```amazon-kinesis-data-analytics-flink-benchmarking-utility-0.1.jar```. **Note:** The size of the jar file is around 20 MB

- - -

## Deployment Instructions

### EC2 Instance Provisioning and Configuration

1. Create an IAM role for the EC2 instance. It needs the two policies below:
   1. A policy with write permissions for the Kinesis stream(s) configured as targets for this utility
   1. A policy with write permissions for the DynamoDB tables used by this utility
   1. **Note:** For more details on this topic, refer to the Amazon EC2 documentation [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html).
1. Launch an EC2 instance with the IAM role
1. Make a note of the private IP address of your EC2 instance
1. Log on to the EC2 instance using the command

   ```ssh -i my_ec2_keypair.pem ec2-user@IP_Address```

1. Run ```sudo yum update -y```
1. Install **OpenJDK 8** using the command

   ```sudo yum -y install java-1.8.0-openjdk.x86_64```

1. Check the Java version using the command ```java -version```.
   Sample output:

   ```bash
   openjdk version "1.8.0_252"
   OpenJDK Runtime Environment (build 1.8.0_252-b09)
   OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)
   ```

1. Create a folder for the data generator application binary. Use the command

   ```mkdir kda-flink-benchmarking-utility```

1. Create a folder for DynamoDB Local. Use the command

   ```mkdir dynamodb_local```

1. Go to folder **kda-flink-benchmarking-utility** and create a folder for logging. Use the command

   ```mkdir logs```

- - -

### Install DynamoDB Local

1. Go to the DynamoDB Local folder

   ```cd dynamodb_local/```

1. Download the DynamoDB Local binary

   ```bash
   curl https://s3.us-west-2.amazonaws.com/dynamodb-local/dynamodb_local_latest.zip --output dynamodb_local_latest.zip
   ```

1. Unzip the file

   ```unzip dynamodb_local_latest.zip```

1. Start DynamoDB Local

   ```nohup java -jar DynamoDBLocal.jar -sharedDb &```

1. Check the status of DynamoDB Local in nohup.out as follows:

   ```bash
   [ec2-user@ip-X-X-X-X ~]$ cat nohup.out
   Initializing DynamoDB Local with the following configuration:
   Port: 8000
   InMemory: false
   DbPath: null
   SharedDb: true
   shouldDelayTransientStatuses: false
   CorsParams: *
   ```

1. At any time, check the status of DynamoDB Local using the command ```ps -ef```

   ```bash
   ec2-user 13995 1 0 Sep09 ? 00:12:54 java -jar DynamoDBLocal.jar -sharedDb
   ```

- - -
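For reference, when ```isUsingDynamoDBLocal``` is ```true``` the utility builds its DynamoDB client against this local endpoint rather than the web service (the same pattern appears in ```BenchmarkScheduler``` and ```KinesisProducerForFlinkSessionWindow```). A minimal sketch, assuming DynamoDB Local is listening on port 8000 as configured above; the region string is only a signing label here:

```java
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;

public class DynamoDBLocalClientSketch {
    public static void main(String[] args) {
        // Point the client at the DynamoDB Local process started above.
        AmazonDynamoDB dynamoDB = AmazonDynamoDBClientBuilder.standard()
                .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
                        "http://localhost:8000", "us-east-1"))
                .build();

        // Quick connectivity check: print the tables DynamoDB Local knows about.
        dynamoDB.listTables().getTableNames().forEach(System.out::println);
    }
}
```

- - -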
### Configure the Kinesis Data Generator App

1. Come back to your MacBook or laptop
1. In ```src/main/resources/benchmarking_specs.json```, update the ```targetKinesisStreams``` array with the Kinesis stream(s) that you want data written to, and ```region``` with the region the stream(s) exist in
1. In ```src/main/resources/create_table_kinesis_stream.json```, update ```TableName``` to match the Kinesis stream you want data written to
1. If you have more than one stream to write to, duplicate ```src/main/resources/create_table_kinesis_stream.json``` for each stream, changing ```TableName``` in each file accordingly.

- - -

### Deploy Kinesis Data Generator App

1. Copy the Kinesis Data Generator binaries to the EC2 instance. **Note:** The steps below are relevant for the SCP (secure copy) tool on a MacBook.

   1. Copy the jar file to the EC2 instance

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/target/amazon-kinesis-data-analytics-flink-benchmarking-utility-0.1.jar ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

   1. Copy the benchmarking specifications JSON to the EC2 instance

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/src/main/resources/benchmarking_specs.json ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

   1. Copy the DynamoDB table JSON files to the EC2 instance

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/src/main/resources/create_table_child_job_summary.json ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/src/main/resources/create_table_parent_job_summary.json ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

   1. Copy the DynamoDB table JSON files for Kinesis streams to the EC2 instance

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/src/main/resources/create_table_kinesis_stream.json ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

      (Repeat for all duplicates created if you're writing to multiple streams)

   1. Copy the Bash script to the EC2 instance

      ```bash
      scp -i my_ec2_keypair.pem /Amazon-kda-flink-benchmarking-utility/src/main/resources/amazon-kda-flink-benchmarking-utility.sh ec2-user@IP_Address:/home/ec2-user/kda-flink-benchmarking-utility/
      ```

1. **Note:** To use PuTTY instead, refer to [Connecting to Your Linux Instance from Windows Using PuTTY](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/putty.html)

- - -

### Create Tables in DynamoDB Local

1. From the EC2 instance, while in folder **kda-flink-benchmarking-utility**, run the commands below to create the DynamoDB tables

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_parent_job_summary.json \
       --region us-east-1 \
       --endpoint-url http://localhost:8000
   ```

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_child_job_summary.json \
       --region us-east-1 \
       --endpoint-url http://localhost:8000
   ```

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_kinesis_stream.json \
       --region us-east-1 \
       --endpoint-url http://localhost:8000
   ```

   (Repeat for all files if you're writing to multiple streams)

1. Check the tables by running the command below

   ```bash
   aws dynamodb list-tables --region us-east-1 --endpoint-url http://localhost:8000
   ```

1. Expected output (the third entry is the table named after your Kinesis stream; it appears empty below because ```TableName``` is a placeholder in the sample file):

   ```json
   {
       "TableNames": [
           "kda_flink_benchmarking_child_job_summary",
           "kda_flink_benchmarking_parent_job_summary",
           ""
       ]
   }
   ```

- - -
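For orientation, the summary tables are keyed by job name and hold one item per job run. The sketch below writes a parent-job item using the attribute names from the utility's **DDBUtil** class; the endpoint, table name, and attribute values are illustrative:

```java
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.Table;

public class JobSummaryItemSketch {
    public static void main(String[] args) {
        // DynamoDB Document API client against the local endpoint created above.
        DynamoDB dynamoDB = new DynamoDB(AmazonDynamoDBClientBuilder.standard()
                .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
                        "http://localhost:8000", "us-east-1"))
                .build());

        Table table = dynamoDB.getTable("kda_flink_benchmarking_parent_job_summary");

        // Attribute names mirror DDBUtil.insertParentJobStatus; values are made up.
        Item item = new Item().withPrimaryKey("job_name", "benchmark-1")
                .withString("job_id", "11111111-2222-3333-4444-555555555555")
                .withString("job_status", "Started")
                .withNumber("number_of_interactions_processed", 10)
                .withString("job_starttime", "2020-09-10 09:30:00");
        table.putItem(item);
    }
}
```

- - -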
### Setup Cron Scheduler

1. Check the status of cron on the EC2 instance using the command ```service crond status```. You will get output something like the following:

   ```bash
   Redirecting to /bin/systemctl status crond.service
   ● crond.service - Command Scheduler
      Loaded: loaded (/usr/lib/systemd/system/crond.service; enabled; vendor preset: enabled)
      Active: active (running) since Mon 2020-09-07 09:50:49 UTC; 3 days ago
   ```

1. Open the crontab using the command

   ```crontab -e```

1. Enter the following line

   ```30 * * * * /bin/bash /home/ec2-user/kda-flink-benchmarking-utility/amazon-kda-flink-benchmarking-utility.sh```

   This runs the data generator as a cron job every hour at 30 minutes past the hour.
   Once the job starts for the first time, you should see incoming data on your Kinesis stream(s)

- - -

## Appendix

### Using DynamoDB web service instead of DynamoDB Local

Switching from DynamoDB Local to the DynamoDB web service is easy. Follow the instructions below:

1. In [benchmarking_specs.json](./src/main/resources/benchmarking_specs.json), set ```"isUsingDynamoDBLocal"``` to ```false```. You can leave the default value for the attribute ```"dynamoDBLocalURI"``` as is, or set it to ```"None"```.
1. Run the commands below to create the tables in the Amazon DynamoDB web service:

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_kinesis_stream.json \
       --region us-east-1
   ```

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_parent_job_summary.json \
       --region us-east-1
   ```

   ```bash
   aws dynamodb create-table \
       --cli-input-json file://create_table_child_job_summary.json \
       --region us-east-1
   ```

### Future Releases

1. Support Amazon ECS to host the solution
1. Support additional scheduling methods
1. Generate test data suitable for [Tumbling Windows](https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/windows.html#tumbling-windows) and [Sliding Windows](https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/windows.html#sliding-windows)
1. Ability to generate test data with timestamp information. This will be useful for Flink applications configured to use Event Time or Processing Time. For more details, refer to the Apache Flink [documentation](https://ci.apache.org/projects/flink/flink-docs-release-1.8/dev/event_time.html).

- - -

## License Summary

This sample code is made available under the MIT-0 license. See the LICENSE file.
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<name>Amazon Kinesis Data Analytics Flink - Benchmarking Utility</name>
	<groupId>com.amazonaws.kinesis</groupId>
	<artifactId>amazon-kinesis-data-analytics-flink-benchmarking-utility</artifactId>
	<version>0.1</version>

	<dependencies>
		<dependency>
			<groupId>com.amazonaws</groupId>
			<artifactId>amazon-kinesis-client</artifactId>
			<version>1.14.0</version>
		</dependency>

		<!-- (a commented-out dependency originally here did not survive extraction) -->

		<dependency>
			<groupId>com.google.code.gson</groupId>
			<artifactId>gson</artifactId>
			<version>2.8.9</version>
		</dependency>

		<dependency>
			<groupId>org.quartz-scheduler</groupId>
			<artifactId>quartz</artifactId>
			<version>2.3.2</version>
		</dependency>
		<dependency>
			<groupId>org.quartz-scheduler</groupId>
			<artifactId>quartz-jobs</artifactId>
			<version>2.3.2</version>
		</dependency>

		<dependency>
			<groupId>org.junit.jupiter</groupId>
			<artifactId>junit-jupiter-engine</artifactId>
			<version>5.6.2</version>
		</dependency>

		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>1.7.25</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-jdk14</artifactId>
			<version>1.7.25</version>
		</dependency>
	</dependencies>

	<repositories>
		<repository>
			<id>dynamodb-local-oregon</id>
			<name>DynamoDB Local Release Repository</name>
			<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>
		</repository>
	</repositories>

	<build>
		<plugins>
			<plugin>
				<inherited>true</inherited>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.5.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-shade-plugin</artifactId>
				<version>3.1.0</version>
				<executions>
					<execution>
						<phase>package</phase>
						<goals>
							<goal>shade</goal>
						</goals>
						<configuration>
							<transformers>
								<transformer
									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
									<mainClass>com.amazonaws.kda.flink.benchmarking.BenchmarkScheduler</mainClass>
								</transformer>
							</transformers>
						</configuration>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>
</project>
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/kda/flink/benchmarking/BenchmarkScheduler.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 | 
4 | package com.amazonaws.kda.flink.benchmarking;
5 | 
6 | import static org.quartz.JobBuilder.newJob;
7 | import static org.quartz.SimpleScheduleBuilder.simpleSchedule;
8 | import static org.quartz.TriggerBuilder.newTrigger;
9 | 
10 | import java.io.IOException;
11 | import java.io.InputStream;
12 | import java.nio.charset.StandardCharsets;
13 | import java.nio.file.Files;
14 | import java.nio.file.Paths;
15 | import java.time.LocalDateTime;
16 | import java.time.format.DateTimeFormatter;
17 | import java.util.ArrayList;
18 | import java.util.List;
19 | import java.util.UUID;
20 | import java.util.stream.Collectors;
21 | 
22 | import org.quartz.JobDetail;
23 | import org.quartz.Scheduler;
24 | import org.quartz.SchedulerException;
25 | import org.quartz.Trigger;
26 | import org.quartz.impl.StdSchedulerFactory;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 | 
30 | import com.amazonaws.client.builder.AwsClientBuilder;
31 | import com.amazonaws.kda.flink.benchmarking.model.BenchmarkingSpecs;
32 | import com.amazonaws.kda.flink.benchmarking.model.ChildJob;
33 | import com.amazonaws.kda.flink.benchmarking.model.JobSchedule;
34 | import com.amazonaws.kda.flink.benchmarking.util.DDBUtil;
35 | import com.amazonaws.kda.flink.benchmarking.util.KDSProducerUtil;
36 | import com.amazonaws.kda.flink.benchmarking.util.KinesisStreamUtil;
37 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
38 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
39 | import com.amazonaws.services.dynamodbv2.document.DynamoDB;
40 | import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder;
41 | import com.amazonaws.util.IOUtils;
42 | import com.google.gson.Gson;
43 | 
44 | /**
45 |  * Benchmarking Utility main program
46 |  *
47 |  * @author Ravi Itha,
Amazon Web Services, Inc. 48 | * 49 | */ 50 | public class BenchmarkScheduler { 51 | 52 | public static void main(String[] args) { 53 | 54 | Logger logger = LoggerFactory.getLogger(BenchmarkScheduler.class); 55 | 56 | BenchmarkingSpecs benchMarkingSpecs = parseBenchamrkingSpecs(args[0]); 57 | benchMarkingSpecs.setJobId(UUID.randomUUID().toString()); 58 | benchMarkingSpecs.setNumberofChildJobs(benchMarkingSpecs.getChildJobs().size()); 59 | benchMarkingSpecs 60 | .setJobStartTime(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))); 61 | 62 | String targetKinesisStreams = benchMarkingSpecs.getTargetKinesisStreams().stream() 63 | .collect(Collectors.joining("$")); 64 | String startingHashKeys = KinesisStreamUtil.getHashKeysForOpenShards( 65 | AmazonKinesisClientBuilder.standard().withRegion(benchMarkingSpecs.getRegion()).build(), 66 | benchMarkingSpecs.getTargetKinesisStreams().get(0)).stream().collect(Collectors.joining("$")); 67 | 68 | /** 69 | * Define JobDetail and Trigger for each Job provided in the Job Template 70 | * 71 | */ 72 | List jobSchedules = new ArrayList(); 73 | for (ChildJob childJob : benchMarkingSpecs.getChildJobs()) { 74 | List interactions = KDSProducerUtil.createInteractions(childJob.getNumberofInteractions()); 75 | 76 | childJob.setJobId(UUID.randomUUID().toString()); 77 | childJob.setParentJobId(benchMarkingSpecs.getJobId()); 78 | 79 | JobDetail jobDetail = newJob(KinesisProducerForFlinkSessionWindow.class) 80 | .withIdentity(childJob.getJobName().concat("-").concat(benchMarkingSpecs.getJobStartTime()), 81 | childJob.getJobName()) 82 | .usingJobData("jobId", childJob.getJobId()).usingJobData("jobName", childJob.getJobName()) 83 | .usingJobData("parentJobId", childJob.getParentJobId()) 84 | .usingJobData("isUsingDynamoDBLocal", benchMarkingSpecs.isUsingDynamoDBLocal()) 85 | .usingJobData("dynamoDBLocalURI", benchMarkingSpecs.getDynamoDBLocalURI()) 86 | .usingJobData("childJobSummaryDDBTblName", benchMarkingSpecs.getChildJobSummaryDDBTableName()) 87 | .usingJobData("region", benchMarkingSpecs.getRegion()) 88 | .usingJobData("masterJobId", benchMarkingSpecs.getJobId()) 89 | .usingJobData("targetKinesisStreams", targetKinesisStreams) 90 | .usingJobData("startingHashKeys", startingHashKeys) 91 | .usingJobData("interactionsIds", interactions.stream().collect(Collectors.joining("$"))) 92 | .usingJobData("stringSeparator", "$").usingJobData("batchSize", childJob.getBatchSize()) 93 | .usingJobData("startingHashKeyIndex", 0).build(); 94 | 95 | Trigger trigger = newTrigger() 96 | .withIdentity(childJob.getJobName().concat("-").concat("-trigger"), 97 | childJob.getJobName().concat("-").concat("min-group")) 98 | .startNow().withSchedule(simpleSchedule().withIntervalInSeconds(childJob.getBatchCadence()) 99 | .withRepeatCount(childJob.getNumberofBatches())) 100 | .build(); 101 | 102 | JobSchedule jobSchedule = new JobSchedule(); 103 | jobSchedule.setJobDetail(jobDetail); 104 | jobSchedule.setTrigger(trigger); 105 | jobSchedules.add(jobSchedule); 106 | } 107 | 108 | /** 109 | * Schedule the Jobs via Quartz Enterprise Job Scheduler 110 | */ 111 | try { 112 | Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler(); 113 | scheduler.start(); 114 | for (JobSchedule jobSchedule : jobSchedules) { 115 | scheduler.scheduleJob(jobSchedule.getJobDetail(), jobSchedule.getTrigger()); 116 | } 117 | logger.info( 118 | "Put Main thread in sleeping mode for " + benchMarkingSpecs.getJobDurationInMinutes() + " minutes"); 119 | 120 | // Update DynamoDB 121 | 
trackJobs(benchMarkingSpecs); 122 | 123 | Thread.sleep(benchMarkingSpecs.getJobDurationInMinutes() * 60000); 124 | scheduler.shutdown(); 125 | } catch (SchedulerException se) { 126 | se.printStackTrace(); 127 | } catch (InterruptedException e) { 128 | e.printStackTrace(); 129 | } 130 | } 131 | 132 | /** 133 | * This method to parse the Job Definition 134 | * 135 | * @param filePath 136 | * @return 137 | */ 138 | public static BenchmarkingSpecs parseBenchamrkingSpecs(String filePath) { 139 | BenchmarkingSpecs benchMarkingSpecs = null; 140 | try { 141 | String jsonString = new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8); 142 | benchMarkingSpecs = new Gson().fromJson(jsonString, BenchmarkingSpecs.class); 143 | } catch (IOException e) { 144 | e.printStackTrace(); 145 | } 146 | return benchMarkingSpecs; 147 | } 148 | 149 | /** 150 | * This method tracks Job status in DynamoDB 151 | * 152 | * @param benchMarkingSpecs 153 | */ 154 | public static void trackJobs(BenchmarkingSpecs benchMarkingSpecs) { 155 | int numInteractionsProcessed = 0; 156 | AmazonDynamoDB dynamoDB = null; 157 | 158 | if (benchMarkingSpecs.isUsingDynamoDBLocal()) 159 | dynamoDB = AmazonDynamoDBClientBuilder.standard() 160 | .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration( 161 | benchMarkingSpecs.getDynamoDBLocalURI(), benchMarkingSpecs.getRegion())) 162 | .build(); 163 | else 164 | dynamoDB = AmazonDynamoDBClientBuilder.standard().withRegion(benchMarkingSpecs.getRegion()).build(); 165 | 166 | DynamoDB dynamoDBClient = new DynamoDB(dynamoDB); 167 | 168 | // Insert a record to kda_flink_perf_benchmarking_master_job_summary DDB table 169 | for (ChildJob childJob : benchMarkingSpecs.getChildJobs()) { 170 | numInteractionsProcessed += childJob.getNumberofInteractions(); 171 | } 172 | DDBUtil.insertParentJobStatus(dynamoDBClient, benchMarkingSpecs.getParentJobSummaryDDBTableName(), 173 | benchMarkingSpecs.getJobName(), benchMarkingSpecs.getJobId(), numInteractionsProcessed, 174 | benchMarkingSpecs.getJobStartTime(), "Started"); 175 | 176 | // Insert records to kda_flink_perf_benchmarking_child_job_summary DDB Table 177 | for (ChildJob childJob : benchMarkingSpecs.getChildJobs()) { 178 | DDBUtil.insertChildJobStatus(dynamoDBClient, benchMarkingSpecs.getChildJobSummaryDDBTableName(), 179 | childJob.getJobName(), childJob.getJobId(), childJob.getParentJobId(), 180 | childJob.getNumberofInteractions(), benchMarkingSpecs.getJobStartTime(), "In Progress"); 181 | } 182 | } 183 | 184 | public static String parse(String resource) throws IOException { 185 | InputStream stream = BenchmarkScheduler.class.getResourceAsStream(resource); 186 | try { 187 | String json = IOUtils.toString(stream); 188 | return json; 189 | } finally { 190 | stream.close(); 191 | } 192 | } 193 | 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/KinesisProducerForFlinkSessionWindow.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking; 5 | 6 | import java.time.LocalDateTime; 7 | import java.time.format.DateTimeFormatter; 8 | import java.util.ArrayList; 9 | import java.util.Iterator; 10 | import java.util.List; 11 | import java.util.Optional; 12 | 13 | import org.quartz.Job; 14 | import org.quartz.JobDataMap; 15 | import org.quartz.JobExecutionContext; 16 | import org.quartz.JobExecutionException; 17 | import org.quartz.JobKey; 18 | import org.quartz.PersistJobDataAfterExecution; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import com.amazonaws.client.builder.AwsClientBuilder; 23 | import com.amazonaws.kda.flink.benchmarking.util.DDBUtil; 24 | import com.amazonaws.kda.flink.benchmarking.util.KDSProducerUtil; 25 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; 26 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; 27 | import com.amazonaws.services.dynamodbv2.document.DynamoDB; 28 | import com.amazonaws.services.kinesis.AmazonKinesis; 29 | import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder; 30 | import com.google.common.collect.Iterables; 31 | 32 | /** 33 | * 34 | * @author Ravi Itha, Amazon Web Services, Inc. 35 | * 36 | */ 37 | @PersistJobDataAfterExecution 38 | public class KinesisProducerForFlinkSessionWindow implements Job { 39 | 40 | Logger logger = LoggerFactory.getLogger(KinesisProducerForFlinkSessionWindow.class); 41 | 42 | public void execute(JobExecutionContext context) throws JobExecutionException { 43 | 44 | String dynamoDBLocalURI = null; 45 | // Get job specific settings 46 | JobKey key = context.getJobDetail().getKey(); 47 | JobDataMap dataMap = context.getJobDetail().getJobDataMap(); 48 | // System.out.println("Job Key: " + key.getName()); 49 | // System.out.println("\nFire Instance Id: " + context.getFireInstanceId()); 50 | 51 | String jobId = dataMap.getString("jobId"); 52 | String jobName = dataMap.getString("jobName"); 53 | boolean isUsingDynamoDBLocal = dataMap.getBoolean("isUsingDynamoDBLocal"); 54 | if (isUsingDynamoDBLocal) 55 | dynamoDBLocalURI = dataMap.getString("dynamoDBLocalURI"); 56 | String childJobSummaryDDBTblName = dataMap.getString("childJobSummaryDDBTblName"); 57 | String region = dataMap.getString("region"); 58 | String interactionsIds = dataMap.getString("interactionsIds"); 59 | String stringSeparator = dataMap.getString("stringSeparator"); 60 | String targetKinesisStreams = dataMap.getString("targetKinesisStreams"); 61 | String startingHashKeys = dataMap.getString("startingHashKeys"); 62 | int batchSize = dataMap.getInt("batchSize"); 63 | 64 | List eventList = new ArrayList(); 65 | AmazonKinesis kinesis = AmazonKinesisClientBuilder.standard().withRegion(region).build(); 66 | List interactionList = KDSProducerUtil.tokenizeStrings(interactionsIds, stringSeparator); 67 | List targetKinesisStreamsList = KDSProducerUtil.tokenizeStrings(targetKinesisStreams, stringSeparator); 68 | List startingHashKeyList = KDSProducerUtil.tokenizeStrings(startingHashKeys, stringSeparator); 69 | Iterator hashKeysIterator = Iterables.cycle(startingHashKeyList).iterator(); 70 | 71 | AmazonDynamoDB dynamoDB = null; 72 | if (isUsingDynamoDBLocal) 73 | dynamoDB = AmazonDynamoDBClientBuilder.standard() 74 | .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(dynamoDBLocalURI, region)) 75 | .build(); 76 | else 77 | dynamoDB = AmazonDynamoDBClientBuilder.standard().withRegion(region).build(); 78 | 79 | DynamoDB dynamoDBClient = new 
DynamoDB(dynamoDB); 80 | 81 | for (String interactionId : interactionList) { 82 | eventList = KDSProducerUtil.createEvents(eventList, batchSize, interactionId); 83 | for (String targetStream : targetKinesisStreamsList) { 84 | KDSProducerUtil.writeMessagesToKinesis(kinesis, targetStream, eventList, hashKeysIterator); 85 | DDBUtil.insertChildJobDetailedStatus(dynamoDBClient, targetStream, jobId, context.getFireInstanceId(), 86 | targetStream, interactionId, batchSize, System.currentTimeMillis()); 87 | } 88 | } 89 | 90 | // Check if this is the last Job execution. If yes, then prepare for next Hourly 91 | // Window. 92 | if (!Optional.ofNullable(context.getTrigger().getNextFireTime()).isPresent()) { 93 | System.out.printf("The last instance of the job. Job Key: %s, Job Id: %s \n", key.getName(), jobId); 94 | DDBUtil.updateChildJobStatus(dynamoDBClient, childJobSummaryDDBTblName, jobName, jobId, 95 | LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")), "Completed"); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/model/BenchmarkingSpecs.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.model; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 10 | * @author Ravi Itha, Amazon Web Services, Inc. 11 | * 12 | */ 13 | public class BenchmarkingSpecs { 14 | 15 | private String jobName; 16 | private int jobDurationInMinutes; 17 | private String jobId; 18 | private String region; 19 | private String jobStartTime; 20 | private int numberofChildJobs; 21 | private boolean isUsingDynamoDBLocal; 22 | private String dynamoDBLocalURI; 23 | private String parentJobSummaryDDBTableName; 24 | private String childJobSummaryDDBTableName; 25 | private List childJobs; 26 | private List targetKinesisStreams; 27 | 28 | public String getJobName() { 29 | return jobName; 30 | } 31 | public void setJobName(String jobName) { 32 | this.jobName = jobName; 33 | } 34 | public int getJobDurationInMinutes() { 35 | return jobDurationInMinutes; 36 | } 37 | public void setJobDurationInMinutes(int jobDurationInMinutes) { 38 | this.jobDurationInMinutes = jobDurationInMinutes; 39 | } 40 | public String getJobId() { 41 | return jobId; 42 | } 43 | public void setJobId(String jobId) { 44 | this.jobId = jobId; 45 | } 46 | public String getRegion() { 47 | return region; 48 | } 49 | public void setRegion(String region) { 50 | this.region = region; 51 | } 52 | public String getJobStartTime() { 53 | return jobStartTime; 54 | } 55 | public void setJobStartTime(String jobStartTime) { 56 | this.jobStartTime = jobStartTime; 57 | } 58 | public int getNumberofChildJobs() { 59 | return numberofChildJobs; 60 | } 61 | public void setNumberofChildJobs(int numberofChildJobs) { 62 | this.numberofChildJobs = numberofChildJobs; 63 | } 64 | public boolean isUsingDynamoDBLocal() { 65 | return isUsingDynamoDBLocal; 66 | } 67 | public void setUsingDynamoDBLocal(boolean isUsingDynamoDBLocal) { 68 | this.isUsingDynamoDBLocal = isUsingDynamoDBLocal; 69 | } 70 | public String getDynamoDBLocalURI() { 71 | return dynamoDBLocalURI; 72 | } 73 | public void setDynamoDBLocalURI(String dynamoDBLocalURI) { 74 | this.dynamoDBLocalURI = dynamoDBLocalURI; 75 | } 76 | public String getParentJobSummaryDDBTableName() { 77 | return 
parentJobSummaryDDBTableName; 78 | } 79 | public void setParentJobSummaryDDBTableName(String parentJobSummaryDDBTableName) { 80 | this.parentJobSummaryDDBTableName = parentJobSummaryDDBTableName; 81 | } 82 | public String getChildJobSummaryDDBTableName() { 83 | return childJobSummaryDDBTableName; 84 | } 85 | public void setChildJobSummaryDDBTableName(String childJobSummaryDDBTableName) { 86 | this.childJobSummaryDDBTableName = childJobSummaryDDBTableName; 87 | } 88 | public List getChildJobs() { 89 | return childJobs; 90 | } 91 | public void setChildJobs(List childJobs) { 92 | this.childJobs = childJobs; 93 | } 94 | public List getTargetKinesisStreams() { 95 | return targetKinesisStreams; 96 | } 97 | public void setTargetKinesisStreams(List targetKinesisStreams) { 98 | this.targetKinesisStreams = targetKinesisStreams; 99 | } 100 | 101 | 102 | 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/model/ChildJob.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.model; 5 | 6 | /** 7 | * 8 | * @author Ravi Itha, Amazon Web Services, Inc. 9 | * 10 | */ 11 | public class ChildJob { 12 | 13 | private String jobName; 14 | private String jobId; 15 | private String parentJobId; 16 | private int numberofInteractions; 17 | private int batchSize; 18 | private int batchCadence; 19 | private int numberofBatches; 20 | private String batchStartTime; 21 | 22 | public String getJobName() { 23 | return jobName; 24 | } 25 | public void setJobName(String jobName) { 26 | this.jobName = jobName; 27 | } 28 | public int getNumberofInteractions() { 29 | return numberofInteractions; 30 | } 31 | public void setNumberofInteractions(int numberofInteractions) { 32 | this.numberofInteractions = numberofInteractions; 33 | } 34 | public int getBatchSize() { 35 | return batchSize; 36 | } 37 | public void setBatchSize(int batchSize) { 38 | this.batchSize = batchSize; 39 | } 40 | public int getBatchCadence() { 41 | return batchCadence; 42 | } 43 | public void setBatchCadence(int batchCadence) { 44 | this.batchCadence = batchCadence; 45 | } 46 | public int getNumberofBatches() { 47 | return numberofBatches; 48 | } 49 | public void setNumberofBatches(int numberofBatches) { 50 | this.numberofBatches = numberofBatches; 51 | } 52 | public String getBatchStartTime() { 53 | return batchStartTime; 54 | } 55 | public void setBatchStartTime(String batchStartTime) { 56 | this.batchStartTime = batchStartTime; 57 | } 58 | public String getJobId() { 59 | return jobId; 60 | } 61 | public void setJobId(String jobId) { 62 | this.jobId = jobId; 63 | } 64 | public String getParentJobId() { 65 | return parentJobId; 66 | } 67 | public void setParentJobId(String parentJobId) { 68 | this.parentJobId = parentJobId; 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/model/Event.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.model; 5 | 6 | /** 7 | * This is a POJO represents an Event which will be batched 8 | * 9 | * Each batched event has a format per the file src/main/resources/event_sample.json 10 | * 11 | * @author Ravi Itha 12 | * 13 | */ 14 | public class Event { 15 | 16 | private String attr_1; 17 | private String attr_2; 18 | private String attr_3; 19 | private String attr_4; 20 | private String attr_5; 21 | private String attr_6; 22 | private long attr_7; 23 | private String attr_8; 24 | private String session_id; 25 | private long timestamp; 26 | 27 | public String getAttr_1() { 28 | return attr_1; 29 | } 30 | public void setAttr_1(String attr_1) { 31 | this.attr_1 = attr_1; 32 | } 33 | public String getAttr_2() { 34 | return attr_2; 35 | } 36 | public void setAttr_2(String attr_2) { 37 | this.attr_2 = attr_2; 38 | } 39 | public long getTimestamp() { 40 | return timestamp; 41 | } 42 | public void setTimestamp(long timestamp) { 43 | this.timestamp = timestamp; 44 | } 45 | public String getAttr_3() { 46 | return attr_3; 47 | } 48 | public void setAttr_3(String attr_3) { 49 | this.attr_3 = attr_3; 50 | } 51 | public String getAttr_4() { 52 | return attr_4; 53 | } 54 | public void setAttr_4(String attr_4) { 55 | this.attr_4 = attr_4; 56 | } 57 | public String getAttr_5() { 58 | return attr_5; 59 | } 60 | public void setAttr_5(String attr_5) { 61 | this.attr_5 = attr_5; 62 | } 63 | public String getSession_id() { 64 | return session_id; 65 | } 66 | public void setSession_id(String session_id) { 67 | this.session_id = session_id; 68 | } 69 | public String getAttr_6() { 70 | return attr_6; 71 | } 72 | public void setAttr_6(String attr_6) { 73 | this.attr_6 = attr_6; 74 | } 75 | public long getAttr_7() { 76 | return attr_7; 77 | } 78 | public void setAttr_7(long attr_7) { 79 | this.attr_7 = attr_7; 80 | } 81 | public String getAttr_8() { 82 | return attr_8; 83 | } 84 | public void setAttr_8(String attr_8) { 85 | this.attr_8 = attr_8; 86 | } 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/model/JobSchedule.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.model; 5 | 6 | import org.quartz.JobDetail; 7 | import org.quartz.Trigger; 8 | 9 | /** 10 | * 11 | * @author Ravi Itha, Amazon Web Services, Inc. 12 | * 13 | */ 14 | public class JobSchedule { 15 | 16 | private JobDetail jobDetail; 17 | private Trigger trigger; 18 | 19 | public JobDetail getJobDetail() { 20 | return jobDetail; 21 | } 22 | public void setJobDetail(JobDetail jobDetail) { 23 | this.jobDetail = jobDetail; 24 | } 25 | public Trigger getTrigger() { 26 | return trigger; 27 | } 28 | public void setTrigger(Trigger trigger) { 29 | this.trigger = trigger; 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/util/DDBUtil.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.util; 5 | 6 | import com.amazonaws.services.dynamodbv2.document.AttributeUpdate; 7 | import com.amazonaws.services.dynamodbv2.document.DynamoDB; 8 | import com.amazonaws.services.dynamodbv2.document.Item; 9 | import com.amazonaws.services.dynamodbv2.document.PutItemOutcome; 10 | import com.amazonaws.services.dynamodbv2.document.Table; 11 | import com.amazonaws.services.dynamodbv2.document.UpdateItemOutcome; 12 | import com.amazonaws.services.dynamodbv2.document.spec.UpdateItemSpec; 13 | 14 | public class DDBUtil { 15 | 16 | /** 17 | * This method updates an item in DynamoDB table using Primary Hash Key and Range Key 18 | * @param dynamoDBClient 19 | * @param dynamoDBTblName 20 | * @param jobName 21 | * @param jobId 22 | * @param jobFinishTime 23 | * @param jobStatus 24 | * @return 25 | */ 26 | public static boolean updateChildJobStatus(DynamoDB dynamoDBClient, String dynamoDBTblName, String jobName, 27 | String jobId, String jobFinishTime, String jobStatus) { 28 | boolean itemUpdated = false; 29 | Table table = dynamoDBClient.getTable(dynamoDBTblName); 30 | 31 | AttributeUpdate attributeUpdate = new AttributeUpdate("strAttr").put("Completed"); 32 | UpdateItemSpec updateItemSpec = new UpdateItemSpec().withPrimaryKey("job_name", jobName, "job_id", jobId) 33 | .withAttributeUpdate(attributeUpdate); 34 | UpdateItemOutcome outcome = table.updateItem(updateItemSpec); 35 | int statusCode = outcome.getUpdateItemResult().getSdkHttpMetadata().getHttpStatusCode(); 36 | if (statusCode == 200) { 37 | itemUpdated = true; 38 | } 39 | return itemUpdated; 40 | } 41 | 42 | /** 43 | * This method inserts an item to DynamoDB Table 44 | * @param dynamoDBClient 45 | * @param dynamoDBTblName 46 | * @param jobName 47 | * @param jobId 48 | * @param numInteractionsProcessed 49 | * @param jobStartTime 50 | * @param jobStatus 51 | * @return 52 | */ 53 | public static boolean insertParentJobStatus(DynamoDB dynamoDBClient, String dynamoDBTblName, String jobName, String jobId, 54 | int numInteractionsProcessed, String jobStartTime, String jobStatus) { 55 | 56 | boolean itemInserted = false; 57 | Table table = dynamoDBClient.getTable(dynamoDBTblName); 58 | Item item = new Item().withPrimaryKey("job_name", jobName) 59 | .withString("job_id", jobId) 60 | .withString("job_status", jobStatus) 61 | .withNumber("number_of_interactions_processed", numInteractionsProcessed) 62 | .withString("job_starttime", jobStartTime); 63 | try { 64 | PutItemOutcome outcome = table.putItem(item); 65 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode(); 66 | if (statusCode == 200) { 67 | itemInserted = true; 68 | } 69 | } catch (Exception e) { 70 | e.printStackTrace(); 71 | System.out.println("Item could not be inserted to DynamoDB table."); 72 | } 73 | return itemInserted; 74 | } 75 | 76 | /** 77 | * This method inserts an item to DynamoDB Table 78 | * @param dynamoDBClient 79 | * @param dynamoDBTblName 80 | * @param jobName 81 | * @param jobId 82 | * @param parentJobId 83 | * @param numInteractionsProcessed 84 | * @param jobStartTime 85 | * @param jobStatus 86 | * @return 87 | */ 88 | public static boolean insertChildJobStatus(DynamoDB dynamoDBClient, String dynamoDBTblName, String jobName, String jobId, String parentJobId, 89 | int numInteractionsProcessed, String jobStartTime, String jobStatus) { 90 | 91 | boolean itemInserted = false; 92 | Table table = dynamoDBClient.getTable(dynamoDBTblName); 93 | Item item = new 
Item().withPrimaryKey("job_name", jobName) 94 | .withString("job_id", jobId) 95 | .withString("parent_job_id", parentJobId) 96 | .withString("job_status", jobStatus) 97 | .withNumber("number_of_interactions_processed", numInteractionsProcessed) 98 | .withString("job_starttime", jobStartTime); 99 | try { 100 | PutItemOutcome outcome = table.putItem(item); 101 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode(); 102 | if (statusCode == 200) { 103 | itemInserted = true; 104 | } 105 | } catch (Exception e) { 106 | e.printStackTrace(); 107 | System.out.println("Item could not be inserted to DynamoDB table."); 108 | } 109 | return itemInserted; 110 | } 111 | 112 | /** 113 | * This method inserts an item to DynamoDB Table 114 | * @param dynamoDBClient 115 | * @param dynamoDBTblName 116 | * @param jobName 117 | * @param jobTriggeringId 118 | * @param targetStream 119 | * @param interactionId 120 | * @param batchSize 121 | * @param executionTime 122 | * @return 123 | */ 124 | public static boolean insertChildJobDetailedStatus(DynamoDB dynamoDBClient, String dynamoDBTblName, String jobName, 125 | String jobTriggeringId, String targetStream, String interactionId, int batchSize, long executionTime) { 126 | 127 | boolean itemInserted = false; 128 | Table table = dynamoDBClient.getTable(dynamoDBTblName); 129 | Item item = new Item().withPrimaryKey("job_name", jobName).withString("job_run_id", jobTriggeringId + "-" + System.currentTimeMillis()) 130 | .withString("interaction_id", interactionId).withString("interaction_id", interactionId) 131 | .withNumber("batch_size", batchSize).withString("stream_name", targetStream); 132 | try { 133 | PutItemOutcome outcome = table.putItem(item); 134 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode(); 135 | if (statusCode == 200) { 136 | itemInserted = true; 137 | } 138 | } catch (Exception e) { 139 | e.printStackTrace(); 140 | System.out.println("Item could not be inserted to DynamoDB table."); 141 | } 142 | return itemInserted; 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /src/main/java/com/amazonaws/kda/flink/benchmarking/util/KDSProducerUtil.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.flink.benchmarking.util; 5 | 6 | import java.nio.ByteBuffer; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.Iterator; 10 | import java.util.List; 11 | import java.util.StringTokenizer; 12 | import java.util.UUID; 13 | import java.util.stream.Collectors; 14 | 15 | import com.amazonaws.kda.flink.benchmarking.model.Event; 16 | import com.amazonaws.services.kinesis.AmazonKinesis; 17 | import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException; 18 | import com.amazonaws.services.kinesis.model.PutRecordRequest; 19 | import com.amazonaws.services.kinesis.model.PutRecordsRequest; 20 | import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry; 21 | import com.amazonaws.services.kinesis.model.PutRecordsResult; 22 | import com.amazonaws.services.kinesis.model.PutRecordsResultEntry; 23 | import com.google.common.collect.Iterables; 24 | import com.google.common.collect.Lists; 25 | import com.google.gson.Gson; 26 | 27 | public class KDSProducerUtil { 28 | 29 | /** 30 | * This method creates sample InteractionIds 31 | * 32 | * @param numInteractions 33 | * @return 34 | */ 35 | public static List createInteractions(int numInteractions) { 36 | List interactionList = new ArrayList(); 37 | for (int i = 0; i < numInteractions; i++) { 38 | String interactionId = UUID.randomUUID().toString(); 39 | // System.out.printf("Interaction_id: %s \n", interactionId); 40 | interactionList.add(interactionId); 41 | } 42 | return interactionList; 43 | } 44 | 45 | public static Iterator createRotatableInteractions(int numInteractions) { 46 | List interactionList = new ArrayList(); 47 | for (int i = 0; i < numInteractions; i++) { 48 | String interactionId = UUID.randomUUID().toString(); 49 | System.out.printf("Interaction_id: %s \n", interactionId); 50 | interactionList.add(interactionId); 51 | } 52 | return Iterables.cycle(interactionList).iterator(); 53 | } 54 | 55 | /** 56 | * This method create a list of sample events 57 | * 58 | * @param eventList 59 | * @param numEvents 60 | * @param interactionId 61 | * @return 62 | */ 63 | public static List createEvents(List eventList, int numEvents, String sessionId) { 64 | // clear the eventList and build it up again! 65 | eventList.clear(); 66 | long createTime = System.currentTimeMillis(); 67 | 68 | for (int i = 1; i < numEvents + 1; i++) { 69 | Event event = new Event(); 70 | event.setAttr_1("my_attr_1_" + i); 71 | event.setAttr_2("my_attr_2_" + i); 72 | event.setAttr_3("my_attr_3_" + i); 73 | event.setAttr_4("my_attr_4_" + i); 74 | event.setAttr_5("my_attr_5_" + i); 75 | event.setAttr_6(sessionId); 76 | event.setAttr_7(createTime); 77 | event.setSession_id(sessionId); 78 | event.setTimestamp(createTime); 79 | eventList.add(new Gson().toJson(event)); 80 | } 81 | return eventList; 82 | } 83 | 84 | /** 85 | * This method demonstrates writing a single messages to Kinesis Data Stream 86 | * using PutRecord API. 87 | * 88 | * Partition key is needed and it can be an empty string. When both Partition 89 | * Key and explicit Hash Key are set, explicit Hash Key takes precedence. 90 | * Calling hashKeyIterator.next() provides a Hash Key belongs to a shard. 91 | * 92 | * Retry logic: PutRecord throws ProvisionedThroughputExceededException when a 93 | * stream is throttled. The retry logic used here handles the exception and 94 | * re-writes the failed record. 
 84 | 	/**
 85 | 	 * This method demonstrates writing a single message to a Kinesis Data Stream
 86 | 	 * using the PutRecord API.
 87 | 	 *
 88 | 	 * A partition key is required, but it can be an empty string. When both a
 89 | 	 * partition key and an explicit hash key are set, the explicit hash key takes
 90 | 	 * precedence. Calling hashKeyIterator.next() provides a hash key that belongs to a shard.
 91 | 	 *
 92 | 	 * Retry logic: PutRecord throws ProvisionedThroughputExceededException when a
 93 | 	 * stream is throttled. The retry logic used here handles the exception and
 94 | 	 * re-writes the failed record once.
 95 | 	 *
 96 | 	 * @param record
 97 | 	 * @param streamName
 98 | 	 * @param kinesis
 99 | 	 */
100 | 	public static void writeSingleMessageToKinesis(String record, String streamName, AmazonKinesis kinesis, String startingHashKey) {
101 | 		PutRecordRequest putRecReq = new PutRecordRequest();
102 | 		try {
103 | 			putRecReq.setStreamName(streamName);
104 | 			putRecReq.setData(ByteBuffer.wrap(record.getBytes()));
105 | 			putRecReq.setExplicitHashKey(startingHashKey);
106 | 			putRecReq.setPartitionKey("requiredButHasNoEffect-when-setExplicitHashKey-isUsed");
107 | 			kinesis.putRecord(putRecReq);
108 | 		} catch (ProvisionedThroughputExceededException exception) {
109 | 			try {
110 | 				System.out.println("ERROR: Throughput Exception Thrown.");
111 | 				exception.printStackTrace();
112 | 				System.out.println("Retrying after a short delay.");
113 | 				Thread.sleep(100);
114 | 				kinesis.putRecord(putRecReq);
115 | 			} catch (ProvisionedThroughputExceededException e) {
116 | 				e.printStackTrace();
117 | 				System.out.println("Kinesis put operation failed after retry due to a second consecutive "
118 | 						+ "ProvisionedThroughputExceededException.");
119 | 			} catch (Exception e) {
120 | 				e.printStackTrace();
121 | 				System.out.println("Exception thrown while writing a record to Kinesis.");
122 | 			}
123 | 		} catch (Exception e) {
124 | 			e.printStackTrace();
125 | 			System.out.println("Exception thrown while writing a record to Kinesis.");
126 | 		}
127 | 	}
128 | 
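	// Editor's sketch: a hypothetical alternative (not part of the original
	// file) to the fixed 100 ms pause above. It applies full-jitter exponential
	// backoff, as described in the AWS blog post referenced in the batch writer
	// below (https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/).
	// The base, cap, and maxRetries values are arbitrary choices for illustration.
	private static void putRecordWithBackoff(AmazonKinesis kinesis, PutRecordRequest putRecReq, int maxRetries)
			throws InterruptedException {
		long baseMillis = 100; // first retry waits up to ~100 ms
		long capMillis = 5000; // never sleep longer than 5 seconds
		for (int attempt = 0;; attempt++) {
			try {
				kinesis.putRecord(putRecReq);
				return;
			} catch (ProvisionedThroughputExceededException e) {
				if (attempt >= maxRetries) {
					throw e; // give up after maxRetries throttled attempts
				}
				long ceiling = Math.min(capMillis, baseMillis * (1L << attempt));
				// full jitter: sleep a uniformly random time in [0, ceiling]
				Thread.sleep(java.util.concurrent.ThreadLocalRandom.current().nextLong(ceiling + 1));
			}
		}
	}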
129 | 	public static void writeMessagesToKinesis(AmazonKinesis kinesis, String streamName, List<String> recordList, Iterator<String> hashKeyIterator) {
130 | 		PutRecordsRequest putRecsReq = new PutRecordsRequest();
131 | 		List<PutRecordsRequestEntry> putRecReqEntryList = new ArrayList<PutRecordsRequestEntry>();
132 | 		PutRecordsResult putRecsRes = new PutRecordsResult();
133 | 		// PutRecords accepts at most 500 records per call, so write in batches of 500
134 | 		List<List<String>> listofSmallerLists = Lists.partition(recordList, 500);
135 | 		for (List<String> smallerList : listofSmallerLists) {
136 | 			putRecReqEntryList.clear();
137 | 			for (String message : smallerList) {
138 | 				PutRecordsRequestEntry putRecsReqEntry = new PutRecordsRequestEntry();
139 | 				putRecsReqEntry.setData(ByteBuffer.wrap(message.getBytes()));
140 | 				putRecsReqEntry.setPartitionKey("requiredButHasNoEffect-when-setExplicitHashKey-isUsed");
141 | 				putRecsReqEntry.setExplicitHashKey(hashKeyIterator.next());
142 | 				putRecReqEntryList.add(putRecsReqEntry);
143 | 			}
144 | 			try {
145 | 				putRecsReq.setStreamName(streamName);
146 | 				putRecsReq.setRecords(putRecReqEntryList);
147 | 				putRecsRes = kinesis.putRecords(putRecsReq);
148 | 				while (putRecsRes.getFailedRecordCount() > 0) {
149 | 					System.out.println("Processing rejected records");
150 | 					// TODO: For simplicity, the backoff is implemented as a constant 100 ms sleep.
151 | 					// For production-grade use, consider an exponential jittered backoff
152 | 					// retry strategy instead.
153 | 					// Ref:
154 | 					// https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
155 | 					Thread.sleep(100);
156 | 					final List<PutRecordsRequestEntry> failedRecordsList = new ArrayList<PutRecordsRequestEntry>();
157 | 					final List<PutRecordsResultEntry> putRecsResEntryList = putRecsRes.getRecords();
158 | 					for (int i = 0; i < putRecsResEntryList.size(); i++) {
159 | 						final PutRecordsRequestEntry putRecordReqEntry = putRecReqEntryList.get(i);
160 | 						final PutRecordsResultEntry putRecordsResEntry = putRecsResEntryList.get(i);
161 | 						if (putRecordsResEntry.getErrorCode() != null) {
162 | 							failedRecordsList.add(putRecordReqEntry);
163 | 						}
164 | 					}
165 | 					putRecReqEntryList = failedRecordsList;
166 | 					putRecsReq.setRecords(putRecReqEntryList);
167 | 					putRecsRes = kinesis.putRecords(putRecsReq);
168 | 				} // end of while loop
169 | 				System.out.println("Number of messages written: " + smallerList.size());
170 | 			} catch (Exception e) {
171 | 				System.out.println("Exception in Kinesis Batch Insert: " + e.getMessage());
172 | 			}
173 | 		}
174 | 	}
175 | 
176 | 	public static List<String> tokenizeStrings(String str, String separator) {
177 | 		List<String> tokenList = Collections.list(new StringTokenizer(str, separator)).stream()
178 | 				.map(token -> (String) token).collect(Collectors.toList());
179 | 		return tokenList;
180 | 	}
181 | }
182 | 
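Editor's note: an illustrative driver, not part of the repository, showing how the producer pieces fit together end to end: fetch one hash key per open shard (via getHashKeysForOpenShards from KinesisStreamUtil, listed below), cycle through those keys, and batch-write generated events. Assumes the AWS SDK v1 AmazonKinesisClientBuilder; the stream name is the one from benchmarking_specs.json.

    // Sketch only: generate 100 events for one session and spread them across shards.
    AmazonKinesis kinesis = AmazonKinesisClientBuilder.standard().withRegion("us-east-1").build();
    List<String> hashKeys = KinesisStreamUtil.getHashKeysForOpenShards(kinesis, "kda_flink_benchmarking_kinesis_stream");
    Iterator<String> hashKeyIterator = Iterables.cycle(hashKeys).iterator();
    List<String> events = KDSProducerUtil.createEvents(new ArrayList<String>(), 100, UUID.randomUUID().toString());
    KDSProducerUtil.writeMessagesToKinesis(kinesis, "kda_flink_benchmarking_kinesis_stream", events, hashKeyIterator);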
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/kda/flink/benchmarking/util/KinesisStreamUtil.java:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 2 | // SPDX-License-Identifier: MIT-0
 3 | 
 4 | package com.amazonaws.kda.flink.benchmarking.util;
 5 | 
 6 | import java.util.List;
 7 | import java.util.Optional;
 8 | 
 9 | import com.amazonaws.services.kinesis.AmazonKinesis;
10 | import com.amazonaws.services.kinesis.model.ListShardsRequest;
11 | import com.amazonaws.services.kinesis.model.ListShardsResult;
12 | import com.amazonaws.services.kinesis.model.Shard;
13 | import com.google.common.collect.Lists;
14 | 
15 | /**
16 |  * <p>
17 |  * This is a utility class with methods to fetch details about a Kinesis Stream.
18 |  * The shard details include the following: shard id, starting hash key, and
19 |  * ending hash key.
20 |  * </p>
21 |  *
22 |  * @author Ravi Itha, Amazon Web Services, Inc.
23 |  *
24 |  */
25 | public class KinesisStreamUtil {
26 | 
27 | 	/**
28 | 	 * This method describes a Kinesis Data Stream, fetches the starting hash key
29 | 	 * for every open (active) shard, and returns those keys as a list.
30 | 	 *
31 | 	 * @param kinesis
32 | 	 * @param streamName
33 | 	 * @return a list of starting hash keys, one per open shard
34 | 	 */
35 | 	public static List<String> getHashKeysForOpenShards(AmazonKinesis kinesis, String streamName) {
36 | 		String nextToken = null;
37 | 		List<String> hashKeyList = Lists.newArrayList();
38 | 		// prepare ListShardsRequest
39 | 		ListShardsRequest listShardsRequest = new ListShardsRequest();
40 | 		listShardsRequest.setStreamName(streamName);
41 | 		// get shards
42 | 		ListShardsResult listShardResult = kinesis.listShards(listShardsRequest);
43 | 		List<Shard> shardList = listShardResult.getShards();
44 | 		for (Shard s : shardList) {
45 | 			if (s.getSequenceNumberRange().getEndingSequenceNumber() == null) { // open shard
46 | 				hashKeyList.add(s.getHashKeyRange().getStartingHashKey());
47 | 			}
48 | 		}
49 | 		// get the 'next token' from ListShardsResult and check its value.
50 | 		// if it is not null, keep calling listShards until it becomes null,
51 | 		// i.e., paginate through all shards.
52 | 		nextToken = listShardResult.getNextToken();
53 | 		if (Optional.ofNullable(nextToken).isPresent()) {
54 | 			do {
55 | 				// create a new ListShardsRequest using the next token alone.
56 | 				listShardsRequest = new ListShardsRequest();
57 | 				listShardsRequest.setNextToken(nextToken);
58 | 				listShardResult = kinesis.listShards(listShardsRequest);
59 | 				shardList = listShardResult.getShards();
60 | 				for (Shard s : shardList) {
61 | 					if (s.getSequenceNumberRange().getEndingSequenceNumber() == null) {
62 | 						hashKeyList.add(s.getHashKeyRange().getStartingHashKey());
63 | 					}
64 | 				}
65 | 				nextToken = listShardResult.getNextToken();
66 | 			} while (Optional.ofNullable(nextToken).isPresent());
67 | 		}
68 | 		return hashKeyList;
69 | 	}
70 | 
71 | 
72 | }
73 | 
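Editor's note: the method above handles the first ListShards page and any later pages in two separate blocks (ListShards forbids setting a stream name and a next token together). A hypothetical, behavior-equivalent restructuring, not in the repository, collapses both cases into a single do/while; it uses the same SDK classes imported above, plus java.util.ArrayList, and the stream name is illustrative.

    // Sketch only: one loop for all pages of ListShards.
    List<String> hashKeyList = new ArrayList<String>();
    String nextToken = null;
    do {
        ListShardsRequest req = new ListShardsRequest();
        if (nextToken == null) {
            req.setStreamName("kda_flink_benchmarking_kinesis_stream"); // first page: by stream name
        } else {
            req.setNextToken(nextToken); // later pages: by token only
        }
        ListShardsResult res = kinesis.listShards(req);
        for (Shard s : res.getShards()) {
            if (s.getSequenceNumberRange().getEndingSequenceNumber() == null) { // open shard
                hashKeyList.add(s.getHashKeyRange().getStartingHashKey());
            }
        }
        nextToken = res.getNextToken();
    } while (nextToken != null);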
--------------------------------------------------------------------------------
/src/main/resources/Amazon-kda-flink-benchmarking-utility-Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-kinesis-data-analytics-flink-benchmarking-utility/ff7af56f9ff19f223c926e018cb449538e1e09c2/src/main/resources/Amazon-kda-flink-benchmarking-utility-Architecture.png
--------------------------------------------------------------------------------
/src/main/resources/Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-kinesis-data-analytics-flink-benchmarking-utility/ff7af56f9ff19f223c926e018cb449538e1e09c2/src/main/resources/Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png
--------------------------------------------------------------------------------
/src/main/resources/amazon-kda-flink-benchmarking-utility.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # This script runs the Kinesis Data Analytics Flink Benchmarking Utility
4 | 
5 | export TZ='America/Chicago'
6 | echo "Running Kinesis Data Generator Application @ $(date)"
7 | java -jar /home/ec2-user/kda-flink-benchmarking-utility/amazon-kinesis-data-analytics-flink-benchmarking-utility-0.1.jar \
8 | /home/ec2-user/kda-flink-benchmarking-utility/benchmarking_specs.json >> /home/ec2-user/kda-flink-benchmarking-utility/logs_new/kdg_log_$(date
'+%Y-%m-%d-%H-%M-%S').log -------------------------------------------------------------------------------- /src/main/resources/benchmarking_specs.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "amazon_kda_flink_benchmarking_utility", 3 | "jobDurationInMinutes": 65, 4 | "region": "us-east-1", 5 | "targetKinesisStreams": [ 6 | "kda_flink_benchmarking_kinesis_stream" 7 | ], 8 | "isUsingDynamoDBLocal": true, 9 | "dynamoDBLocalURI": "http://localhost:8000", 10 | "parentJobSummaryDDBTableName": "kda_flink_benchmarking_parent_job_summary", 11 | "childJobSummaryDDBTableName": "kda_flink_benchmarking_child_job_summary", 12 | "childJobs": [ 13 | { 14 | "jobName": "60_Min", 15 | "numberofInteractions": 4, 16 | "batchSize": 20, 17 | "batchCadence": 72, 18 | "numberofBatches": 50 19 | }, 20 | { 21 | "jobName": "40_Min", 22 | "numberofInteractions": 4, 23 | "batchSize": 20, 24 | "batchCadence": 48, 25 | "numberofBatches": 50 26 | }, 27 | { 28 | "jobName": "30_Min", 29 | "numberofInteractions": 6, 30 | "batchSize": 30, 31 | "batchCadence": 18, 32 | "numberofBatches": 100 33 | }, 34 | { 35 | "jobName": "25_Min", 36 | "numberofInteractions": 6, 37 | "batchSize": 30, 38 | "batchCadence": 15, 39 | "numberofBatches": 100 40 | }, 41 | { 42 | "jobName": "20_Min", 43 | "numberofInteractions": 8, 44 | "batchSize": 40, 45 | "batchCadence": 10, 46 | "numberofBatches": 125 47 | }, 48 | { 49 | "jobName": "15_Min", 50 | "numberofInteractions": 8, 51 | "batchSize": 40, 52 | "batchCadence": 7, 53 | "numberofBatches": 125 54 | }, 55 | { 56 | "jobName": "10_Min", 57 | "numberofInteractions": 16, 58 | "batchSize": 50, 59 | "batchCadence": 4, 60 | "numberofBatches": 150 61 | }, 62 | { 63 | "jobName": "5_Min", 64 | "numberofInteractions": 16, 65 | "batchSize": 50, 66 | "batchCadence": 2, 67 | "numberofBatches": 150 68 | } 69 | ] 70 | } 71 | -------------------------------------------------------------------------------- /src/main/resources/create_table_child_job_summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "TableName": "kda_flink_benchmarking_child_job_summary", 3 | "KeySchema": [ 4 | { "AttributeName": "job_name", "KeyType": "HASH" }, 5 | { "AttributeName": "job_id", "KeyType": "RANGE" } 6 | ], 7 | "AttributeDefinitions": [ 8 | { "AttributeName": "job_name", "AttributeType": "S" }, 9 | { "AttributeName": "job_id", "AttributeType": "S" } 10 | ], 11 | "ProvisionedThroughput": { 12 | "ReadCapacityUnits": 5, 13 | "WriteCapacityUnits": 5 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/resources/create_table_kinesis_stream.json: -------------------------------------------------------------------------------- 1 | { 2 | "TableName": "kda_flink_benchmarking_kinesis_stream", 3 | "KeySchema": [ 4 | { "AttributeName": "job_name", "KeyType": "HASH" }, 5 | { "AttributeName": "job_run_id", "KeyType": "RANGE" } 6 | ], 7 | "AttributeDefinitions": [ 8 | { "AttributeName": "job_name", "AttributeType": "S" }, 9 | { "AttributeName": "job_run_id", "AttributeType": "S" } 10 | ], 11 | "ProvisionedThroughput": { 12 | "ReadCapacityUnits": 5, 13 | "WriteCapacityUnits": 5 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/resources/create_table_parent_job_summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "TableName": "kda_flink_benchmarking_parent_job_summary", 3 | 
"KeySchema": [ 4 | { "AttributeName": "job_name", "KeyType": "HASH" }, 5 | { "AttributeName": "job_id", "KeyType": "RANGE" } 6 | ], 7 | "AttributeDefinitions": [ 8 | { "AttributeName": "job_name", "AttributeType": "S" }, 9 | { "AttributeName": "job_id", "AttributeType": "S" } 10 | ], 11 | "ProvisionedThroughput": { 12 | "ReadCapacityUnits": 5, 13 | "WriteCapacityUnits": 5 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/resources/event_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "attr_1": "my_attr_1", 3 | "attr_2": "my_attr_2", 4 | "attr_3": "attr_3", 5 | "attr_4": "my_attr_4", 6 | "attr_5": "bdf44161-54fa-4693-8a1f-6bd0050f671b", 7 | "attr_6": "my_attr_6", 8 | "attr_7": 1598994524140, 9 | "attr_8": "my_attr_8", 10 | "timestamp": 1598994524140, 11 | "session_id": "bdf44161-54fa-4693-8a1f-6bd0050f671b" 12 | } 13 | -------------------------------------------------------------------------------- /src/text/java/com/amazonaws/kda/benchmarking/util/GetSampleData.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | package com.amazonaws.kda.benchmarking.util; 5 | 6 | import java.util.Map; 7 | 8 | import com.amazonaws.client.builder.AwsClientBuilder; 9 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; 10 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; 11 | import com.amazonaws.services.dynamodbv2.model.AttributeValue; 12 | import com.amazonaws.services.dynamodbv2.model.ScanRequest; 13 | import com.amazonaws.services.dynamodbv2.model.ScanResult; 14 | 15 | public class GetSampleData { 16 | 17 | public static void main(String[] args) { 18 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withEndpointConfiguration( 19 | new AwsClientBuilder.EndpointConfiguration("http://localhost:8000", "us-east-1")).build(); 20 | 21 | // String tableName = "kda_flink_perf_benchmarking_with_s3"; 22 | // String tableName = "kda_flink_perf_benchmarking_without_s3"; 23 | // String tableName = "kda_flink_perf_benchmarking_child_job_summary"; 24 | String tableName = "kda_flink_perf_benchmarking_parent_job_summary"; 25 | 26 | try { 27 | ScanRequest scanRequest = new ScanRequest().withTableName(tableName); 28 | ScanResult result = client.scan(scanRequest); 29 | 30 | for (Map item : result.getItems()) { 31 | Map attributeList = item; 32 | for (Map.Entry item1 : attributeList.entrySet()) { 33 | String attributeName = item1.getKey(); 34 | AttributeValue value = item1.getValue(); 35 | 36 | // if(Optional.ofNullable(value.getN()).isPresent()) 37 | 38 | 39 | 40 | System.out.print(attributeName + ": " + (value.getS() == null ? "N=[" + value.getN() + "] " : "S=[" + value.getS() + "] ")); 41 | } 42 | // Move to next line 43 | System.out.println(); 44 | } 45 | } catch (Exception e) { 46 | System.err.println("Unable to create table: "); 47 | System.err.println(e.getMessage()); 48 | } 49 | 50 | } 51 | 52 | } 53 | --------------------------------------------------------------------------------
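Editor's note: GetSampleData issues a single Scan call, which returns at most 1 MB of data per page. A hypothetical extension, not in the repository, follows LastEvaluatedKey so that larger tables are printed completely; it reuses the same client, tableName, and SDK v1 classes imported above.

    // Sketch only: paginate the Scan until the last page has been read.
    ScanRequest scanRequest = new ScanRequest().withTableName(tableName);
    ScanResult result;
    do {
        result = client.scan(scanRequest);
        for (Map<String, AttributeValue> item : result.getItems()) {
            System.out.println(item);
        }
        // null once the final page has been returned
        scanRequest.setExclusiveStartKey(result.getLastEvaluatedKey());
    } while (result.getLastEvaluatedKey() != null);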