177 | 		tokenList = Collections.list(new StringTokenizer(str, separator)).stream()
178 | .map(token -> (String) token).collect(Collectors.toList());
179 | return tokenList;
180 | }
181 | }
182 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/kda/flink/benchmarking/util/KinesisStreamUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.kda.flink.benchmarking.util;
5 |
6 | import java.util.List;
7 | import java.util.Optional;
8 |
9 | import com.amazonaws.services.kinesis.AmazonKinesis;
10 | import com.amazonaws.services.kinesis.model.ListShardsRequest;
11 | import com.amazonaws.services.kinesis.model.ListShardsResult;
12 | import com.amazonaws.services.kinesis.model.Shard;
13 | import com.google.common.collect.Lists;
14 |
15 | /**
16 | *
17 | * This is a utility class with methods to fetch details about a Kinesis Stream.
18 | * The shard details include the following: shard id, starting Hash Key, and
19 | * ending Hash Key.
20 | *
21 | *
22 |  * @author Ravi Itha, Amazon Web Services, Inc.
23 | *
24 | */
25 | public class KinesisStreamUtil {
26 |
27 | /**
28 | 	 * This method lists the shards of a Kinesis Data Stream, fetches the starting
29 | 	 * hash key of every open (active) shard, and returns those keys as a list.
30 | *
31 | 	 * @param kinesis    an AmazonKinesis client used to call ListShards
32 | 	 * @param streamName name of the Kinesis Data Stream
33 | 	 * @return a list of starting hash keys, one per open shard
34 | */
35 | 	public static List<String> getHashKeysForOpenShards(AmazonKinesis kinesis, String streamName) {
36 | String nextToken = null;
37 | 		List<String> hashKeyList = Lists.newArrayList();
38 | // prepare ListShardsRequest
39 | ListShardsRequest listShardsRequest = new ListShardsRequest();
40 | listShardsRequest.setStreamName(streamName);
41 | // get shards
42 | ListShardsResult listShardResult = kinesis.listShards(listShardsRequest);
43 | 		List<Shard> shardList = listShardResult.getShards();
44 | for (Shard s : shardList) {
45 | if (s.getSequenceNumberRange().getEndingSequenceNumber() == null) {
46 | hashKeyList.add(s.getHashKeyRange().getStartingHashKey());
47 | }
48 | }
49 | 		// If ListShardsResult returned a next token, keep calling listShards with
50 | 		// that token until no token comes back, i.e. paginate through the
51 | 		// remaining pages of shards.
52 | nextToken = listShardResult.getNextToken();
53 | if (Optional.ofNullable(nextToken).isPresent()) {
54 | do {
55 | 				// Create a new ListShardsRequest using the next token alone (the stream name must not be set when a token is supplied).
56 | listShardsRequest = new ListShardsRequest();
57 | listShardsRequest.setNextToken(nextToken);
58 | listShardResult = kinesis.listShards(listShardsRequest);
59 | shardList = listShardResult.getShards();
60 | for (Shard s : shardList) {
61 | if (s.getSequenceNumberRange().getEndingSequenceNumber() == null) {
62 | hashKeyList.add(s.getHashKeyRange().getStartingHashKey());
63 | }
64 | }
65 | nextToken = listShardResult.getNextToken();
66 | } while (Optional.ofNullable(nextToken).isPresent());
67 | }
68 | return hashKeyList;
69 | }
70 |
71 |
72 | }
73 |
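A minimal usage sketch for the method above, assuming the standard SDK v1 client builder; the stream name is taken from benchmarking_specs.json and is only an example, not part of this class:

// Example caller for KinesisStreamUtil.getHashKeysForOpenShards (not part of the repository).
package com.amazonaws.kda.flink.benchmarking.util;

import java.util.List;

import com.amazonaws.services.kinesis.AmazonKinesis;
import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder;

public class KinesisStreamUtilExample {

	public static void main(String[] args) {
		// Build a Kinesis client for the region that hosts the stream.
		AmazonKinesis kinesis = AmazonKinesisClientBuilder.standard().withRegion("us-east-1").build();

		// Fetch the starting hash key of every open shard in the stream.
		List<String> hashKeys = KinesisStreamUtil.getHashKeysForOpenShards(kinesis,
				"kda_flink_benchmarking_kinesis_stream");
		hashKeys.forEach(key -> System.out.println("Open shard starting hash key: " + key));
	}
}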
--------------------------------------------------------------------------------
/src/main/resources/Amazon-kda-flink-benchmarking-utility-Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-kinesis-data-analytics-flink-benchmarking-utility/ff7af56f9ff19f223c926e018cb449538e1e09c2/src/main/resources/Amazon-kda-flink-benchmarking-utility-Architecture.png
--------------------------------------------------------------------------------
/src/main/resources/Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-kinesis-data-analytics-flink-benchmarking-utility/ff7af56f9ff19f223c926e018cb449538e1e09c2/src/main/resources/Amazon-kda-flink-benchmarking-utility-Detailed-Architecture.png
--------------------------------------------------------------------------------
/src/main/resources/amazon-kda-flink-benchmarking-utility.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script runs the Kinesis Data Analytics Flink Benchmarking Utility.
4 |
5 | export TZ='America/Chicago'
6 | echo "Running Kinesis Data Generator Application @ $(date)"
7 | java -jar /home/ec2-user/kda-flink-benchmarking-utility/amazon-kinesis-data-analytics-flink-benchmarking-utility-0.1.jar \
8 | /home/ec2-user/kda-flink-benchmarking-utility/benchmarking_specs.json >> /home/ec2-user/kda-flink-benchmarking-utility/logs_new/kdg_log_$(date '+%Y-%m-%d-%H-%M-%S').log
--------------------------------------------------------------------------------
/src/main/resources/benchmarking_specs.json:
--------------------------------------------------------------------------------
1 | {
2 | "jobName": "amazon_kda_flink_benchmarking_utility",
3 | "jobDurationInMinutes": 65,
4 | "region": "us-east-1",
5 | "targetKinesisStreams": [
6 | "kda_flink_benchmarking_kinesis_stream"
7 | ],
8 | "isUsingDynamoDBLocal": true,
9 | "dynamoDBLocalURI": "http://localhost:8000",
10 | "parentJobSummaryDDBTableName": "kda_flink_benchmarking_parent_job_summary",
11 | "childJobSummaryDDBTableName": "kda_flink_benchmarking_child_job_summary",
12 | "childJobs": [
13 | {
14 | "jobName": "60_Min",
15 | "numberofInteractions": 4,
16 | "batchSize": 20,
17 | "batchCadence": 72,
18 | "numberofBatches": 50
19 | },
20 | {
21 | "jobName": "40_Min",
22 | "numberofInteractions": 4,
23 | "batchSize": 20,
24 | "batchCadence": 48,
25 | "numberofBatches": 50
26 | },
27 | {
28 | "jobName": "30_Min",
29 | "numberofInteractions": 6,
30 | "batchSize": 30,
31 | "batchCadence": 18,
32 | "numberofBatches": 100
33 | },
34 | {
35 | "jobName": "25_Min",
36 | "numberofInteractions": 6,
37 | "batchSize": 30,
38 | "batchCadence": 15,
39 | "numberofBatches": 100
40 | },
41 | {
42 | "jobName": "20_Min",
43 | "numberofInteractions": 8,
44 | "batchSize": 40,
45 | "batchCadence": 10,
46 | "numberofBatches": 125
47 | },
48 | {
49 | "jobName": "15_Min",
50 | "numberofInteractions": 8,
51 | "batchSize": 40,
52 | "batchCadence": 7,
53 | "numberofBatches": 125
54 | },
55 | {
56 | "jobName": "10_Min",
57 | "numberofInteractions": 16,
58 | "batchSize": 50,
59 | "batchCadence": 4,
60 | "numberofBatches": 150
61 | },
62 | {
63 | "jobName": "5_Min",
64 | "numberofInteractions": 16,
65 | "batchSize": 50,
66 | "batchCadence": 2,
67 | "numberofBatches": 150
68 | }
69 | ]
70 | }
71 |
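A rough sketch of how a spec file like this could be loaded into Java objects. The class and field names below mirror the JSON keys but are assumptions for illustration; the utility's actual model classes and JSON parser may differ (Gson is used here purely for brevity):

// Hypothetical POJOs mirroring benchmarking_specs.json; not the utility's real model classes.
import java.io.FileReader;
import java.io.Reader;
import java.util.List;

import com.google.gson.Gson;

public class BenchmarkingSpecsLoader {

	static class ChildJob {
		String jobName;
		int numberofInteractions;
		int batchSize;
		int batchCadence;
		int numberofBatches;
	}

	static class BenchmarkingSpecs {
		String jobName;
		int jobDurationInMinutes;
		String region;
		List<String> targetKinesisStreams;
		boolean isUsingDynamoDBLocal;
		String dynamoDBLocalURI;
		String parentJobSummaryDDBTableName;
		String childJobSummaryDDBTableName;
		List<ChildJob> childJobs;
	}

	public static void main(String[] args) throws Exception {
		// args[0]: path to the spec file, e.g. the one passed in amazon-kda-flink-benchmarking-utility.sh
		try (Reader reader = new FileReader(args[0])) {
			BenchmarkingSpecs specs = new Gson().fromJson(reader, BenchmarkingSpecs.class);
			System.out.println("Parent job " + specs.jobName + " with " + specs.childJobs.size() + " child jobs");
		}
	}
}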
--------------------------------------------------------------------------------
/src/main/resources/create_table_child_job_summary.json:
--------------------------------------------------------------------------------
1 | {
2 | "TableName": "kda_flink_benchmarking_child_job_summary",
3 | "KeySchema": [
4 | { "AttributeName": "job_name", "KeyType": "HASH" },
5 | { "AttributeName": "job_id", "KeyType": "RANGE" }
6 | ],
7 | "AttributeDefinitions": [
8 | { "AttributeName": "job_name", "AttributeType": "S" },
9 | { "AttributeName": "job_id", "AttributeType": "S" }
10 | ],
11 | "ProvisionedThroughput": {
12 | "ReadCapacityUnits": 5,
13 | "WriteCapacityUnits": 5
14 | }
15 | }
--------------------------------------------------------------------------------
/src/main/resources/create_table_kinesis_stream.json:
--------------------------------------------------------------------------------
1 | {
2 | "TableName": "kda_flink_benchmarking_kinesis_stream",
3 | "KeySchema": [
4 | { "AttributeName": "job_name", "KeyType": "HASH" },
5 | { "AttributeName": "job_run_id", "KeyType": "RANGE" }
6 | ],
7 | "AttributeDefinitions": [
8 | { "AttributeName": "job_name", "AttributeType": "S" },
9 | { "AttributeName": "job_run_id", "AttributeType": "S" }
10 | ],
11 | "ProvisionedThroughput": {
12 | "ReadCapacityUnits": 5,
13 | "WriteCapacityUnits": 5
14 | }
15 | }
--------------------------------------------------------------------------------
/src/main/resources/create_table_parent_job_summary.json:
--------------------------------------------------------------------------------
1 | {
2 | "TableName": "kda_flink_benchmarking_parent_job_summary",
3 | "KeySchema": [
4 | { "AttributeName": "job_name", "KeyType": "HASH" },
5 | { "AttributeName": "job_id", "KeyType": "RANGE" }
6 | ],
7 | "AttributeDefinitions": [
8 | { "AttributeName": "job_name", "AttributeType": "S" },
9 | { "AttributeName": "job_id", "AttributeType": "S" }
10 | ],
11 | "ProvisionedThroughput": {
12 | "ReadCapacityUnits": 5,
13 | "WriteCapacityUnits": 5
14 | }
15 | }
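Each of the three table definitions above can be applied with the AWS CLI, for example: aws dynamodb create-table --cli-input-json file://create_table_parent_job_summary.json (add --endpoint-url http://localhost:8000 when targeting DynamoDB Local). A minimal Java equivalent for the parent summary table, assuming the same SDK v1 DynamoDB Local endpoint used in GetSampleData.java below, is:

// Creates the parent job summary table on DynamoDB Local, mirroring
// create_table_parent_job_summary.json; the other two tables follow the same pattern.
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;

public class CreateParentJobSummaryTable {

	public static void main(String[] args) {
		AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
				.withEndpointConfiguration(
						new AwsClientBuilder.EndpointConfiguration("http://localhost:8000", "us-east-1"))
				.build();

		CreateTableRequest request = new CreateTableRequest()
				.withTableName("kda_flink_benchmarking_parent_job_summary")
				.withKeySchema(
						new KeySchemaElement("job_name", KeyType.HASH),
						new KeySchemaElement("job_id", KeyType.RANGE))
				.withAttributeDefinitions(
						new AttributeDefinition("job_name", ScalarAttributeType.S),
						new AttributeDefinition("job_id", ScalarAttributeType.S))
				.withProvisionedThroughput(new ProvisionedThroughput(5L, 5L));

		System.out.println("Created table: "
				+ client.createTable(request).getTableDescription().getTableName());
	}
}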
--------------------------------------------------------------------------------
/src/main/resources/event_sample.json:
--------------------------------------------------------------------------------
1 | {
2 | "attr_1": "my_attr_1",
3 | "attr_2": "my_attr_2",
4 | "attr_3": "attr_3",
5 | "attr_4": "my_attr_4",
6 | "attr_5": "bdf44161-54fa-4693-8a1f-6bd0050f671b",
7 | "attr_6": "my_attr_6",
8 | "attr_7": 1598994524140,
9 | "attr_8": "my_attr_8",
10 | "timestamp": 1598994524140,
11 | "session_id": "bdf44161-54fa-4693-8a1f-6bd0050f671b"
12 | }
13 |
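For illustration, a minimal sketch of sending an event shaped like the sample above to the target stream via the SDK v1 PutRecord API. How the utility actually builds and partitions its records may differ; the commented explicitHashKey line only indicates where the starting hash keys returned by KinesisStreamUtil could be plugged in:

// Sends one trimmed-down event (modeled on event_sample.json) to a Kinesis Data Stream.
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.UUID;

import com.amazonaws.services.kinesis.AmazonKinesis;
import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder;
import com.amazonaws.services.kinesis.model.PutRecordRequest;
import com.amazonaws.services.kinesis.model.PutRecordResult;

public class PutSampleEvent {

	public static void main(String[] args) {
		AmazonKinesis kinesis = AmazonKinesisClientBuilder.standard().withRegion("us-east-1").build();

		String sessionId = UUID.randomUUID().toString();
		long now = System.currentTimeMillis();
		// A reduced version of event_sample.json with a fresh timestamp and session_id.
		String event = "{\"attr_1\":\"my_attr_1\",\"attr_7\":" + now
				+ ",\"timestamp\":" + now + ",\"session_id\":\"" + sessionId + "\"}";

		PutRecordRequest request = new PutRecordRequest()
				.withStreamName("kda_flink_benchmarking_kinesis_stream")
				.withPartitionKey(sessionId)
				// Optionally pin the record to a shard using a starting hash key from
				// KinesisStreamUtil.getHashKeysForOpenShards:
				// .withExplicitHashKey(startingHashKey)
				.withData(ByteBuffer.wrap(event.getBytes(StandardCharsets.UTF_8)));

		PutRecordResult result = kinesis.putRecord(request);
		System.out.println("Put record into " + result.getShardId()
				+ " at sequence number " + result.getSequenceNumber());
	}
}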
--------------------------------------------------------------------------------
/src/text/java/com/amazonaws/kda/benchmarking/util/GetSampleData.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.kda.benchmarking.util;
5 |
6 | import java.util.Map;
7 |
8 | import com.amazonaws.client.builder.AwsClientBuilder;
9 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
10 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
11 | import com.amazonaws.services.dynamodbv2.model.AttributeValue;
12 | import com.amazonaws.services.dynamodbv2.model.ScanRequest;
13 | import com.amazonaws.services.dynamodbv2.model.ScanResult;
14 |
15 | public class GetSampleData {
16 |
17 | public static void main(String[] args) {
18 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withEndpointConfiguration(
19 | new AwsClientBuilder.EndpointConfiguration("http://localhost:8000", "us-east-1")).build();
20 |
21 | // String tableName = "kda_flink_perf_benchmarking_with_s3";
22 | // String tableName = "kda_flink_perf_benchmarking_without_s3";
23 | // String tableName = "kda_flink_perf_benchmarking_child_job_summary";
24 | String tableName = "kda_flink_perf_benchmarking_parent_job_summary";
25 |
26 | try {
27 | ScanRequest scanRequest = new ScanRequest().withTableName(tableName);
28 | ScanResult result = client.scan(scanRequest);
29 |
30 | 			for (Map<String, AttributeValue> item : result.getItems()) {
31 | 				Map<String, AttributeValue> attributeList = item;
32 | 				for (Map.Entry<String, AttributeValue> item1 : attributeList.entrySet()) {
33 | String attributeName = item1.getKey();
34 | AttributeValue value = item1.getValue();
35 |
36 | 					// Print each attribute as either a numeric (N) or string (S) value.
37 |
38 |
39 |
40 | System.out.print(attributeName + ": " + (value.getS() == null ? "N=[" + value.getN() + "] " : "S=[" + value.getS() + "] "));
41 | }
42 | // Move to next line
43 | System.out.println();
44 | }
45 | } catch (Exception e) {
46 | 			System.err.println("Unable to scan table: " + tableName);
47 | System.err.println(e.getMessage());
48 | }
49 |
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------