├── LICENSE ├── README.md ├── benchmark ├── .gitignore ├── README.md ├── build.sbt ├── common-benchmark │ └── src │ │ ├── main │ │ ├── resources │ │ │ ├── aws.conf │ │ │ ├── commonsettings.conf │ │ │ ├── docker.conf │ │ │ ├── local.conf │ │ │ └── logback.xml │ │ └── scala │ │ │ └── common │ │ │ ├── benchmark │ │ │ ├── DataScienceMaths.scala │ │ │ ├── ObservationTypes.scala │ │ │ ├── input │ │ │ │ └── Parsers.scala │ │ │ ├── output │ │ │ │ └── JsonPrinter.scala │ │ │ └── stages │ │ │ │ ├── StatefulStagesTemplate.scala │ │ │ │ └── StatelessStagesTemplate.scala │ │ │ └── config │ │ │ ├── GeneralConfig.scala │ │ │ ├── JobExecutionMode.scala │ │ │ └── LastStage.scala │ │ └── test │ │ └── scala │ │ └── common │ │ ├── benchmark │ │ ├── TestObservationTypes.scala │ │ └── TestParsers.scala │ │ └── utils │ │ └── TestObservations.scala ├── docs │ └── images │ │ ├── architecture.png │ │ └── processing_flow.png ├── flink-benchmark │ └── src │ │ ├── main │ │ ├── resources │ │ │ └── flink.conf │ │ └── scala │ │ │ └── flink │ │ │ └── benchmark │ │ │ ├── BenchmarkSettingsForFlink.scala │ │ │ ├── FlinkTrafficAnalyzer.scala │ │ │ └── stages │ │ │ ├── StatefulStages.scala │ │ │ ├── StatelessStages.scala │ │ │ └── customObjectSerdes.scala │ │ └── test │ │ └── scala │ │ └── flink │ │ └── benchmark │ │ ├── stages │ │ ├── AggregationStageTest.scala │ │ ├── NonIncrementalWindowAfterParsingStageTest.scala │ │ ├── ReduceWindowAfterParsingStageTest.scala │ │ └── RelativeChangeStageTest.scala │ │ └── testutils │ │ └── CollectSinks.scala ├── grafana.json ├── kafka-benchmark │ └── src │ │ ├── main │ │ ├── resources │ │ │ └── kafkastreams.conf │ │ └── scala │ │ │ └── kafka │ │ │ └── benchmark │ │ │ ├── BenchmarkSettingsForKafkaStreams.scala │ │ │ ├── KafkaTrafficAnalyzer.scala │ │ │ └── stages │ │ │ ├── CustomObjectSerdes.scala │ │ │ ├── StatefulStages.scala │ │ │ ├── StatelessStages.scala │ │ │ └── customSerdes.scala │ │ └── test │ │ └── scala │ │ └── kafka │ │ └── benchmark │ │ └── stages │ │ ├── AggregationStageTest.scala │ │ ├── NonIncrementalWindowAfterParsingStageTest.scala │ │ ├── ReduceWindowAfterParsingStageTest.scala │ │ ├── SerdeTest.scala │ │ └── SlidingWindowAfterAggregationStageTest.scala ├── project │ ├── Dependencies.scala │ ├── build.properties │ └── plugins.sbt ├── spark-benchmark │ └── src │ │ ├── main │ │ ├── resources │ │ │ └── spark.conf │ │ └── scala │ │ │ └── spark │ │ │ └── benchmark │ │ │ ├── BenchmarkSettingsForSpark.scala │ │ │ ├── SparkTrafficAnalyzer.scala │ │ │ └── stages │ │ │ ├── KafkaSinkForSpark.scala │ │ │ ├── StatefulStages.scala │ │ │ └── StatelessStages.scala │ │ └── test │ │ ├── resources │ │ └── logback-test.xml │ │ └── scala │ │ └── spark │ │ └── benchmark │ │ └── stages │ │ ├── AggregationStageTest.scala │ │ ├── NonIncrementalWindowAfterParsingStageTest.scala │ │ ├── ReduceWindowAfterParsingStageTest.scala │ │ └── SlidingWindowAfterAggregationStageTest.scala └── structured-streaming-benchmark │ └── src │ ├── main │ ├── resources │ │ ├── logback.xml │ │ └── structuredstreaming.conf │ └── scala │ │ └── structuredstreaming │ │ └── benchmark │ │ ├── BenchmarkSettingsForStructuredStreaming.scala │ │ ├── StructuredStreamingTrafficAnalyzer.scala │ │ └── stages │ │ ├── OutputUtils.scala │ │ ├── StatefulStages.scala │ │ └── StatelessStages.scala │ └── test │ └── scala │ └── structuredstreaming.benchmark │ └── stages │ ├── AggregationStageTest.scala │ ├── NonIncrementalWindowAfterParsingStageTest.scala │ └── ReduceWindowAfterParsingStageTest.scala ├── data-stream-generator ├── .gitignore 
├── README.md ├── build.sbt ├── project │ ├── Dependencies.scala │ ├── build.properties │ └── plugins.sbt └── src │ └── main │ ├── resources │ ├── data │ │ ├── time1 │ │ │ └── part-00000-47ffbddb-0b79-40f9-b42f-4be8ed49208a-c000.txt │ │ ├── time10 │ │ │ └── part-00000-daca66b5-4070-4277-a987-321d29a99a6c-c000.txt │ │ ├── time11 │ │ │ └── part-00000-70a18ab8-6858-4ad7-afb2-58fd56792412-c000.txt │ │ ├── time12 │ │ │ └── part-00000-f06c8f26-d051-4667-bba3-c03f6fba2a2a-c000.txt │ │ ├── time13 │ │ │ └── part-00000-badc1fa1-46b7-43ae-b868-3cd2dc074729-c000.txt │ │ ├── time14 │ │ │ └── part-00000-12bbc372-d151-45a8-a04c-27660b6c6fa9-c000.txt │ │ ├── time15 │ │ │ └── part-00000-bdcb3a24-c358-42d6-b6d5-ba611290e538-c000.txt │ │ ├── time16 │ │ │ └── part-00000-2eb93ada-07d2-472d-859d-86536ac269a5-c000.txt │ │ ├── time17 │ │ │ └── part-00000-4572f125-48ff-4925-9b94-1c9bce400a56-c000.txt │ │ ├── time18 │ │ │ └── part-00000-bede9a92-9978-48c7-b188-57652451f254-c000.txt │ │ ├── time19 │ │ │ └── part-00000-d9e3cefe-ad8f-4ca7-87a3-f1d3bef9bf1e-c000.txt │ │ ├── time2 │ │ │ └── part-00000-65af0de2-29a7-4a0e-9fd9-118cea1c77e1-c000.txt │ │ ├── time20 │ │ │ └── part-00000-33692ffc-a91c-4a4c-911c-5bf737f4401f-c000.txt │ │ ├── time21 │ │ │ └── part-00000-3a9ed601-0d2c-4367-91bc-67b3ce55f928-c000.txt │ │ ├── time22 │ │ │ └── part-00000-59db9dc5-dab1-4275-a2cf-d12dc74439ce-c000.txt │ │ ├── time23 │ │ │ └── part-00000-cd57dda7-f64f-4b72-99fa-1c428b83dfee-c000.txt │ │ ├── time24 │ │ │ └── part-00000-6808351b-5dcf-4e9e-986f-ec79c3412637-c000.txt │ │ ├── time25 │ │ │ └── part-00000-51698d64-3855-4444-976f-685599606682-c000.txt │ │ ├── time26 │ │ │ └── part-00000-97d1d66c-c0b5-4c17-b3e1-09d34c3e0fa4-c000.txt │ │ ├── time27 │ │ │ └── part-00000-41d81566-b8e0-4045-88d9-fc764bba617c-c000.txt │ │ ├── time28 │ │ │ └── part-00000-22836432-4fb4-4790-befc-4739dfe48153-c000.txt │ │ ├── time29 │ │ │ └── part-00000-24836966-7c16-4ba2-ae74-3c4b93549a4e-c000.txt │ │ ├── time3 │ │ │ └── part-00000-d0b4a54a-29ea-4062-9226-abf91d936936-c000.txt │ │ ├── time30 │ │ │ └── part-00000-d4551e07-5d2b-49f2-9780-35485d18fda0-c000.txt │ │ ├── time31 │ │ │ └── part-00000-d0e16e91-af1b-4377-96d1-849d69168d08-c000.txt │ │ ├── time32 │ │ │ └── part-00000-ab7240a5-e2c8-4482-837f-056556cff842-c000.txt │ │ ├── time33 │ │ │ └── part-00000-ca7fed82-3110-46b9-9d15-27857c9663cc-c000.txt │ │ ├── time34 │ │ │ └── part-00000-ed16f8c4-eebb-4ff1-8779-5a874466ad44-c000.txt │ │ ├── time35 │ │ │ └── part-00000-d9cb5fe1-630e-422a-9ca2-2be397e9b67f-c000.txt │ │ ├── time36 │ │ │ └── part-00000-24e03e00-d43f-4f33-8b46-feecdec2c99c-c000.txt │ │ ├── time37 │ │ │ └── part-00000-f93e1192-3bc1-44d3-af93-433b27622ae6-c000.txt │ │ ├── time38 │ │ │ └── part-00000-d30b39e7-1971-4ab1-ad39-7bec504185f8-c000.txt │ │ ├── time39 │ │ │ └── part-00000-a48cf752-260c-44b6-b9bb-1b038a571fbb-c000.txt │ │ ├── time4 │ │ │ └── part-00000-d0d9bbec-a84f-452f-88e7-4efc7e78b4c5-c000.txt │ │ ├── time40 │ │ │ └── part-00000-2109bf08-6634-4e2d-b810-2da88671871d-c000.txt │ │ ├── time41 │ │ │ └── part-00000-43d1153e-a188-464f-823b-14d09f95d5d4-c000.txt │ │ ├── time42 │ │ │ └── part-00000-cfd277bb-27a8-41ac-8503-ae49e6a739e6-c000.txt │ │ ├── time43 │ │ │ └── part-00000-3f892299-52ea-427f-aea5-ce5b15eb1c84-c000.txt │ │ ├── time44 │ │ │ └── part-00000-269d0eb3-90b4-4fc0-84a0-44c04c058cf2-c000.txt │ │ ├── time45 │ │ │ └── part-00000-d400c274-f9b6-42d4-b2d2-b66671ca3728-c000.txt │ │ ├── time46 │ │ │ └── part-00000-2122ae53-ce41-4a73-9470-46cd5988d9c6-c000.txt │ │ ├── time47 │ │ │ └── 
part-00000-472cf7e1-9c65-4b83-bcfe-6a27a37357cc-c000.txt │ │ ├── time48 │ │ │ └── part-00000-1b5db027-53dc-4d72-8fb3-90d8d5ebcae4-c000.txt │ │ ├── time49 │ │ │ └── part-00000-a1091424-8055-426a-9143-e9e0a7a76bb4-c000.txt │ │ ├── time5 │ │ │ └── part-00000-c5ddf76f-59d1-43b0-9d3e-d1cf6335d376-c000.txt │ │ ├── time50 │ │ │ └── part-00000-49469c49-1ec0-46cb-93f7-eb9186de5899-c000.txt │ │ ├── time51 │ │ │ └── part-00000-683a8c47-8bde-45de-8f31-972d72266df3-c000.txt │ │ ├── time52 │ │ │ └── part-00000-b5be7c93-828f-49b6-bc62-28e574f4ce67-c000.txt │ │ ├── time53 │ │ │ └── part-00000-f9a75309-b0a2-4ab8-9f23-c4bc1b338f59-c000.txt │ │ ├── time54 │ │ │ └── part-00000-d19dca30-cefd-42a7-a522-044d3e47f573-c000.txt │ │ ├── time55 │ │ │ └── part-00000-8995f921-2949-4a97-8b48-53438d09db55-c000.txt │ │ ├── time56 │ │ │ └── part-00000-75613884-6667-465d-9918-edc4b3b6fb27-c000.txt │ │ ├── time57 │ │ │ └── part-00000-2ea2b375-5b6f-4e29-8b65-a47d92b02759-c000.txt │ │ ├── time58 │ │ │ └── part-00000-3ae99bc7-bc46-4025-9cfb-8ef95563988c-c000.txt │ │ ├── time59 │ │ │ └── part-00000-6786da89-ac39-4bed-931e-d9f94b5f3d3a-c000.txt │ │ ├── time6 │ │ │ └── part-00000-4e2d22d4-0d79-478b-b722-b1f9f1b2a2b9-c000.txt │ │ ├── time60 │ │ │ └── part-00000-e23159e2-8bcb-4cd4-9d73-47ddea5d0c22-c000.txt │ │ ├── time7 │ │ │ └── part-00000-911d4bca-24d5-40bb-bafc-123790ccca94-c000.txt │ │ ├── time8 │ │ │ └── part-00000-00ad0fc1-eb56-4ffc-a2f7-1211c05f5473-c000.txt │ │ └── time9 │ │ │ └── part-00000-d54d64a4-9823-4a56-b94e-a1e46a99c29a-c000.txt │ ├── logback.xml │ └── resources.conf │ └── scala │ └── ingest │ ├── ConfigUtils.scala │ ├── ConstantRatePublisher.scala │ ├── DataUtils.scala │ ├── FaultyEventPublisher.scala │ ├── PeriodicBurstPublisher.scala │ ├── Publisher.scala │ ├── SingleBurstPublisher.scala │ └── StreamProducer.scala ├── deployment ├── .gitignore ├── README.md ├── automation_scripts │ ├── AWS_ACCESS_KEY.template │ ├── AWS_SECRET_KEY.template │ ├── benchmark-input-data-path.template │ ├── benchmark-jars-bucket.template │ ├── benchmark-jars-path.template │ ├── benchmark-metrics-path.template │ ├── benchmark-results-path.template │ ├── bring-up-side-components.sh │ ├── connect.sh │ ├── create-kafka-topic.sh │ ├── describe-kafka-topic.sh │ ├── flink │ │ ├── flink-job-failure.sh │ │ ├── flink-latency-constant-rate.sh │ │ ├── flink-master-failure.sh │ │ ├── flink-periodic-burst.sh │ │ ├── flink-scalability.sh │ │ ├── flink-single-burst.sh │ │ └── flink-worker-failure.sh │ ├── initiate-dcos-tunnel.sh │ ├── kafka-streams │ │ ├── kafka-streams-job-failure.sh │ │ ├── kafka-streams-latency-constant-rate.sh │ │ ├── kafka-streams-master-failure.sh │ │ ├── kafka-streams-periodic-burst.sh │ │ ├── kafka-streams-scalability.sh │ │ ├── kafka-streams-single-burst.sh │ │ └── kafka-streams-worker-failure.sh │ ├── new_cloudformation_template.json │ ├── new_cloudformation_template_SPOT.json │ ├── remove-flink-cluster.sh │ ├── remove-jmx-and-publisher.sh │ ├── remove-kafka-streams-job.sh │ ├── remove-spark-cluster.sh │ ├── run-evaluator.sh │ ├── run-output-consumer.sh │ ├── scrape_metrics.sh │ ├── set-up-kafka-manager.sh │ ├── spark │ │ ├── spark-driver-failure.sh │ │ ├── spark-job-failure.sh │ │ ├── spark-latency-constant-rate.sh │ │ ├── spark-master-failure.sh │ │ ├── spark-periodic-burst.sh │ │ ├── spark-scalability.sh │ │ ├── spark-single-burst.sh │ │ └── spark-worker-failure.sh │ ├── start-flink-cluster-HA.sh │ ├── start-flink-cluster.sh │ ├── start-spark-cluster.sh │ └── structured-streaming │ │ ├── 
structured-streaming-driver-failure.sh │ │ ├── structured-streaming-job-failure.sh │ │ ├── structured-streaming-latency-constant-rate.sh │ │ ├── structured-streaming-master-failure.sh │ │ ├── structured-streaming-periodic-burst.sh │ │ ├── structured-streaming-scalability.sh │ │ ├── structured-streaming-single-burst.sh │ │ └── structured-streaming-worker-failure.sh ├── aws_marathon_files │ ├── aws-hdfs.json │ ├── aws-influx-db.json │ ├── aws-kafka-brokers-2.1-with-env.json │ ├── aws-kafka-manager.json │ ├── aws-publisher-with-env.json │ ├── aws_with_env.conf │ ├── cadvisor-benchmark-with-env.json │ ├── flink-jobmanager-standby.json │ ├── flink-jobmanager-with-env.json │ ├── flink-taskmanager-with-env.json │ ├── jmx-exporter-with-env.json │ ├── kafka-thread-with-env.json │ ├── spark-master.json │ ├── spark-submit-with-env.json │ └── spark-worker-with-env.json ├── flink_cluster_1.11.1 │ ├── README.md │ ├── jobmanager │ │ ├── Dockerfile │ │ ├── KEYS │ │ ├── cancel-job.sh │ │ ├── docker-entrypoint.sh │ │ ├── flink-conf.yaml │ │ ├── push.sh │ │ ├── submit-job.sh │ │ └── version │ ├── taskmanager-large │ │ ├── Dockerfile │ │ ├── KEYS │ │ ├── docker-entrypoint.sh │ │ ├── flink-conf.yaml │ │ ├── push.sh │ │ └── version │ ├── taskmanager-small │ │ ├── Dockerfile │ │ ├── KEYS │ │ ├── docker-entrypoint.sh │ │ ├── flink-conf.yaml │ │ ├── push.sh │ │ └── version │ ├── taskmanager-smallest │ │ ├── Dockerfile │ │ ├── KEYS │ │ ├── docker-entrypoint.sh │ │ ├── flink-conf.yaml │ │ ├── push.sh │ │ └── version │ └── taskmanager │ │ ├── Dockerfile │ │ ├── KEYS │ │ ├── docker-entrypoint.sh │ │ ├── flink-conf.yaml │ │ ├── push.sh │ │ └── version └── spark_cluster_3.0.0 │ ├── README.md │ ├── docker-spark-master │ ├── .travis.yml │ ├── Dockerfile │ ├── LICENSE │ ├── active-driver-check.sh │ ├── entrypoint.sh │ ├── evaluator-submit-job.sh │ ├── output-consumer-submit-job.sh │ ├── push.sh │ ├── spark-defaults.conf │ ├── spark-kill-job.sh │ └── version │ ├── docker-spark-submit │ ├── Dockerfile │ ├── executejar.sh │ ├── push.sh │ ├── spark-defaults.conf │ └── version │ └── docker-spark-worker │ ├── .travis.yml │ ├── Dockerfile │ ├── LICENSE │ ├── entrypoint.sh │ ├── push.sh │ ├── spark-defaults.conf │ └── version ├── evaluator ├── .gitignore ├── README.md ├── build.sbt ├── project │ ├── Dependencies.scala │ ├── build.properties │ └── plugins.sbt ├── src │ └── main │ │ ├── resources │ │ ├── cadvisor-log.json │ │ ├── hdfs-cadvisor-logs.json │ │ ├── hdfs-cadvisor-spark.json │ │ └── resources.conf │ │ └── scala │ │ └── evaluation │ │ ├── EvaluationMain.scala │ │ ├── config │ │ └── EvaluationConfig.scala │ │ ├── metrics │ │ ├── LatencyUtils.scala │ │ ├── ThroughputUtils.scala │ │ ├── cadvisor │ │ │ ├── CadvisorCpuUtils.scala │ │ │ ├── CadvisorDiskUtils.scala │ │ │ ├── CadvisorMemoryUtils.scala │ │ │ ├── CadvisorNetworkUtils.scala │ │ │ ├── CadvisorQueryClient.scala │ │ │ └── CadvisorResourceUtils.scala │ │ ├── cadvisorextended │ │ │ ├── CadvisorResourceComputer.scala │ │ │ ├── HdfsResourceComputer.scala │ │ │ └── KafkaResourceComputer.scala │ │ └── jmx │ │ │ ├── JmxGCUtils.scala │ │ │ └── JmxMemoryUtils.scala │ │ ├── modes │ │ ├── ConstantRateEvaluator.scala │ │ ├── FaultyEventEvaluator.scala │ │ ├── LatencyConstantRateEvaluator.scala │ │ ├── MasterFailureEvaluator.scala │ │ ├── PeriodicBurstEvaluator.scala │ │ ├── SingleBurstEvaluator.scala │ │ └── WorkerFailureEvaluator.scala │ │ └── utils │ │ ├── DataTypes.scala │ │ ├── IOUtils.scala │ │ └── MetricUtils.scala └── version.sbt ├── kafka-cluster-tools ├── 
grafana-dashboard.json ├── read-from-topic.sh └── setup-kafka.sh ├── metrics-exporter ├── .gitignore ├── README.md ├── build.sbt ├── project │ ├── Dependencies.scala │ ├── build.properties │ └── plugins.sbt └── src │ └── main │ ├── resources │ └── logback.xml │ └── scala │ └── benchmark │ └── metrics │ └── exporter │ ├── CadvisorExporter.scala │ ├── ConfigUtils.scala │ ├── ExporterMain.scala │ ├── JmxExporter.scala │ └── MetricTypes.scala ├── output-consumer ├── .gitignore ├── README.md ├── build.sbt ├── project │ ├── Dependencies.scala │ ├── build.properties │ └── plugins.sbt ├── src │ └── main │ │ ├── resources │ │ ├── logback.xml │ │ └── resources.conf │ │ └── scala │ │ └── output │ │ └── consumer │ │ ├── ConfigUtils.scala │ │ ├── LocalModeWriter.scala │ │ ├── OutputConsumer.scala │ │ └── SingleBatchWriter.scala └── version.sbt └── result-analysis ├── .gitignore ├── Detailed run visualizer - report generator.ipynb ├── Driver Failure.ipynb ├── Latency.ipynb ├── Master Failure.ipynb ├── Periodic Burst.ipynb ├── README.md ├── Scalability and sustainable throughput.ipynb ├── Single Burst.ipynb └── Worker Failure.ipynb /benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | #ignore all target (files and folders) 2 | 3 | **/target 4 | **/.idea 5 | project/project/ 6 | kafka-logs 7 | spark-benchmark/checkpointdir 8 | /checkpointdir/ 9 | /benchmark.ipr 10 | /benchmark.iws 11 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # OSPBench: Open Stream Processing Benchmark 2 | 3 | Currently the benchmark includes Apache Spark (Spark Streaming and Structured Streaming), Apache Flink and Kafka Streams. 4 | 5 | Please consult the wiki of the repository to see details on deployment and running locally. 
6 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/resources/aws.conf: -------------------------------------------------------------------------------- 1 | #AWS CONFIG 2 | #_______________________________________________________________________________________________________________ 3 | environment { 4 | mode = "$MODE" 5 | is.running.in.docker = "false" 6 | } 7 | 8 | general { 9 | last.stage = "$LAST_STAGE" 10 | stream.source { 11 | volume = "$DATA_VOLUME" 12 | } 13 | } 14 | 15 | kafka { 16 | groupid = "ndwgroup" 17 | bootstrap.servers = "$KAFKA_BOOTSTRAP_SERVERS" 18 | zookeeper.server = "zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181" 19 | output.topic = "$TOPICNAME" 20 | auto.offset.reset.strategy = "$KAFKA_AUTO_OFFSET_RESET_STRATEGY" 21 | flow.topic = "$FLOWTOPIC" 22 | speed.topic = "$SPEEDTOPIC" 23 | } 24 | 25 | hdfs { 26 | active.name.node = "$ACTIVE_HDFS_NAME_NODE" 27 | } 28 | 29 | monitoring { 30 | graphite.host = "localhost" 31 | graphite.port = 2003 32 | print.output = false 33 | } 34 | 35 | spark { 36 | master = "spark://spark-master.marathon.mesos:7077" 37 | checkpoint.dir = "/checkpointdir/" 38 | default.parallelism = ${?DEFAULT_PARALLELISM} 39 | sql.shuffle.partitions = ${?SQL_SHUFFLE_PARTITIONS} 40 | } 41 | 42 | storm { 43 | workers = 4 44 | } 45 | 46 | flink { 47 | checkpoint.dir = "hdfs://$ACTIVE_HDFS_NAME_NODE/checkpointDirStructured" 48 | partitions = "$NUM_PARTITIONS" 49 | } 50 | 51 | kafkastreams { 52 | checkpoint.dir = "./kafka-logs/" 53 | streams.threads.per.instance = ${?NUM_THREADS_PER_INSTANCE} 54 | } 55 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/resources/commonsettings.conf: -------------------------------------------------------------------------------- 1 | general { 2 | sliding.window.after.aggregation { 3 | short.term.batches.lookback = 1 4 | long.term.batches.lookback = 2 5 | } 6 | 7 | window.after.parsing { 8 | window.duration = 300000 9 | slide.duration = 60000 10 | } 11 | } 12 | 13 | periodic-burst { 14 | stream.source { 15 | publish.interval.millis = 1000 16 | } 17 | } 18 | 19 | single-burst { 20 | stream.source { 21 | publish.interval.millis = 1000 22 | } 23 | } 24 | 25 | constant-rate { 26 | stream.source { 27 | publish.interval.millis = 1000 28 | } 29 | } 30 | 31 | latency-constant-rate { 32 | stream.source { 33 | publish.interval.millis = 1000 34 | } 35 | } 36 | 37 | worker-failure { 38 | stream.source { 39 | publish.interval.millis = 1000 40 | } 41 | } 42 | 43 | faulty-event { 44 | stream.source { 45 | publish.interval.millis = 1000 46 | } 47 | } 48 | 49 | master-failure { 50 | stream.source { 51 | publish.interval.millis = 1000 52 | } 53 | } -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/resources/docker.conf: -------------------------------------------------------------------------------- 1 | #DOCKER CONFIG 2 | #_______________________________________________________________________________________________________________ 3 | environment { 4 | mode = "constant-rate" 5 | is.running.in.docker = "true" 6 | } 7 | 8 | general { 9 | last.stage = "3" 10 | partitions = 2 11 | stream.source { 12 | volume = "1" 13 | } 14 | } 15 | 16 | kafka { 17 | groupid = "ndwgroup" 18 | bootstrap.servers = "kafka:9092" 19 | zookeeper.server = "zookeeper:2181" 20 | output.topic = "metrics" 21 | auto.offset.reset.strategy = "latest" 22 | 
flow.topic = "ndwflow" 23 | speed.topic = "ndwspeed" 24 | } 25 | 26 | hdfs { 27 | active.name.node = "" 28 | } 29 | 30 | monitoring { 31 | graphite.host = "graphite_grafana" 32 | graphite.port = 2003 33 | print.output = true 34 | } 35 | 36 | spark { 37 | master = "local[4]" 38 | checkpoint.dir = "/opt/docker/checkpointdir/" 39 | default.parallelism = 1 40 | sql.shuffle.partitions = 1 41 | } 42 | 43 | storm { 44 | workers = 4 45 | } 46 | 47 | flink { 48 | checkpoint.dir = "./checkpointdir/" 49 | partitions = "20" 50 | } 51 | 52 | kafkastreams { 53 | checkpoint.dir = "./kafka-logs/" 54 | num.partitions = 1 55 | streams.threads.per.instance = 1 56 | } -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/resources/local.conf: -------------------------------------------------------------------------------- 1 | #LOCAL CONFIG 2 | #_______________________________________________________________________________________________________________ 3 | environment { 4 | mode = "constant-rate" 5 | is.running.in.docker = "false" 6 | } 7 | 8 | general { 9 | last.stage = "3" 10 | partitions = 2 11 | stream.source { 12 | volume = "1" 13 | } 14 | } 15 | 16 | kafka { 17 | groupid = "ndwgroup" 18 | bootstrap.servers = ${?KAFKA_BOOTSTRAP_SERVERS} 19 | zookeeper.server = ${?ZOOKEEPER_SERVER} 20 | output.topic = "metrics" 21 | auto.offset.reset.strategy = "latest" 22 | flow.topic = "ndwflow" 23 | speed.topic = "ndwspeed" 24 | } 25 | 26 | hdfs { 27 | active.name.node = "" 28 | } 29 | 30 | monitoring { 31 | graphite.host = "localhost" 32 | graphite.port = 2003 33 | print.output = false 34 | } 35 | 36 | spark { 37 | master = "local[1]" 38 | checkpoint.dir = "./spark-benchmark/checkpointdir/" 39 | default.parallelism = 1 40 | sql.shuffle.partitions = 1 41 | } 42 | 43 | storm { 44 | workers = 4 45 | } 46 | 47 | flink { 48 | checkpoint.dir = "./checkpointdir/" 49 | partitions = "20" 50 | } 51 | 52 | kafkastreams { 53 | checkpoint.dir = "./kafka-logs/" 54 | streams.threads.per.instance = 1 55 | } -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/scala/common/benchmark/DataScienceMaths.scala: -------------------------------------------------------------------------------- 1 | package common.benchmark 2 | 3 | import java.sql.Timestamp 4 | 5 | import common.benchmark.input.Parsers 6 | 7 | object DataScienceMaths { 8 | def calculateRelativeChangeBetweenObservations(newest: AggregatableObservation, oldReference: Option[AggregatableObservation]): Option[RelativeChangePercentages] = { 9 | oldReference.map(oldValue => RelativeChangePercentages( 10 | flowPct = (newest.accumulatedFlow - oldValue.accumulatedFlow) / oldValue.accumulatedFlow.toDouble, 11 | speedPct = (newest.averageSpeed - oldValue.averageSpeed) / oldValue.averageSpeed.toDouble 12 | )) 13 | } 14 | 15 | def lookbackInTime(nbSeconds: Int, history: Seq[AggregatableObservation], referenceTimestampMillis: Long): Option[AggregatableObservation] = { 16 | history.find(x => x.roundedTimestamp == Parsers.roundMillisToSeconds(referenceTimestampMillis) - (nbSeconds * 1000)) 17 | } 18 | 19 | 
def calculateRelativeChangeBetweenObservationsForStructuredStreaming(newest: AggregatableObservationWithTimestamp, oldReference: Option[AggregatableObservationWithTimestamp]): Option[RelativeChangePercentages] = { 20 | oldReference.map(oldValue => RelativeChangePercentages( 21 | flowPct = (newest.accumulatedFlow - oldValue.accumulatedFlow) / oldValue.accumulatedFlow.toDouble, 22 | speedPct = (newest.averageSpeed - oldValue.averageSpeed) / oldValue.averageSpeed.toDouble 23 | )) 24 | } 25 | 26 | def lookbackInTimeForStructuredStreaming(nbSeconds: Int, history: Seq[AggregatableObservationWithTimestamp], referenceTimestamp: Timestamp): Option[AggregatableObservationWithTimestamp] = { 27 | history.find(x => x.roundedTimestamp == Parsers.roundMillisToSeconds(referenceTimestamp.getTime) - (nbSeconds * 1000)) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/scala/common/benchmark/output/JsonPrinter.scala: -------------------------------------------------------------------------------- 1 | package common.benchmark.output 2 | 3 | import common.benchmark.{AggregatableFlowObservation, AggregatableObservation, FlowObservation, RelativeChangeObservation, SpeedObservation} 4 | import io.circe.generic.auto._ 5 | import io.circe.syntax._ 6 | 7 | object JsonPrinter { 8 | def jsonFor(rawInputRecord: (String, String, String, Long), jobProfile: String): (String, String) = { 9 | (rawInputRecord._1, s"""{"publishTimestamp":"${rawInputRecord._4.toString}", "jobProfile":"${jobProfile}"}""") 10 | } 11 | 12 | def jsonFor(rawInputRecord: (String, String, Long), jobProfile: String): (String, String) = { 13 | (rawInputRecord._1, s"""{"publishTimestamp":"${rawInputRecord._3.toString}", "jobProfile":"${jobProfile}"}""") 14 | } 15 | 16 | def jsonFor(obs: FlowObservation): (String, String) = (obs.measurementId, obs.asJson.noSpaces) 17 | 18 | def jsonFor(obs: SpeedObservation): (String, String) = (obs.measurementId, obs.asJson.noSpaces) 19 | 20 | def jsonFor(obs: AggregatableObservation): (String, String) = (obs.measurementId, obs.asJson.noSpaces) 21 | 22 | def jsonFor(obs: AggregatableFlowObservation): (String, String) = (obs.measurementId, obs.asJson.noSpaces) 23 | 24 | def jsonFor(obs: RelativeChangeObservation): (String, String) = (obs.measurementId, obs.asJson.noSpaces) 25 | } -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/scala/common/benchmark/stages/StatelessStagesTemplate.scala: -------------------------------------------------------------------------------- 1 | package common.benchmark.stages 2 | 3 | /** 4 | * Each benchmark tech stack should have an implementation 5 | * of the following methods, 6 | * but the type signatures in the different implementations 7 | * differ too much to use actual inheritance for enforcing this.
8 | */ 9 | trait StatelessStagesTemplate { 10 | /** 11 | * Consumes from Kafka from flow and speed topic 12 | * 13 | * @return raw kafka stream 14 | */ 15 | // def ingestStage 16 | 17 | 18 | /** 19 | * Parses data from both streams 20 | * 21 | * @return stream of [[common.benchmark.FlowObservation]] and stream of [[common.benchmark.SpeedObservation]] 22 | */ 23 | // def parsingStage 24 | 25 | /** 26 | * Consumes from Kafka from flow topic 27 | * 28 | * @return raw kafka stream 29 | */ 30 | // def ingestFlowStreamStage 31 | 32 | /** 33 | * Parses only flow events 34 | * 35 | * @return stream of [[common.benchmark.FlowObservation]] 36 | */ 37 | // def parsingFlowStreamStage 38 | 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/scala/common/config/JobExecutionMode.scala: -------------------------------------------------------------------------------- 1 | package common.config 2 | 3 | sealed abstract class JobExecutionMode(val name: String) extends Serializable 4 | 5 | object JobExecutionMode { 6 | 7 | case object CONSTANT_RATE extends JobExecutionMode("constant-rate") 8 | 9 | case object LATENCY_CONSTANT_RATE extends JobExecutionMode("latency-constant-rate") 10 | 11 | case object SINGLE_BURST extends JobExecutionMode("single-burst") 12 | 13 | case object PERIODIC_BURST extends JobExecutionMode("periodic-burst") 14 | 15 | case object WORKER_FAILURE extends JobExecutionMode("worker-failure") 16 | 17 | case object MASTER_FAILURE extends JobExecutionMode("master-failure") 18 | 19 | case object FAULTY_EVENT extends JobExecutionMode("faulty-event") 20 | 21 | val values = Seq(CONSTANT_RATE, LATENCY_CONSTANT_RATE, SINGLE_BURST, PERIODIC_BURST, WORKER_FAILURE, MASTER_FAILURE, FAULTY_EVENT) 22 | 23 | def withName(nameOfMode: String): JobExecutionMode = { 24 | values.find { value => value.name == nameOfMode }.get 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /benchmark/common-benchmark/src/main/scala/common/config/LastStage.scala: -------------------------------------------------------------------------------- 1 | package common.config 2 | 3 | sealed abstract class LastStage(val value: Int) extends Serializable 4 | 5 | object LastStage { 6 | 7 | // Complex pipeline 8 | 9 | case object UNTIL_INGEST extends LastStage(0) 10 | 11 | case object UNTIL_PARSE extends LastStage(1) 12 | 13 | case object UNTIL_JOIN extends LastStage(2) 14 | 15 | case object UNTIL_TUMBLING_WINDOW extends LastStage(3) 16 | 17 | case object UNTIL_SLIDING_WINDOW extends LastStage(4) 18 | 19 | case object UNTIL_LOWLEVEL_TUMBLING_WINDOW extends LastStage(5) 20 | 21 | case object UNTIL_LOWLEVEL_SLIDING_WINDOW extends LastStage(6) 22 | 23 | 24 | // Simple stateful pipelines 25 | 26 | case object REDUCE_WINDOW_WITHOUT_JOIN extends LastStage(100) 27 | 28 | case object NON_INCREMENTAL_WINDOW_WITHOUT_JOIN extends LastStage(101) 29 | 30 | val values = Seq( 31 | // complex pipelines 32 | UNTIL_INGEST, 33 | UNTIL_PARSE, 34 | UNTIL_JOIN, 35 | UNTIL_TUMBLING_WINDOW, 36 | UNTIL_SLIDING_WINDOW, 37 | UNTIL_LOWLEVEL_TUMBLING_WINDOW, 38 | UNTIL_LOWLEVEL_SLIDING_WINDOW, 39 | 40 | // simple stateful pipelines 41 | REDUCE_WINDOW_WITHOUT_JOIN, 42 | NON_INCREMENTAL_WINDOW_WITHOUT_JOIN 43 | ) 44 | 45 | def withName(valueOfStage: Int): LastStage = { 46 | values.find { value => value.value == valueOfStage }.get 47 | } 48 | } 49 | -------------------------------------------------------------------------------- 
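For orientation, a short illustrative snippet (not part of the repository) showing how these two lookups resolve the settings seen in the configuration files above, for example docker.conf with mode = "constant-rate" and last.stage = "3":

import common.config.{JobExecutionMode, LastStage}

// values taken from docker.conf above
val mode: JobExecutionMode = JobExecutionMode.withName("constant-rate") // resolves to CONSTANT_RATE
val lastStage: LastStage = LastStage.withName("3".toInt)                // resolves to UNTIL_TUMBLING_WINDOW
// both helpers use .get on the lookup, so an unrecognized setting fails fast with a NoSuchElementException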
/benchmark/common-benchmark/src/test/scala/common/benchmark/TestObservationTypes.scala: -------------------------------------------------------------------------------- 1 | package common.benchmark 2 | 3 | import org.scalatest.{Inside, Matchers, WordSpec} 4 | 5 | class TestObservationTypes extends WordSpec with Matchers with Inside { 6 | val firstObservation = AggregatableObservation( 7 | "u/1/5/r/f/x/4/h/7/f/s/c/PZH01_MST_0690_00", 8 | List("lane1"), 9 | 1488461880000l, 10 | 52.0265, 11 | 4.68309, 12 | 840, 13 | 60, 14 | 95, 15 | 48, 16 | 95, 17 | 2, 18 | "jobprofile1" 19 | ) 20 | 21 | //observation 5 minutes ago 22 | val secondObservation = AggregatableObservation( 23 | "u/1/5/r/f/x/4/h/7/f/s/c/PZH01_MST_0690_00", 24 | List("lane2"), 25 | 1488461880000l, 26 | 52.0265, 27 | 4.68309, 28 | 880, 29 | 60, 30 | 95, 31 | 70, 32 | 95, 33 | 2, 34 | "jobprofile2" 35 | ) 36 | 37 | "Combining aggregated observations" should { 38 | "result in an aggregated observation" in { 39 | val aggregatedObservation = firstObservation.combineObservations(secondObservation) 40 | 41 | inside(aggregatedObservation) { 42 | case agg: AggregatableObservation => 43 | agg.measurementId shouldBe "u/1/5/r/f/x/4/h/7/f/s/c/PZH01_MST_0690_00" 44 | agg.lanes shouldBe List("lane1", "lane2") 45 | agg.publishTimestamp shouldBe 1488461880000l 46 | agg.latitude shouldBe 52.0265 47 | agg.longitude shouldBe 4.68309 48 | agg.accumulatedFlow shouldBe 840 + 880 49 | agg.period shouldBe 60 50 | agg.flowAccuracy shouldBe 95 51 | agg.averageSpeed shouldBe (48d + 70d) / 2d 52 | agg.speedAccuracy shouldBe 95 53 | agg.jobProfile shouldBe "jobprofile1" 54 | } 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /benchmark/docs/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Klarrio/open-stream-processing-benchmark/8839de8101f3ac5338c02dc5dcfdd57c7aeec612/benchmark/docs/images/architecture.png -------------------------------------------------------------------------------- /benchmark/docs/images/processing_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Klarrio/open-stream-processing-benchmark/8839de8101f3ac5338c02dc5dcfdd57c7aeec612/benchmark/docs/images/processing_flow.png -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/main/resources/flink.conf: -------------------------------------------------------------------------------- 1 | # Flink specific configuration for the different modes 2 | flink { 3 | latency-constant-rate { 4 | auto.watermark.interval = 50 5 | max.out.of.orderness = 50 6 | buffer.timeout = 0 7 | checkpoint.interval = 10000 8 | exactly.once = false 9 | } 10 | 11 | constant-rate { 12 | auto.watermark.interval = 50 13 | max.out.of.orderness = 50 14 | buffer.timeout = 100 15 | checkpoint.interval = 60000 16 | exactly.once = false 17 | } 18 | 19 | periodic-burst { 20 | auto.watermark.interval = 50 21 | max.out.of.orderness = 50 22 | buffer.timeout = 100 23 | checkpoint.interval = 10000 24 | exactly.once = false 25 | } 26 | 27 | single-burst { 28 | auto.watermark.interval = 50 29 | max.out.of.orderness = 50 30 | buffer.timeout = 100 31 | checkpoint.interval = 10000 32 | exactly.once = false 33 | } 34 | 35 | worker-failure { 36 | auto.watermark.interval = 50 37 | max.out.of.orderness = 50 38 | buffer.timeout = 100 39 | checkpoint.interval = 10000 40 | 
exactly.once = false 41 | } 42 | 43 | faulty-event { 44 | auto.watermark.interval = 50 45 | max.out.of.orderness = 50 46 | buffer.timeout = 100 47 | checkpoint.interval = 10000 48 | exactly.once = false 49 | } 50 | 51 | master-failure { 52 | auto.watermark.interval = 50 53 | max.out.of.orderness = 50 54 | buffer.timeout = 100 55 | checkpoint.interval = 10000 56 | exactly.once = true 57 | } 58 | } -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/main/scala/flink/benchmark/BenchmarkSettingsForFlink.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark 2 | 3 | import java.io.File 4 | 5 | import com.typesafe.config.{Config, ConfigFactory} 6 | import common.config.GeneralConfig 7 | import org.apache.flink.util.FileUtils 8 | 9 | import scala.collection.JavaConverters._ 10 | import scala.util.Try 11 | 12 | class BenchmarkSettingsForFlink(overrides: Map[String, Any] = Map()) extends Serializable { 13 | val general = new GeneralConfig(overrides) 14 | 15 | object specific extends Serializable { 16 | private val flinkProperties: Config = ConfigFactory.load() 17 | .withFallback(ConfigFactory.parseMap(overrides.asJava)) 18 | .withFallback(ConfigFactory.load("flink.conf")) 19 | .getConfig("flink") 20 | .getConfig(general.mode.name) 21 | 22 | val partitions: Int = general.configProperties.getString("flink.partitions").toInt 23 | val autoWatermarkInterval: Int = flinkProperties.getInt("auto.watermark.interval") 24 | val maxOutOfOrderness: Int = flinkProperties.getInt("max.out.of.orderness") 25 | val bufferTimeout: Long = flinkProperties.getLong("buffer.timeout") 26 | val checkpointInterval: Int = flinkProperties.getInt("checkpoint.interval") 27 | val exactlyOnce: Boolean = flinkProperties.getBoolean("exactly.once") 28 | 29 | // Checkpointing 30 | val checkpointDir: String = if (general.local) { 31 | val checkpointDir = new File(general.configProperties.getString("flink.checkpoint.dir")) 32 | Try(FileUtils.cleanDirectory(checkpointDir)) 33 | "file://" + checkpointDir.getCanonicalPath 34 | } else general.configProperties.getString("flink.checkpoint.dir") + general.outputTopic + "/" 35 | 36 | 37 | val jobProfileKey: String = general.mkJobProfileKey("flink", bufferTimeout) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/main/scala/flink/benchmark/stages/customObjectSerdes.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.stages 2 | 3 | import java.lang 4 | 5 | import common.benchmark.AggregatableObservation 6 | import flink.benchmark.BenchmarkSettingsForFlink 7 | import org.apache.flink.api.common.typeinfo.TypeInformation 8 | import org.apache.flink.streaming.api.scala.createTypeInformation 9 | import org.apache.flink.streaming.connectors.kafka.{KafkaDeserializationSchema, KafkaSerializationSchema} 10 | import org.apache.kafka.clients.consumer.ConsumerRecord 11 | import org.apache.kafka.clients.producer.ProducerRecord 12 | import io.circe.generic.auto._ 13 | import io.circe.syntax._ 14 | 15 | /** 16 | * Deserializers for the flow and speed events 17 | * Returns key, topic and value for each event 18 | */ 19 | class RawObservationDeserializer extends KafkaDeserializationSchema[(String, String, Long)] { 20 | 21 | override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): (String, String, Long) = { 22 | val key: String = new 
String(record.key(), "UTF-8") 23 | val value: String = new String(record.value(), "UTF-8") 24 | (key, value, record.timestamp()) 25 | } 26 | 27 | 28 | override def isEndOfStream(t: (String, String, Long)): Boolean = { 29 | false 30 | } 31 | 32 | override def getProducedType: TypeInformation[(String, String, Long)] = {createTypeInformation[(String, String, Long)]} 33 | 34 | } 35 | 36 | /** 37 | * Serializer for Kafka messages 38 | * 39 | * @param settings 40 | */ 41 | class OutputMessageSerializer(settings: BenchmarkSettingsForFlink) extends KafkaSerializationSchema[(String, String)] { 42 | 43 | override def serialize(element: (String, String), timestamp: lang.Long): ProducerRecord[Array[Byte], Array[Byte]] = { 44 | new ProducerRecord(settings.general.outputTopic, element._1.getBytes(), element._2.getBytes()) 45 | } 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/test/scala/flink/benchmark/stages/AggregationStageTest.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.stages 2 | 3 | import common.benchmark.{AggregatableObservation, FlowObservation, SpeedObservation} 4 | import common.utils.TestObservations 5 | import flink.benchmark.BenchmarkSettingsForFlink 6 | import flink.benchmark.testutils.AggregatableObservationCollectSink 7 | import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration 8 | import org.apache.flink.streaming.api.TimeCharacteristic 9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 10 | import org.apache.flink.streaming.api.functions.source.SourceFunction 11 | import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext 12 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 13 | import org.apache.flink.streaming.api.watermark.Watermark 14 | import org.apache.flink.test.util.MiniClusterWithClientResource 15 | import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} 16 | 17 | class AggregationStageTest extends FlatSpec with Matchers with BeforeAndAfter { 18 | val settings = new BenchmarkSettingsForFlink() 19 | 20 | val flinkCluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder() 21 | .setNumberSlotsPerTaskManager(1) 22 | .setNumberTaskManagers(1) 23 | .build()) 24 | 25 | val statefulStages = new StatefulStages(settings) 26 | 27 | val expectedResult: Seq[AggregatableObservation] = TestObservations.observationsAfterAggregationStage.flatten 28 | .sortBy { f: AggregatableObservation => (f.measurementId, f.publishTimestamp) } 29 | 30 | before { 31 | flinkCluster.before() 32 | } 33 | 34 | after { 35 | flinkCluster.after() 36 | } 37 | 38 | "aggregation stage" should " produce correct output" in { 39 | val env = StreamExecutionEnvironment.getExecutionEnvironment 40 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 41 | 42 | val source1 = env.addSource(new SourceFunction[(FlowObservation, SpeedObservation)]() { 43 | override def run(ctx: SourceContext[(FlowObservation, SpeedObservation)]) { 44 | TestObservations.observationsAfterJoinStage.foreach { next => 45 | next.foreach { obs => 46 | ctx.collectWithTimestamp(obs._2, obs._2._1.publishTimestamp) 47 | ctx.emitWatermark(new Watermark(obs._2._1.publishTimestamp-50)) 48 | } 49 | } 50 | ctx.close() 51 | } 52 | override def cancel(): Unit = () 53 | }) 54 | 55 | statefulStages.aggregationAfterJoinStage(source1 56 | .map{event: (FlowObservation, 
SpeedObservation) => new AggregatableObservation(event._1, event._2) }) 57 | .addSink(new AggregatableObservationCollectSink()) 58 | 59 | env.execute("aggregation-stage-test") 60 | 61 | AggregatableObservationCollectSink.values should contain allElementsOf(expectedResult) 62 | } 63 | } 64 | 65 | -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/test/scala/flink/benchmark/stages/NonIncrementalWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.stages 2 | 3 | import common.benchmark.{AggregatableFlowObservation, FlowObservation} 4 | import common.utils.TestObservations 5 | import flink.benchmark.BenchmarkSettingsForFlink 6 | import flink.benchmark.testutils.AggregatableFlowCollectSink 7 | import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration 8 | import org.apache.flink.streaming.api.TimeCharacteristic 9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 10 | import org.apache.flink.streaming.api.functions.source.SourceFunction 11 | import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext 12 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 13 | import org.apache.flink.streaming.api.watermark.Watermark 14 | import org.apache.flink.test.util.MiniClusterWithClientResource 15 | import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} 16 | 17 | import scala.collection.JavaConverters._ 18 | 19 | class NonIncrementalWindowAfterParsingStageTest extends FlatSpec with Matchers with BeforeAndAfter { 20 | val overrides: Map[String, Any] = Map("general.last.stage" -> "6", 21 | "general.window.after.parsing.window.duration" -> 300000, 22 | "general.window.after.parsing.slide.duration" -> 60000) 23 | val settings = new BenchmarkSettingsForFlink(overrides) 24 | 25 | val flinkCluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder() 26 | .setNumberSlotsPerTaskManager(1) 27 | .setNumberTaskManagers(1) 28 | .build()) 29 | 30 | before { 31 | flinkCluster.before() 32 | } 33 | 34 | after { 35 | flinkCluster.after() 36 | } 37 | 38 | "window after parsing stage " should " have produce correct output " in { 39 | val env = StreamExecutionEnvironment.getExecutionEnvironment 40 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 41 | 42 | val source1 = env.addSource(new SourceFunction[FlowObservation]() { 43 | override def run(ctx: SourceContext[FlowObservation]) { 44 | TestObservations.flowObservationsAfterParsingStage.foreach { next => 45 | next.foreach { case (_: String, obs: FlowObservation) => 46 | ctx.collectWithTimestamp(obs, obs.publishTimestamp) 47 | ctx.emitWatermark(new Watermark(obs.publishTimestamp-50)) 48 | } 49 | } 50 | ctx.close() 51 | } 52 | override def cancel(): Unit = () 53 | }) 54 | 55 | val statefulStages = new StatefulStages(settings) 56 | val aggregatedStream = statefulStages.nonIncrementalWindowAfterParsingStage(source1) 57 | aggregatedStream.addSink(new AggregatableFlowCollectSink()) 58 | 59 | env.execute("window-after-parsing-stage-test") 60 | 61 | val expectedResult = TestObservations.outputWindowAfterParsingStage.flatten.map(_._2) 62 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, f.publishTimestamp) } 63 | 64 | val collectedValues = AggregatableFlowCollectSink.values.asScala 65 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, f.publishTimestamp) } 66 | 67 | 
collectedValues should contain allElementsOf(expectedResult) 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/test/scala/flink/benchmark/stages/ReduceWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.stages 2 | 3 | import common.benchmark.{AggregatableFlowObservation, FlowObservation} 4 | import common.utils.TestObservations 5 | import flink.benchmark.BenchmarkSettingsForFlink 6 | import flink.benchmark.testutils.AggregatableFlowCollectSink 7 | import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration 8 | import org.apache.flink.streaming.api.TimeCharacteristic 9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 10 | import org.apache.flink.streaming.api.functions.source.SourceFunction 11 | import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext 12 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 13 | import org.apache.flink.streaming.api.watermark.Watermark 14 | import org.apache.flink.test.util.MiniClusterWithClientResource 15 | import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} 16 | 17 | import scala.collection.JavaConverters._ 18 | 19 | 20 | class ReduceWindowAfterParsingStageTest extends FlatSpec with Matchers with BeforeAndAfter { 21 | val overrides: Map[String, Any] = Map("general.last.stage" -> "5", 22 | "general.window.after.parsing.window.duration" -> 300000, 23 | "general.window.after.parsing.slide.duration" -> 60000) 24 | val settings = new BenchmarkSettingsForFlink(overrides) 25 | 26 | val flinkCluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder() 27 | .setNumberSlotsPerTaskManager(1) 28 | .setNumberTaskManagers(1) 29 | .build()) 30 | 31 | before { 32 | flinkCluster.before() 33 | } 34 | 35 | after { 36 | flinkCluster.after() 37 | } 38 | 39 | "window after parsing stage " should " have produce correct output " in { 40 | val env = StreamExecutionEnvironment.getExecutionEnvironment 41 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 42 | 43 | val source1 = env.addSource(new SourceFunction[FlowObservation]() { 44 | override def run(ctx: SourceContext[FlowObservation]) { 45 | TestObservations.flowObservationsAfterParsingStage.foreach { next => 46 | next.foreach { case (_: String, obs: FlowObservation) => 47 | ctx.collectWithTimestamp(obs, obs.publishTimestamp) 48 | ctx.emitWatermark(new Watermark(obs.publishTimestamp-50)) 49 | } 50 | } 51 | ctx.close() 52 | } 53 | override def cancel(): Unit = () 54 | }) 55 | 56 | val statefulStages = new StatefulStages(settings) 57 | val aggregatedStream = statefulStages.reduceWindowAfterParsingStage(source1) 58 | aggregatedStream.addSink(new AggregatableFlowCollectSink()) 59 | 60 | env.execute("window-after-parsing-stage-test") 61 | 62 | val expectedResult = TestObservations.outputWindowAfterParsingStage.flatten.map(_._2) 63 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, f.publishTimestamp) } 64 | 65 | val collectedValues = AggregatableFlowCollectSink.values.asScala 66 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, f.publishTimestamp) } 67 | 68 | collectedValues should contain allElementsOf(expectedResult) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- 
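The Flink stage tests above all drive event time the same way: the test source emits each observation with collectWithTimestamp and then a watermark lagging 50 ms behind it, mirroring the max.out.of.orderness = 50 setting in flink.conf. A minimal sketch of just that emission pattern; emitWithWatermark is a hypothetical helper written here only to make the arithmetic explicit:

import common.benchmark.FlowObservation
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext
import org.apache.flink.streaming.api.watermark.Watermark

// hypothetical helper, not part of the repository
def emitWithWatermark(ctx: SourceContext[FlowObservation], obs: FlowObservation): Unit = {
  // event time is taken from the observation itself
  ctx.collectWithTimestamp(obs, obs.publishTimestamp)
  // a watermark at publishTimestamp - 50 declares that no older event is still expected,
  // so windows ending at or before that point may fire
  ctx.emitWatermark(new Watermark(obs.publishTimestamp - 50))
}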
/benchmark/flink-benchmark/src/test/scala/flink/benchmark/stages/RelativeChangeStageTest.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.stages 2 | 3 | import common.benchmark.{AggregatableObservation, RelativeChangeObservation} 4 | import common.utils.TestObservations 5 | import flink.benchmark.BenchmarkSettingsForFlink 6 | import flink.benchmark.testutils.RelativeChangeCollectSink 7 | import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration 8 | import org.apache.flink.streaming.api.TimeCharacteristic 9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 10 | import org.apache.flink.streaming.api.functions.source.SourceFunction 11 | import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext 12 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 13 | import org.apache.flink.streaming.api.watermark.Watermark 14 | import org.apache.flink.test.util.MiniClusterWithClientResource 15 | import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} 16 | 17 | class RelativeChangeStageTest extends FlatSpec with Matchers with BeforeAndAfter { 18 | val settings = new BenchmarkSettingsForFlink 19 | 20 | val flinkCluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder() 21 | .setNumberSlotsPerTaskManager(1) 22 | .setNumberTaskManagers(1) 23 | .build()) 24 | 25 | before { 26 | flinkCluster.before() 27 | } 28 | 29 | after { 30 | flinkCluster.after() 31 | } 32 | 33 | "relative change stage" should " produce correct output" in { 34 | val env = StreamExecutionEnvironment.getExecutionEnvironment 35 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 36 | 37 | val source1 = env.addSource(new SourceFunction[AggregatableObservation]() { 38 | override def run(ctx: SourceContext[AggregatableObservation]) { 39 | TestObservations.observationsInputRelativeChangeStage.foreach { next => 40 | next.distinct.foreach { obs => 41 | ctx.collectWithTimestamp(obs, obs.publishTimestamp) 42 | ctx.emitWatermark(new Watermark(obs.publishTimestamp-50)) 43 | } 44 | } 45 | ctx.close() 46 | } 47 | override def cancel(): Unit = () 48 | }) 49 | 50 | val statefulStages = new StatefulStages(settings) 51 | statefulStages.slidingWindowAfterAggregationStage(source1) 52 | .addSink(new RelativeChangeCollectSink()) 53 | 54 | env.execute("relative-change-stage-test") 55 | 56 | val expectedResult = TestObservations.observationsAfterRelativeChangeStage 57 | .flatten.sortBy { f: RelativeChangeObservation => (f.measurementId, f.aggregatedObservation.publishTimestamp) } 58 | 59 | RelativeChangeCollectSink.values should contain allElementsOf(expectedResult) 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /benchmark/flink-benchmark/src/test/scala/flink/benchmark/testutils/CollectSinks.scala: -------------------------------------------------------------------------------- 1 | package flink.benchmark.testutils 2 | 3 | import common.benchmark.{AggregatableFlowObservation, AggregatableObservation, RelativeChangeObservation} 4 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 5 | 6 | // create a testing sink 7 | class AggregatableObservationCollectSink extends SinkFunction[AggregatableObservation] { 8 | 9 | override def invoke(value: AggregatableObservation, context: SinkFunction.Context[_]): Unit = { 10 | synchronized { 11 | AggregatableObservationCollectSink.values.add(value) 12 | } 13 | } 
14 | } 15 | 16 | object AggregatableObservationCollectSink { 17 | // must be static 18 | val values: java.util.List[AggregatableObservation] = new java.util.ArrayList() 19 | } 20 | 21 | // create a testing sink 22 | class AggregatableFlowCollectSink extends SinkFunction[AggregatableFlowObservation] { 23 | 24 | override def invoke(value: AggregatableFlowObservation, context: SinkFunction.Context[_]): Unit = { 25 | synchronized { 26 | AggregatableFlowCollectSink.values.add(value) 27 | } 28 | } 29 | } 30 | 31 | object AggregatableFlowCollectSink { 32 | // must be static 33 | val values: java.util.List[AggregatableFlowObservation] = new java.util.ArrayList() 34 | } 35 | 36 | // create a testing sink 37 | class RelativeChangeCollectSink extends SinkFunction[RelativeChangeObservation] { 38 | 39 | override def invoke(value: RelativeChangeObservation, context: SinkFunction.Context[_]): Unit = { 40 | synchronized { 41 | RelativeChangeCollectSink.values.add(value) 42 | } 43 | } 44 | } 45 | 46 | object RelativeChangeCollectSink { 47 | // must be static 48 | val values: java.util.List[RelativeChangeObservation] = new java.util.ArrayList() 49 | } -------------------------------------------------------------------------------- /benchmark/kafka-benchmark/src/main/resources/kafkastreams.conf: -------------------------------------------------------------------------------- 1 | include "general.conf" 2 | 3 | kafkastreams { 4 | 5 | latency-constant-rate { 6 | memory = 20 7 | batch.size = 16384 8 | buffer.memory.bytes = 33554432 9 | cache.max.bytes.buffering = 10485760 10 | compression.type.config = "none" 11 | exactly.once = false 12 | fetch.min.bytes = 1 13 | grace.period.ms = 5000 14 | linger.ms = 0 15 | max.task.idle.ms = "0" 16 | } 17 | 18 | constant-rate { 19 | memory = 20 20 | batch.size = 204800 21 | buffer.memory.bytes = 33554432 22 | cache.max.bytes.buffering = 536870912 23 | compression.type.config = "lz4" 24 | exactly.once = false 25 | fetch.min.bytes = 10240 26 | grace.period.ms = 1000 27 | linger.ms = 100 28 | max.task.idle.ms = "0" 29 | } 30 | 31 | periodic-burst { 32 | memory = 20 33 | batch.size = 204800 34 | buffer.memory.bytes = 33554432 35 | cache.max.bytes.buffering = 10485760 36 | compression.type.config = "lz4" 37 | exactly.once = false 38 | fetch.min.bytes = 1 39 | grace.period.ms = 5000 40 | linger.ms = 100 41 | max.task.idle.ms = "0" 42 | } 43 | 44 | single-burst { 45 | memory = 20 46 | batch.size = 204800 47 | buffer.memory.bytes = 33554432 48 | cache.max.bytes.buffering = 10485760 49 | compression.type.config = "lz4" 50 | exactly.once = false 51 | fetch.min.bytes = 1 52 | grace.period.ms = 90000 53 | linger.ms = 100 54 | max.task.idle.ms = "300000" 55 | } 56 | 57 | worker-failure { 58 | memory = 20 59 | batch.size = 204800 60 | buffer.memory.bytes = 33554432 61 | cache.max.bytes.buffering = 10485760 62 | compression.type.config = "lz4" 63 | exactly.once = true 64 | fetch.min.bytes = 1 65 | grace.period.ms = 30000 66 | linger.ms = 100 67 | max.task.idle.ms = "300000" 68 | } 69 | 70 | faulty-event { 71 | memory = 20 72 | batch.size = 204800 73 | buffer.memory.bytes = 33554432 74 | cache.max.bytes.buffering = 10485760 75 | compression.type.config = "lz4" 76 | exactly.once = false 77 | fetch.min.bytes = 1 78 | grace.period.ms = 5000 79 | linger.ms = 100 80 | max.task.idle.ms = "0" 81 | } 82 | } -------------------------------------------------------------------------------- /benchmark/kafka-benchmark/src/main/scala/kafka/benchmark/stages/CustomObjectSerdes.scala: 
-------------------------------------------------------------------------------- 1 | package kafka.benchmark.stages 2 | 3 | import common.benchmark._ 4 | import org.apache.kafka.common.serialization.{Serde, Serdes} 5 | import org.apache.kafka.streams.kstream.{TimeWindowedDeserializer, TimeWindowedSerializer, Windowed} 6 | 7 | object CustomObjectSerdes { 8 | //Serializers and Deserializers for Kafka 9 | implicit val StringSerde: Serde[String] = Serdes.String() 10 | implicit val FlowObservationSerde: Serde[FlowObservation] = Serdes.serdeFrom(new FlowSerializer, new FlowDeserializer) 11 | implicit val SpeedObservationSerde: Serde[SpeedObservation] = Serdes.serdeFrom(new SpeedSerializer, new SpeedDeserializer) 12 | implicit val AggregatableObservationSerde: Serde[AggregatableObservation] = Serdes.serdeFrom(new AggregatableObservationSerializer, new AggregatableObservationDeserializer) 13 | implicit val AggregatableFlowObservationSerde: Serde[AggregatableFlowObservation] = Serdes.serdeFrom(new AggregatableFlowSerializer, new AggregatableFlowDeserializer) 14 | implicit val AggregatableFlowObservationListSerde: Serde[List[AggregatableFlowObservation]] = Serdes.serdeFrom(new AggregatableFlowListSerializer, new AggregatableFlowListDeserializer) 15 | implicit val AggregatedObservationListSerde: Serde[List[AggregatableObservation]] = Serdes.serdeFrom(new AggregationListSerializer, new AggregationListDeserializer) 16 | implicit val RelativeChangeObservationSerde: Serde[RelativeChangeObservation] = Serdes.serdeFrom(new RelativeChangeSerializer, new RelativeChangeDeserializer) 17 | implicit val WindowedStringSerde: Serde[Windowed[String]] = Serdes.serdeFrom(new TimeWindowedSerializer[String](), new TimeWindowedDeserializer[String]()) 18 | } 19 | -------------------------------------------------------------------------------- /benchmark/kafka-benchmark/src/test/scala/kafka/benchmark/stages/ReduceWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package kafka.benchmark.stages 2 | 3 | import java.util.Properties 4 | 5 | import common.benchmark.{AggregatableFlowObservation, FlowObservation} 6 | import common.utils.TestObservations 7 | import kafka.benchmark.{BenchmarkSettingsForKafkaStreams, KafkaTrafficAnalyzer} 8 | import org.apache.kafka.common.serialization._ 9 | import org.apache.kafka.streams.{KeyValue, TopologyTestDriver} 10 | import org.apache.kafka.streams.kstream._ 11 | import org.apache.kafka.streams.scala.StreamsBuilder 12 | import org.apache.kafka.streams.scala.kstream.Consumed 13 | import org.scalatest._ 14 | import org.slf4j.{Logger, LoggerFactory} 15 | 16 | import collection.JavaConverters._ 17 | 18 | 19 | class ReduceWindowAfterParsingStageTest extends FlatSpec with Matchers { 20 | val logger: Logger = LoggerFactory.getLogger(this.getClass) 21 | 22 | val overrides: Map[String, Any] = Map("general.last.stage" -> "5", 23 | "general.window.after.parsing.window.duration" -> 300000, 24 | "general.window.after.parsing.slide.duration" -> 60000) 25 | val settings: BenchmarkSettingsForKafkaStreams = new BenchmarkSettingsForKafkaStreams(overrides) 26 | 27 | val props: Properties = KafkaTrafficAnalyzer.initKafka(settings) 28 | val builder = new StreamsBuilder() 29 | 30 | val statefulStages = new StatefulStages(settings) 31 | val expectedOutput: Seq[AggregatableFlowObservation] = TestObservations.outputWindowAfterParsingStage.flatten 32 | .map(_._2) 33 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, 
f.publishTimestamp) } 34 | 35 | "window after parsing stage" should " produce correct output" in { 36 | val inputStream = builder.stream("input-topic")(Consumed.`with`(CustomObjectSerdes.StringSerde, CustomObjectSerdes.FlowObservationSerde)) 37 | 38 | statefulStages.reduceWindowAfterParsingStage(inputStream) 39 | .map[String, AggregatableFlowObservation] { (key: Windowed[String], obs: AggregatableFlowObservation) => (obs.measurementId, obs) } 40 | .to("output-topic")(Produced.`with`(CustomObjectSerdes.StringSerde, CustomObjectSerdes.AggregatableFlowObservationSerde)) 41 | 42 | val topology = builder.build() 43 | val topologyTestDriver = new TopologyTestDriver(topology, props) 44 | 45 | val inputTopic = topologyTestDriver.createInputTopic[String, FlowObservation]("input-topic", new StringSerializer, new FlowSerializer) 46 | val outputTopic = topologyTestDriver.createOutputTopic[String, AggregatableFlowObservation]("output-topic", new StringDeserializer, new AggregatableFlowDeserializer) 47 | 48 | TestObservations.flowObservationsAfterParsingStage 49 | .foreach { next => 50 | next.foreach{ obs => 51 | inputTopic.pipeInput(obs._1, obs._2, obs._2.publishTimestamp) 52 | } 53 | } 54 | val myOutputList = outputTopic.readValuesToList().asScala 55 | .sortBy { f: AggregatableFlowObservation => (f.measurementId, f.publishTimestamp) } 56 | 57 | println(myOutputList.mkString("\n")) 58 | myOutputList should contain allElementsOf expectedOutput 59 | topologyTestDriver.close() 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /benchmark/kafka-benchmark/src/test/scala/kafka/benchmark/stages/SerdeTest.scala: -------------------------------------------------------------------------------- 1 | package kafka.benchmark.stages 2 | 3 | import common.benchmark.{FlowObservation, SpeedObservation} 4 | import org.scalatest.{Inside, Matchers, WordSpec} 5 | 6 | class SerdeTest extends WordSpec with Matchers with Inside { 7 | 8 | "serializing and deserializing " should { 9 | "result in the same speed observation" in { 10 | val speedObs = SpeedObservation("GEO02_PNHTI532r", "lane1", 1489584240000l, 52.431985, 4.64708, 95.0, 95, 2, "") 11 | 12 | val speedSerializer = new SpeedSerializer 13 | val speedDeserializer = new SpeedDeserializer 14 | 15 | val serializedSpeedObs = speedSerializer.serialize("topic-1", speedObs) 16 | val deserializedSpeedObs = speedDeserializer.deserialize("topic-1", serializedSpeedObs) 17 | 18 | inside(deserializedSpeedObs) { 19 | case speed: SpeedObservation => 20 | speed.measurementId should be("GEO02_PNHTI532r") 21 | speed.internalId should be("lane1") 22 | speed.publishTimestamp should be(1489584240000l) 23 | speed.latitude should be(52.431985) 24 | speed.longitude should be(4.64708) 25 | speed.speed should be(95.0) 26 | speed.accuracy should be(95) 27 | speed.numLanes should be(2) 28 | } 29 | } 30 | 31 | 32 | "result in the same flow observation" in { 33 | val flowObs = FlowObservation("GEO02_PNHTI532r", "lane1", 1489584240000l, 52.431985, 4.64708, 180, 60, 95, 2, "") 34 | 35 | val flowSerializer = new FlowSerializer 36 | val flowDeserializer = new FlowDeserializer 37 | 38 | val serializedFlowObs = flowSerializer.serialize("topic-1", flowObs) 39 | val deserializedFlowObs = flowDeserializer.deserialize("topic-1", serializedFlowObs) 40 | 41 | println(flowObs) 42 | println(deserializedFlowObs) 43 | 44 | inside(deserializedFlowObs) { 45 | case flow: FlowObservation => 46 | flow.measurementId should be("GEO02_PNHTI532r") 47 | flow.internalId should 
be("lane1") 48 | flow.publishTimestamp should be(1489584240000l) 49 | flow.latitude should be(52.431985) 50 | flow.longitude should be(4.64708) 51 | flow.flow should be(180) 52 | flow.period should be(60) 53 | flow.accuracy should be(95) 54 | flow.numLanes should be(2) 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /benchmark/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.18 -------------------------------------------------------------------------------- /benchmark/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | resolvers := Seq( 3 | "otto-bintray" at "https://dl.bintray.com/ottogroup/maven", 4 | "Sbt plugins" at "https://dl.bintray.com/sbt/sbt-plugin-releases" 5 | ) 6 | 7 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") 8 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") 9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 10 | addSbtPlugin("com.typesafe.sbt" %% "sbt-native-packager" % "1.3.2") -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/main/resources/spark.conf: -------------------------------------------------------------------------------- 1 | spark { 2 | latency-constant-rate { 3 | spark.streaming.receiver.writeAheadLog.enable = false 4 | locality.wait = 10 5 | } 6 | 7 | constant-rate { 8 | spark.streaming.receiver.writeAheadLog.enable = false 9 | locality.wait = 0 10 | } 11 | 12 | periodic-burst { 13 | spark.streaming.receiver.writeAheadLog.enable = false 14 | locality.wait = 10 15 | } 16 | 17 | single-burst { 18 | spark.streaming.receiver.writeAheadLog.enable = false 19 | locality.wait = 10 20 | } 21 | 22 | worker-failure { 23 | streaming.batchInterval = 1000 24 | spark.streaming.receiver.writeAheadLog.enable = false 25 | locality.wait = 10 26 | } 27 | 28 | faulty-event { 29 | streaming.batchInterval = 100 30 | sql.streaming.minBatchesToRetain = 2 31 | spark.streaming.receiver.writeAheadLog.enable = false 32 | locality.wait = 10 33 | } 34 | 35 | master-failure { 36 | streaming.batchInterval = 1000 37 | spark.streaming.receiver.writeAheadLog.enable = true 38 | locality.wait = 10 39 | } 40 | } -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/main/scala/spark/benchmark/BenchmarkSettingsForSpark.scala: -------------------------------------------------------------------------------- 1 | package spark.benchmark 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | import common.config.GeneralConfig 5 | import common.config.JobExecutionMode.{CONSTANT_RATE, LATENCY_CONSTANT_RATE} 6 | import common.config.LastStage.{NON_INCREMENTAL_WINDOW_WITHOUT_JOIN, REDUCE_WINDOW_WITHOUT_JOIN} 7 | 8 | import scala.collection.JavaConverters._ 9 | 10 | object BenchmarkSettingsForSpark { 11 | 12 | implicit class OptionValue(val value: Option[String]) extends AnyVal { 13 | def keyedWith(key: String): Option[(String, String)] = value.map(v => key -> v) 14 | } 15 | 16 | } 17 | 18 | class BenchmarkSettingsForSpark(overrides: Map[String, Any] = Map()) extends Serializable { 19 | 20 | val general = new GeneralConfig(overrides) 21 | 22 | object specific extends Serializable { 23 | private val sparkProperties: Config = ConfigFactory.load() 24 | .withFallback(ConfigFactory.parseMap(overrides.asJava)) 25 | 
.withFallback(ConfigFactory.load("spark.conf")) 26 | .getConfig("spark") 27 | .getConfig(general.mode.name) 28 | 29 | val checkpointDir: String = if (general.local) general.configProperties.getString("spark.checkpoint.dir") 30 | else "hdfs://" + general.hdfsActiveNameNode + "/checkpointDirSpark" + general.outputTopic + "/" 31 | 32 | val sparkMaster: String = general.configProperties.getString("spark.master") 33 | 34 | val batchInterval: Int = if (general.lastStage.value < 2 & general.mode.equals(LATENCY_CONSTANT_RATE)) 200 35 | else if (general.lastStage == REDUCE_WINDOW_WITHOUT_JOIN || general.lastStage == NON_INCREMENTAL_WINDOW_WITHOUT_JOIN) general.slideDurationMsOfWindowAfterParse 36 | else 1000 37 | 38 | val defaultParallelism: Int = general.configProperties.getInt("spark.default.parallelism") 39 | val sqlShufflePartitions: Int = general.configProperties.getInt("spark.sql.shuffle.partitions") 40 | val blockInterval: Int = Math.min(batchInterval/defaultParallelism, 50) 41 | val localityWait: Int = sparkProperties.getInt("locality.wait") 42 | val writeAheadLogEnabled: Boolean = sparkProperties.getBoolean("spark.streaming.receiver.writeAheadLog.enable") 43 | val jobProfileKey: String = general.mkJobProfileKey("spark", batchInterval) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/main/scala/spark/benchmark/stages/KafkaSinkForSpark.scala: -------------------------------------------------------------------------------- 1 | package spark.benchmark.stages 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 6 | import org.apache.kafka.common.serialization.StringSerializer 7 | 8 | class KafkaSinkForSpark(producerFactory: () => KafkaProducer[String, String], topic: String) extends Serializable { 9 | lazy val producerForThisExecutor = producerFactory() 10 | 11 | def send( observation: (String,String)) = 12 | producerForThisExecutor.send(new ProducerRecord(topic, observation._1, observation._2)) 13 | 14 | } 15 | 16 | object KafkaSinkForSpark { 17 | def apply(bootstrapServers: String, outputTopic: String): KafkaSinkForSpark = { 18 | val producerFactory = () => { 19 | val kafkaProperties = new Properties() 20 | kafkaProperties.setProperty("bootstrap.servers", bootstrapServers) 21 | val producer = new KafkaProducer(kafkaProperties, new StringSerializer, new StringSerializer) 22 | sys.addShutdownHook { 23 | producer.close() 24 | } 25 | producer 26 | } 27 | new KafkaSinkForSpark(producerFactory, outputTopic) 28 | } 29 | } -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg %ex{0}%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/test/scala/spark/benchmark/stages/AggregationStageTest.scala: -------------------------------------------------------------------------------- 1 | 2 | package spark.benchmark.stages 3 | 4 | import com.holdenkarau.spark.testing.StreamingSuiteBase 5 | import common.utils.TestObservations 6 | import org.scalatest.FunSuite 7 | import spark.benchmark.BenchmarkSettingsForSpark 8 | import spark.benchmark.stages.StatefulStages 9 | 10 | /** 11 | * Test aggregation phase 12 | * 13 | * - 
Uses test observations of common-benchmark/src/test/scala/common/utils/TestObservations.scala 14 | **/ 15 | class AggregationStageTest extends FunSuite with StreamingSuiteBase { 16 | 17 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 18 | System.setProperty("spark.io.compression.codec", "snappy") 19 | 20 | // Execute test 21 | test("aggregate over lanes per measurement ID") { 22 | // Setup environment 23 | // Initialize Apache Spark 24 | val settings = new BenchmarkSettingsForSpark() 25 | // Test data which is the result of the join phase 26 | val testDataAfterJoinPhase = TestObservations.observationsAfterJoinStage.flatten 27 | 28 | // The expected result of the aggregation phase 29 | val expectedResultOfAggregation = TestObservations.observationsAfterAggregationStage.flatten 30 | 31 | val statefulStages = new StatefulStages(settings) 32 | // Aggregate over lanes per measurement ID 33 | testOperation(Seq(testDataAfterJoinPhase), statefulStages.aggregationAfterJoinStage _, Seq(expectedResultOfAggregation), ordered=false) 34 | 35 | } 36 | 37 | } 38 | 39 | -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/test/scala/spark/benchmark/stages/NonIncrementalWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package spark.benchmark.stages 2 | 3 | import com.holdenkarau.spark.testing.StreamingSuiteBase 4 | import common.utils.TestObservations 5 | import org.apache.spark.streaming.Duration 6 | import org.scalatest.FunSuite 7 | import spark.benchmark.BenchmarkSettingsForSpark 8 | 9 | class NonIncrementalWindowAfterParsingStageTest extends FunSuite with StreamingSuiteBase { 10 | 11 | override def batchDuration: Duration = Duration.apply(60000) 12 | 13 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 14 | System.setProperty("spark.io.compression.codec", "snappy") 15 | 16 | // Execute test 17 | test("aggregate over sliding window per measurement ID") { 18 | // Setup environment 19 | // Initialize Apache Spark 20 | // Set the window to the right size for the test data 21 | val overrides: Map[String, Any] = Map("general.last.stage" -> "5", 22 | "general.window.after.parsing.window.duration" -> 300000, 23 | "general.window.after.parsing.slide.duration" -> 60000) 24 | val settings = new BenchmarkSettingsForSpark(overrides) 25 | 26 | // Test data which is the result of the join phase 27 | val parsedFlowEvents= TestObservations.flowObservationsAfterParsingStage 28 | 29 | // The expected result of the aggregation phase 30 | // Due to the processing time semantics of Spark it will not output values when the key is not in the slide window 31 | // These are the four values at the end of the test data. The length of the window is 300 000 ms and the slide interval is 60 000 ms. 32 | // So the last four values are the results that actually come after the last real input batch. 
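// Worked out: a 300 000 ms window with a 60 000 ms slide means each event is covered by 300 000 / 60 000 = 5 overlapping windows; the 4 windows that only close after the last real input batch are the ones dropped from the expected output below.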
33 | val expectedResultOfAggregation = TestObservations.outputWindowAfterParsingStage 34 | .dropRight(4) 35 | 36 | val statefulStages = new StatefulStages(settings) 37 | 38 | testOperation(parsedFlowEvents, statefulStages.nonIncrementalWindowAfterParsingStage _, expectedResultOfAggregation, ordered = false) 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/test/scala/spark/benchmark/stages/ReduceWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package spark.benchmark.stages 2 | 3 | import com.holdenkarau.spark.testing.StreamingSuiteBase 4 | import common.utils.TestObservations 5 | import org.apache.spark.streaming.Duration 6 | import org.scalatest.FunSuite 7 | import spark.benchmark.BenchmarkSettingsForSpark 8 | 9 | /** 10 | * Test aggregation phase 11 | * 12 | * - Uses test observations of common-benchmark/src/test/scala/common/utils/TestObservations.scala 13 | **/ 14 | class ReduceWindowAfterParsingStageTest extends FunSuite with StreamingSuiteBase { 15 | 16 | override def batchDuration: Duration = Duration.apply(60000) 17 | 18 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 19 | System.setProperty("spark.io.compression.codec", "snappy") 20 | 21 | // Execute test 22 | test("aggregate over sliding window per measurement ID") { 23 | // Setup environment 24 | // Initialize Apache Spark 25 | // Set the window to the right size for the test data 26 | val overrides: Map[String, Any] = Map("general.last.stage" -> "5", 27 | "general.window.after.parsing.window.duration" -> 300000, 28 | "general.window.after.parsing.slide.duration" -> 60000) 29 | val settings = new BenchmarkSettingsForSpark(overrides) 30 | 31 | // Test data which is the result of the join phase 32 | val parsedFlowEvents= TestObservations.flowObservationsAfterParsingStage 33 | 34 | // The expected result of the aggregation phase 35 | // Due to the processing time semantics of Spark it will not output values when the key is not in the slide window 36 | // These are the four values at the end of the test data. The length of the window is 300 000 ms and the slide interval is 60 000 ms. 37 | // So the last four values are the results that actually come after the last real input batch. 
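// Worked out: with the 300 000 ms window and 60 000 ms slide, each event belongs to 300 000 / 60 000 = 5 sliding windows, and the 4 windows that complete only after the final input batch are excluded via dropRight(4) below.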
38 | val expectedResultOfAggregation = TestObservations.outputWindowAfterParsingStage 39 | .dropRight(4) 40 | 41 | val statefulStages = new StatefulStages(settings) 42 | 43 | testOperation(parsedFlowEvents, statefulStages.reduceWindowAfterParsingStage _, expectedResultOfAggregation, ordered = false) 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /benchmark/spark-benchmark/src/test/scala/spark/benchmark/stages/SlidingWindowAfterAggregationStageTest.scala: -------------------------------------------------------------------------------- 1 | 2 | package spark.benchmark.stages 3 | 4 | import com.holdenkarau.spark.testing.StreamingSuiteBase 5 | import common.benchmark.{AggregatableObservation, RelativeChangeObservation} 6 | import common.utils.TestObservations 7 | import org.scalatest.FunSuite 8 | import spark.benchmark.BenchmarkSettingsForSpark 9 | import spark.benchmark.stages.StatefulStages 10 | 11 | /** 12 | * Test relative change calculation phase 13 | * 14 | * - Uses test observations of common-benchmark/src/test/scala/common/utils/TestObservations.scala 15 | * */ 16 | class SlidingWindowAfterAggregationStageTest extends FunSuite with StreamingSuiteBase { 17 | 18 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 19 | System.setProperty("spark.io.compression.codec", "snappy") 20 | 21 | private val settings = new BenchmarkSettingsForSpark() 22 | val testDataAfterAggregationPhase = TestObservations.observationsInputRelativeChangeStage 23 | 24 | // For Spark only complete results will be returned, otherwise there will be too much output for the single burst workload 25 | // This is due to the lach of event time processing for Spark 26 | val expectedResultOfRelativeChangePhase: Seq[List[RelativeChangeObservation]] = TestObservations.observationsAfterRelativeChangeStage 27 | .map{ elements: List[RelativeChangeObservation] => elements.filter(el => el.shortDiff.isDefined & el.longDiff.isDefined)} 28 | 29 | // Execute test 30 | test("compute relative change per measurement ID"){ 31 | // Aggregate over lanes per measurement ID 32 | val statefulStages = new StatefulStages(settings) 33 | testOperation(testDataAfterAggregationPhase, statefulStages.slidingWindowAfterAggregationStage _, expectedResultOfRelativeChangePhase) 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/main/resources/structuredstreaming.conf: -------------------------------------------------------------------------------- 1 | structuredstreaming { 2 | latency-constant-rate { 3 | watermark.ms = 0 4 | spark.streaming.receiver.writeAheadLog.enable = false 5 | locality.wait = 100 6 | } 7 | 8 | constant-rate { 9 | watermark.ms = 50 10 | spark.streaming.receiver.writeAheadLog.enable = false 11 | locality.wait = 100 12 | } 13 | 14 | periodic-burst { 15 | watermark.ms = 50 16 | spark.streaming.receiver.writeAheadLog.enable = false 17 | locality.wait = 100 18 | } 19 | 20 | single-burst { 21 | watermark.ms = 50 22 | spark.streaming.receiver.writeAheadLog.enable = false 23 | 
locality.wait = 100 24 | } 25 | 26 | worker-failure { 27 | watermark.ms = 100 28 | spark.streaming.receiver.writeAheadLog.enable = true 29 | locality.wait = 20 30 | } 31 | 32 | faulty-event { 33 | watermark.ms = 100 34 | spark.streaming.receiver.writeAheadLog.enable = false 35 | locality.wait = 20 36 | } 37 | 38 | 39 | master-failure { 40 | watermark.ms = 100 41 | spark.streaming.receiver.writeAheadLog.enable = true 42 | locality.wait = 20 43 | } 44 | } -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/main/scala/structuredstreaming/benchmark/BenchmarkSettingsForStructuredStreaming.scala: -------------------------------------------------------------------------------- 1 | package structuredstreaming.benchmark 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | import common.config.GeneralConfig 5 | import common.config.JobExecutionMode.{CONSTANT_RATE, LATENCY_CONSTANT_RATE} 6 | import common.config.LastStage.{NON_INCREMENTAL_WINDOW_WITHOUT_JOIN, REDUCE_WINDOW_WITHOUT_JOIN} 7 | import org.apache.spark.sql.streaming.Trigger 8 | 9 | import scala.collection.JavaConverters._ 10 | 11 | object BenchmarkSettingsForStructuredStreaming { 12 | 13 | implicit class OptionValue(val value: Option[String]) extends AnyVal { 14 | def keyedWith(key: String): Option[(String, String)] = value.map(v => key -> v) 15 | } 16 | 17 | } 18 | 19 | class BenchmarkSettingsForStructuredStreaming(overrides: Map[String, Any] = Map()) extends Serializable { 20 | 21 | val general = new GeneralConfig(overrides) 22 | 23 | object specific extends Serializable{ 24 | private val sparkProperties: Config = ConfigFactory.load() 25 | .withFallback(ConfigFactory.parseMap(overrides.asJava)) 26 | .withFallback(ConfigFactory.load("structuredstreaming.conf")) 27 | .getConfig("structuredstreaming") 28 | .getConfig(general.mode.name) 29 | 30 | val checkpointDir: String = if (general.local) general.configProperties.getString("spark.checkpoint.dir") 31 | else "hdfs://" + general.hdfsActiveNameNode + "/checkpointDirStructured" + general.outputTopic + "/" 32 | 33 | val defaultParallelism: Int = general.configProperties.getInt("spark.default.parallelism") 34 | val sqlShufflePartitions: Int = general.configProperties.getInt("spark.sql.shuffle.partitions") 35 | val blockInterval: Int = 1000/defaultParallelism 36 | val sparkMaster: String = general.configProperties.getString("spark.master") 37 | val watermarkMillis: Long = sparkProperties.getLong("watermark.ms") 38 | val localityWait: Int = sparkProperties.getInt("locality.wait") 39 | val writeAheadLogEnabled: Boolean = sparkProperties.getBoolean("spark.streaming.receiver.writeAheadLog.enable") 40 | 41 | val trigger: Trigger = if(general.lastStage == REDUCE_WINDOW_WITHOUT_JOIN || general.lastStage == NON_INCREMENTAL_WINDOW_WITHOUT_JOIN) Trigger.ProcessingTime(general.slideDurationMsOfWindowAfterParse) 42 | else Trigger.ProcessingTime(0) // processing as fast as possible 43 | 44 | val jobProfileKey: String = general.mkJobProfileKey("structuredstreaming", general.publishIntervalMillis) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/main/scala/structuredstreaming/benchmark/stages/OutputUtils.scala: -------------------------------------------------------------------------------- 1 | package structuredstreaming.benchmark.stages 2 | 3 | import java.sql.Timestamp 4 | 5 | import org.apache.spark.sql.functions.udf 6 | 
import org.apache.spark.sql.streaming.Trigger 7 | import org.apache.spark.sql.{Dataset, SparkSession} 8 | import structuredstreaming.benchmark.BenchmarkSettingsForStructuredStreaming 9 | 10 | class OutputUtils(sparkSession: SparkSession, settings: BenchmarkSettingsForStructuredStreaming) { 11 | val timeUDF = udf((time: Timestamp) => time.getTime) 12 | 13 | val r = new scala.util.Random 14 | val randomLong: Long = r.nextLong() 15 | 16 | def writeToKafka(dataSet: Dataset[_], queryNbr: Int = 0, awaitTermination: Boolean = false): Unit = { 17 | val kafkaWriter = dataSet 18 | .writeStream 19 | .outputMode("append") 20 | .format("kafka") 21 | .option("kafka.bootstrap.servers", settings.general.kafkaBootstrapServers) 22 | .option("topic", settings.general.outputTopic) 23 | .option("checkpointLocation", settings.specific.checkpointDir) 24 | .trigger(settings.specific.trigger) 25 | .start() 26 | if (awaitTermination) kafkaWriter.awaitTermination() 27 | } 28 | 29 | def printToConsole(dataSet: Dataset[_], awaitTermination: Boolean = false): Unit = { 30 | val consolePrinter = dataSet 31 | .writeStream 32 | .outputMode("append") 33 | .format("console") 34 | .option("truncate", false) 35 | .trigger(settings.specific.trigger) 36 | .start() 37 | 38 | if (awaitTermination) consolePrinter.awaitTermination() 39 | } 40 | } 41 | 42 | case class KafkaOutputObservation(key: String, value: String) 43 | -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/test/scala/structuredstreaming.benchmark/stages/AggregationStageTest.scala: -------------------------------------------------------------------------------- 1 | package structuredstreaming.benchmark.stages 2 | 3 | import java.sql.Timestamp 4 | 5 | import com.holdenkarau.spark.testing.{DataFrameSuiteBase, RDDComparisons, StreamingSuiteBase} 6 | import common.benchmark.{FlowObservation, SpeedObservation} 7 | import common.utils.TestObservations 8 | import org.apache.spark.sql.functions._ 9 | import org.apache.spark.sql.types._ 10 | import org.scalatest.FunSuite 11 | import structuredstreaming.benchmark.BenchmarkSettingsForStructuredStreaming 12 | 13 | /** 14 | * Test aggregation phase 15 | * 16 | * - Uses test observations of common-benchmark/src/test/scala/common/utils/TestObservations.scala 17 | */ 18 | class AggregationStageTest extends FunSuite with DataFrameSuiteBase with StreamingSuiteBase with RDDComparisons { 19 | 20 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 21 | System.setProperty("spark.io.compression.codec", "snappy") 22 | 23 | import spark.implicits._ 24 | 25 | private val settings = new BenchmarkSettingsForStructuredStreaming 26 | 27 | test("aggregate over lanes per measurement ID") { 28 | // Aggregate over lanes per measurement ID 29 | val analytics = new StatefulStages(spark, settings) 30 | 31 | val inputAggregationStage = sc.parallelize(TestObservations.observationsAfterJoinStage.flatten) 32 | .map { case (key: String, (flowObservation: FlowObservation, speedObservation: SpeedObservation)) => 33 | (flowObservation.measurementId, flowObservation.internalId, new Timestamp(flowObservation.publishTimestamp), flowObservation.latitude, 34 | flowObservation.longitude, flowObservation.flow, flowObservation.period, flowObservation.accuracy, 35 | speedObservation.speed, speedObservation.accuracy, flowObservation.numLanes 36 | ) 37 | }.toDF("measurementId", "lanes", "publishTimestamp", "latitude", "longitude", "accumulatedFlow", "period", 38 | 
"flowAccuracy", "averageSpeed", "speedAccuracy", "numLanes") 39 | .withColumn("timestamp", date_trunc("second", col("publishTimestamp"))) 40 | 41 | val expectedOutputAggregationStage = TestObservations.observationsAfterAggregationStage.flatten 42 | .toDF() 43 | .withColumn("publishTimestamp", (col("publishTimestamp") / 1000).cast(TimestampType)) 44 | .orderBy("publishTimestamp", "measurementId", "lanes") 45 | .withColumn("lanes", sort_array(col("lanes"))) 46 | .drop("jobprofile") 47 | expectedOutputAggregationStage.show() 48 | 49 | val realOutputAggregationStage = analytics.aggregationAfterJoinStage(inputAggregationStage) 50 | .select("measurementId", "lanes", "publishTimestamp", "latitude", "longitude", "accumulatedFlow", 51 | "period", "flowAccuracy", "averageSpeed", "speedAccuracy", "numLanes") 52 | .orderBy("publishTimestamp", "measurementId", "lanes") 53 | .withColumn("lanes", sort_array(col("lanes"))) 54 | realOutputAggregationStage.show() 55 | 56 | assertRDDEquals(expectedOutputAggregationStage.rdd, realOutputAggregationStage.rdd) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /benchmark/structured-streaming-benchmark/src/test/scala/structuredstreaming.benchmark/stages/ReduceWindowAfterParsingStageTest.scala: -------------------------------------------------------------------------------- 1 | package structuredstreaming.benchmark.stages 2 | 3 | import java.sql.Timestamp 4 | 5 | import com.holdenkarau.spark.testing.{DataFrameSuiteBase, RDDComparisons, StreamingSuiteBase} 6 | import common.benchmark.AggregatableFlowObservation 7 | import common.utils.TestObservations 8 | import org.apache.spark.sql.functions.{col, date_trunc, udf} 9 | import org.apache.spark.sql.types.TimestampType 10 | import org.scalatest.FunSuite 11 | import structuredstreaming.benchmark.BenchmarkSettingsForStructuredStreaming 12 | 13 | class ReduceWindowAfterParsingStageTest extends FunSuite with DataFrameSuiteBase with StreamingSuiteBase with RDDComparisons { 14 | 15 | // Due to net.jpountz.lz4 version incompatibility we switch to snappy for the tests 16 | System.setProperty("spark.io.compression.codec", "snappy") 17 | 18 | import spark.implicits._ 19 | 20 | val overrides: Map[String, Any] = Map("general.last.stage" -> "5", 21 | "general.window.after.parsing.window.duration" -> 300000, 22 | "general.window.after.parsing.slide.duration" -> 60000) 23 | private val settings = new BenchmarkSettingsForStructuredStreaming(overrides) 24 | 25 | test("aggregate flow events per measurement ID") { 26 | // Aggregate over lanes per measurement ID 27 | val analytics = new StatefulStages(spark, settings) 28 | 29 | val getTimestamp = udf((flowTimestamp: Long) => new Timestamp(flowTimestamp)) 30 | val inputwindowAfterParsingStage = sc.parallelize(TestObservations.flowObservationsAfterParsingStage.flatten) 31 | .map(el => new AggregatableFlowObservation(el._2)) 32 | .toDF() 33 | .withColumn("publishTimestamp", getTimestamp(col("publishTimestamp"))) 34 | .withColumn("timestamp", date_trunc("second", col("publishTimestamp"))) 35 | inputwindowAfterParsingStage.show(100) 36 | 37 | val expectedOutputWindowAfterParsingStage = TestObservations.outputWindowAfterParsingStage.flatten.map(_._2) 38 | .toDF() 39 | .withColumn("publishTimestamp", (col("publishTimestamp") / 1000).cast(TimestampType)) 40 | .drop("jobProfile") 41 | .orderBy("publishTimestamp", "measurementId", "laneCount") 42 | expectedOutputWindowAfterParsingStage.show() 43 | 44 | val outputWindowAfterParsingStage = 
analytics.reduceWindowAfterParsingStage(inputwindowAfterParsingStage) 45 | .select("measurementId", "laneCount", "publishTimestamp", "latitude", "longitude", "accumulatedFlow", 46 | "period", "flowAccuracy", "numLanes") 47 | .orderBy("publishTimestamp", "measurementId", "laneCount") 48 | outputWindowAfterParsingStage.show() 49 | 50 | assertRDDEquals(expectedOutputWindowAfterParsingStage.rdd, outputWindowAfterParsingStage.rdd) 51 | } 52 | } -------------------------------------------------------------------------------- /data-stream-generator/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /.idea/ 3 | /data-stream-generator.ipr 4 | /data-stream-generator.iws 5 | /project/target/ 6 | /project/project/ 7 | -------------------------------------------------------------------------------- /data-stream-generator/README.md: -------------------------------------------------------------------------------- 1 | # Data Stream Generator for OSPBench 2 | 3 | This repository contains code to run a data stream generator for the open stream processing benchmark. 4 | 5 | This generator can run in several modes: 6 | - constant rate: publishes data at a constant rate on the two Kafka topics. This mode is used for the following workloads: constant-rate, latency-constant-rate, worker-failure and master-failure. 7 | - with periodic bursts: publishes data at a constant rate with periodic bursts every 10 seconds on the two Kafka topics. 8 | - with single burst: publishes an increased amount of data during the first five minutes of the run and a smaller volume afterwards. 9 | - with faulty events: publishes a constant-rate stream and injects a faulty event after ten minutes. 10 | 11 | The original timestamps in the data are replaced by the current time. The generator publishes data at an accelerated pace: one minute of the original stream is replayed in one second.
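To make this pacing concrete, the sketch below shows what the 60-to-1 time compression amounts to (an illustration only, not the generator's actual replay code; the object and method names are made up for this example). An observation recorded `t` milliseconds after the start of the original data set is published roughly `t / 60` milliseconds after the start of the run, and its timestamp is rewritten to the wall-clock time at which it is published.

```scala
// Illustrative sketch of the 60x time compression described above.
// These names are hypothetical; the real publishers live under src/main/scala/ingest.
object ReplayPacingSketch {
  // One minute of recorded data is replayed in one second.
  val CompressionFactor = 60L

  /** Delay (ms) after the start of the run at which an observation should be published. */
  def replayOffsetMs(originalTimestampMs: Long, originalStartMs: Long): Long =
    (originalTimestampMs - originalStartMs) / CompressionFactor

  /** The published message carries the current wall-clock time instead of the original timestamp. */
  def rewrittenTimestampMs(): Long = System.currentTimeMillis()
}
```

For example, an event stamped 120 000 ms (two minutes) into the recording is replayed about 2 000 ms (two seconds) into the run.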
12 | 13 | ## Running the data stream generator 14 | 15 | The following environment variables should be set: 16 | 17 | S3_ACCESS_KEY=xxx # S3 access key of data input 18 | S3_SECRET_KEY=xxx # S3 secret key of data input 19 | KAFKA_BOOTSTRAP_SERVERS=$(hostname -I | head -n1 | awk '{print $1\;}'):9092 # list of Kafka brokers in the form of "host1:port1,host2:port2" 20 | DATA_VOLUME=0 # inflation factor for the data 21 | MODE=constant-rate # data characteristics of the input stream (explained further) 22 | FLOWTOPIC=ndwflow # Kafka topic name for flow data 23 | SPEEDTOPIC=ndwspeed # Kafka topic name for speed data 24 | RUNS_LOCAL=true # whether we run locally or on a platform 25 | 26 | To run do: 27 | 28 | sbt compile 29 | sbt run -------------------------------------------------------------------------------- /data-stream-generator/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt.Keys.javaOptions 2 | 3 | name := "ospbench-data-stream-generator" 4 | 5 | version := "3.0" 6 | 7 | scalaVersion := "2.11.8" 8 | dockerBaseImage := "openjdk:8-jdk" 9 | val extJvmOpts = Seq( 10 | "-J-Xmx5g", 11 | "-J-Xms5g" 12 | ) 13 | 14 | libraryDependencies ++= Dependencies.rootDependencies 15 | 16 | assemblyMergeStrategy in assembly := { 17 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 18 | case _ => MergeStrategy.first 19 | } 20 | mainClass in assembly := Some("ingest.StreamProducer") 21 | mainClass in(Compile, run) := Some("ingest.StreamProducer") 22 | 23 | // JVM options 24 | javaOptions in Universal ++= extJvmOpts 25 | javaOptions in Test ++= extJvmOpts 26 | // Docker configs 27 | javaOptions in Docker ++= extJvmOpts 28 | enablePlugins(JavaAppPackaging) 29 | 30 | 31 | -------------------------------------------------------------------------------- /data-stream-generator/project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Versions { 4 | val dropwizardMetrics = "3.2.2" 5 | val typeSafe = "1.3.1" 6 | val scalaBinary = "2.11" 7 | val kafka = "0.10.2.1" 8 | val logback = "1.2.2" 9 | val spark = "2.2.1" 10 | } 11 | 12 | object Dependencies { 13 | val sparkDependencies = Seq( 14 | "org.apache.spark" %% s"spark-core" % Versions.spark, 15 | "org.apache.spark" %% s"spark-sql" % Versions.spark 16 | ).map(_.exclude("org.slf4j", "slf4j-log4j12")) 17 | val rootDependencies: Seq[ModuleID] = Seq( 18 | "com.typesafe" % "config" % Versions.typeSafe, 19 | "io.dropwizard.metrics" % "metrics-core" % Versions.dropwizardMetrics, 20 | "org.apache.kafka" % s"kafka_${Versions.scalaBinary}" % Versions.kafka, 21 | "ch.qos.logback" % "logback-classic" % Versions.logback, 22 | "org.apache.hadoop" % "hadoop-aws" % "3.0.0-alpha2", 23 | "org.apache.hadoop" % "hadoop-hdfs" % "2.8.1" 24 | ).map(_.exclude("log4j", "log4j") 25 | .exclude("org.slf4j", "slf4j-log4j12").exclude("com.fasterxml.jackson.core", "jackson-core") 26 | .exclude("com.fasterxml.jackson.core", "jackson-annotations") 27 | .exclude("com.fasterxml.jackson.core", "jackson-databind")) ++ sparkDependencies 28 | } -------------------------------------------------------------------------------- /data-stream-generator/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.13 -------------------------------------------------------------------------------- /data-stream-generator/project/plugins.sbt: 
-------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | resolvers := Seq( 3 | "otto-bintray" at "https://dl.bintray.com/ottogroup/maven", 4 | "Sbt plugins" at "https://dl.bintray.com/sbt/sbt-plugin-releases" 5 | 6 | ) 7 | 8 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") 9 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") 10 | addSbtPlugin("com.typesafe.sbt" %% "sbt-native-packager" % "1.3.2") -------------------------------------------------------------------------------- /data-stream-generator/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /data-stream-generator/src/main/resources/resources.conf: -------------------------------------------------------------------------------- 1 | general { 2 | run.local = "true" 3 | run.local = ${RUNS_LOCAL} 4 | mode = "constant-rate" 5 | mode = ${MODE} 6 | last.stage = 4 7 | last.stage = ${LAST_STAGE} 8 | data.volume = 1 9 | data.volume = ${DATA_VOLUME} 10 | publisher.nb = 1 11 | publisher.nb = ${PUBLISHER_NB} 12 | 13 | local.path = "./src/main/resources/data/time*/*" 14 | } 15 | 16 | kafka { 17 | bootstrap.servers = ${KAFKA_BOOTSTRAP_SERVERS} 18 | 19 | flow.topic = "ndwflow" 20 | flow.topic = ${FLOWTOPIC} 21 | speed.topic = "ndwspeed" 22 | speed.topic = ${SPEEDTOPIC} 23 | } 24 | 25 | aws.s3 { 26 | access.key = ${S3_ACCESS_KEY} 27 | secret.key = ${S3_SECRET_KEY} 28 | path = ${INPUT_DATA_PATH} 29 | } -------------------------------------------------------------------------------- /data-stream-generator/src/main/scala/ingest/ConfigUtils.scala: -------------------------------------------------------------------------------- 1 | package ingest 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | 5 | import scala.util.Try 6 | 7 | object ConfigUtils extends Serializable { 8 | val configProperties: Config = ConfigFactory.load("resources.conf") 9 | configProperties.resolve() 10 | 11 | val generalConfigProps: Config = configProperties.getConfig("general") 12 | generalConfigProps.resolve() 13 | 14 | // General settings 15 | val local: Boolean = generalConfigProps.getString("run.local").equals("true") 16 | val mode: String = generalConfigProps.getString("mode") 17 | val lastStage: Int = generalConfigProps.getInt("last.stage").toInt 18 | val localPath: String = generalConfigProps.getString("local.path") 19 | val dataVolume: Int = generalConfigProps.getInt("data.volume") 20 | val publisherNb: String = generalConfigProps.getString("publisher.nb") 21 | 22 | // Kafka settings 23 | val kafkaBootstrapServers: String = configProperties.getString("kafka.bootstrap.servers") 24 | val flowTopic: String = configProperties.getString("kafka.flow.topic") 25 | val speedTopic: String = configProperties.getString("kafka.speed.topic") 26 | 27 | // AWS settings 28 | val s3Path: String = configProperties.getString("aws.s3.path") +"/time*.txt/part-00000-*.txt" 29 | val s3AccessKey: String = configProperties.getString("aws.s3.access.key") 30 | val s3SecretKey: String = configProperties.getString("aws.s3.secret.key") 31 | } 32 | -------------------------------------------------------------------------------- /data-stream-generator/src/main/scala/ingest/DataUtils.scala: 
-------------------------------------------------------------------------------- 1 | package ingest 2 | 3 | import java.sql.{Date, Timestamp} 4 | import java.text.{DateFormat, SimpleDateFormat} 5 | 6 | object DataUtils extends Serializable { 7 | 8 | def getTime(timeString: String): Long = { 9 | // Date format of data 10 | val dateFormat: DateFormat = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss") 11 | dateFormat.parse(timeString).getTime 12 | } 13 | 14 | def extractTimestamp(line: String): Long = getTime(line.split("\"timestamp\":\"")(1).substring(0, line.split("\"timestamp\":\"")(1).indexOf("\""))) 15 | 16 | def splitLineInKeyAndValue(line: String): (String, String) = { 17 | val splittedLine = line.split("=") 18 | (splittedLine(0), splittedLine(1).replace(" ", "")) 19 | } 20 | 21 | // we need to add the lane to the body to keep the same partitioning for input and output data and get correct latency meausurements 22 | def putLaneNumberInBody(key: String, message: String): (String, String) = { 23 | val indexOfLaneNumber = key.lastIndexOf("/lane") 24 | 25 | val lane = if(indexOfLaneNumber != -1) key.substring(indexOfLaneNumber + 1) else "UNKNOWN" 26 | 27 | val msg = s"""{"internalId": "$lane", ${message.substring(1)}""" 28 | 29 | (key.substring(0, indexOfLaneNumber), msg) 30 | } 31 | } 32 | 33 | case class Observation(timestamp: Long, key: String, message: String) extends Serializable { 34 | 35 | def replaceTimestampWithCurrentTimestamp(): Observation = { 36 | val timestampToReplace: String = message.split("\"timestamp\":\"")(1).substring(0, message.split("\"timestamp\":\"")(1).indexOf("\"")) 37 | 38 | val dateFormat: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 39 | val currentTimeString: String = dateFormat.format(new Timestamp(1000 * Math.round(System.currentTimeMillis()/1000.0))) 40 | val newMsg = message.replaceFirst(timestampToReplace, currentTimeString) 41 | Observation(timestamp, key, newMsg) 42 | } 43 | } -------------------------------------------------------------------------------- /data-stream-generator/src/main/scala/ingest/Publisher.scala: -------------------------------------------------------------------------------- 1 | package ingest 2 | 3 | import java.util.concurrent.Executors 4 | 5 | import com.codahale.metrics.Meter 6 | import org.slf4j.LoggerFactory 7 | 8 | import scala.annotation.tailrec 9 | import scala.concurrent.{ExecutionContext, Future} 10 | 11 | trait Publisher extends Serializable { 12 | implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(8)) 13 | val logger = LoggerFactory.getLogger(getClass) 14 | 15 | // log some stats every 5 seconds 16 | private val statsLogger = Future { 17 | @tailrec def logStats(): Unit = { 18 | logger.info(f"ingest stats - flow (count:${flowStats.getCount}, rate:${flowStats.getOneMinuteRate}%.1f), speed (count:${speedStats.getCount}, rate:${speedStats.getOneMinuteRate}%.1f)") 19 | Thread.sleep(5000) 20 | logStats() 21 | } 22 | 23 | logStats() 24 | } 25 | 26 | val flowStats = new Meter() 27 | val speedStats = new Meter() 28 | def publish(index: Int): Future[Unit] 29 | } 30 | -------------------------------------------------------------------------------- /data-stream-generator/src/main/scala/ingest/StreamProducer.scala: -------------------------------------------------------------------------------- 1 | package ingest 2 | 3 | import java.util.Properties 4 | import java.util.concurrent.Executors 5 | 6 | import org.apache.spark.sql.SparkSession 7 | import org.slf4j.LoggerFactory 8 | 9 | import 
scala.concurrent.duration.Duration 10 | import scala.concurrent.{Await, ExecutionContext, Future} 11 | import scala.util.Try 12 | 13 | /** 14 | * Produces a stream on Kafka 15 | * Four possible configurations: 16 | * - single-burst: publishes all the messages onto Kafka as quickly as possible 17 | * - periodic-burst: publishes a load of messages each minute 18 | * - constant-rate: publishes a constant rate of messages (each 100ms) 19 | * - faulty-event: publishes a faulty event after a time period to make the job crash 20 | */ 21 | object StreamProducer extends App { 22 | val logger = LoggerFactory.getLogger(getClass) 23 | 24 | val sparkSession = SparkSession.builder 25 | .master("local[*]") 26 | .appName("ndw-publisher") 27 | .config("spark.driver.memory", "5g") 28 | .getOrCreate() 29 | 30 | val hadoopConf = sparkSession.sparkContext.hadoopConfiguration 31 | hadoopConf.set("fs.s3a.endpoint", "s3-eu-central-1.amazonaws.com") 32 | hadoopConf.set("fs.s3a.access.key", ConfigUtils.s3AccessKey) 33 | hadoopConf.set("fs.s3a.secret.key", ConfigUtils.s3SecretKey) 34 | 35 | val kafkaProperties = new Properties() 36 | kafkaProperties.setProperty("bootstrap.servers", ConfigUtils.kafkaBootstrapServers) 37 | kafkaProperties.setProperty("linger.ms", "20") 38 | // kafkaProperties.setProperty("batch.size", "8000") 39 | 40 | implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(3)) 41 | 42 | val publisherImpl: Publisher = { 43 | if (ConfigUtils.mode == "single-burst") { 44 | new SingleBurstPublisher(sparkSession, kafkaProperties) 45 | } else if (ConfigUtils.mode == "periodic-burst") { 46 | new PeriodicBurstPublisher(sparkSession, kafkaProperties) 47 | } else if (ConfigUtils.mode == "constant-rate" || ConfigUtils.mode == "latency-constant-rate" || ConfigUtils.mode == "worker-failure" || ConfigUtils.mode == "master-failure") { 48 | new ConstantRatePublisher(sparkSession, kafkaProperties) 49 | } else if (ConfigUtils.mode == "faulty-event") { 50 | new FaultyEventPublisher(sparkSession, kafkaProperties) 51 | } else { 52 | throw new RuntimeException(s"Unsupported app mode ${ConfigUtils.mode}.") 53 | } 54 | } 55 | val ndwPublishers = 0.to(2).map(index => publisherImpl.publish(index: Int)) 56 | 57 | // wait for all ingesters to complete 58 | Await.ready(Future.sequence(ndwPublishers), Duration.Inf) 59 | 60 | logger.info("END OF FILE") 61 | Thread.sleep(60000 * 3) 62 | 63 | System.exit(0) 64 | } -------------------------------------------------------------------------------- /deployment/.gitignore: -------------------------------------------------------------------------------- 1 | automation_scripts/AWS_ACCESS_KEY 2 | automation_scripts/AWS_SECRET_KEY 3 | automation_scripts/benchmark-jars-path 4 | automation_scripts/benchmark-jars-bucket 5 | automation_scripts/benchmark-results-path 6 | automation_scripts/benchmark-input-data-path 7 | automation_scripts/benchmark-metrics-path 8 | aws_marathon_files/spark-worker-without-env*.json 9 | aws_marathon_files/aws-publisher-without-env*.json 10 | aws_marathon_files/aws.conf 11 | aws_marathon_files/cadvisor-benchmark.json 12 | aws_marathon_files/flink-benchmark*.jar 13 | aws_marathon_files/flink-benchmark*.jar~ 14 | aws_marathon_files/flink-jobmanager.json 15 | aws_marathon_files/flink-taskmanager-without-env*.json 16 | aws_marathon_files/jmx-exporter-without-env.json 17 | aws_marathon_files/aws-kafka-brokers-2.1.json 18 | aws_marathon_files/spark-submit.json 19 | aws_marathon_files/kafka-thread-1.json 20 | aws_marathon_files/kafka-thread-2.json 21 | 
aws_marathon_files/kafka-thread-3.json 22 | aws_marathon_files/kafka-thread-4.json 23 | aws_marathon_files/kafka-thread-5.json 24 | -------------------------------------------------------------------------------- /deployment/automation_scripts/AWS_ACCESS_KEY.template: -------------------------------------------------------------------------------- 1 | put-access-key 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/AWS_SECRET_KEY.template: -------------------------------------------------------------------------------- 1 | put-secret-key 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/benchmark-input-data-path.template: -------------------------------------------------------------------------------- 1 | s3a://bucket/path/to/data 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/benchmark-jars-bucket.template: -------------------------------------------------------------------------------- 1 | bucketname 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/benchmark-jars-path.template: -------------------------------------------------------------------------------- 1 | https://s3.eu-central-1.amazonaws.com/bucketname 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/benchmark-metrics-path.template: -------------------------------------------------------------------------------- 1 | s3a://bucketname/path/to/metrics 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/benchmark-results-path.template: -------------------------------------------------------------------------------- 1 | s3a://bucketname/path/to/results 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/connect.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | DCOS_DNS_ADDRESS=$(aws cloudformation describe-stacks --region eu-west-1 --stack-name=streaming-benchmark | jq '.Stacks[0].Outputs | .[] | select(.Description=="Master") | .OutputValue' | awk '{print tolower($0)}') 4 | export DCOS_DNS_ADDRESS="http://${DCOS_DNS_ADDRESS//\"}" 5 | dcos config set core.dcos_url $DCOS_DNS_ADDRESS 6 | dcos auth login 7 | -------------------------------------------------------------------------------- /deployment/automation_scripts/create-kafka-topic.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export TOPICNAME=$1 3 | export NUM_PARTITIONS=${2:-20} # uses the input argument and otherwise 20 partitions 4 | export TIMESTAMP_TYPE=${3:-LogAppendTime} 5 | 6 | 7 | # configure Kafka manager 8 | # First find the node on which the Kafka manager is running 9 | KAFKA_BROKER_1=$(dcos task kafka-brokers | awk '{ print $2 }' | grep 10 | head -n1 | awk '{print $1;}') 10 | echo "KAFKA_BROKER_1=$KAFKA_BROKER_1" 11 | 12 | export KAFKA_BROKER_1_DOCKER_ID=$(ssh -oStrictHostKeyChecking=no core@$KAFKA_BROKER_1 docker ps | grep kafka-broker | awk '{print $1}') 13 | echo "KAFKA_BROKER_1_DOCKER_ID=$KAFKA_BROKER_1_DOCKER_ID" 14 | 15 | 16 | export TOPIC_CREATE_CMD="(/opt/kafka_2.11-2.1.0/bin/kafka-topics.sh --create --topic $TOPICNAME --partitions $NUM_PARTITIONS --replication-factor 1 --config message.timestamp.type=$TIMESTAMP_TYPE --zookeeper zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka)" 17 | export METRICS_TOPIC_CREATE_CMD="(/opt/kafka_2.11-2.1.0/bin/kafka-topics.sh --create --topic metrics-$TOPICNAME --partitions $NUM_PARTITIONS --replication-factor 1 --zookeeper zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka)" 18 | 19 | 20 | ssh -oStrictHostKeyChecking=no core@$KAFKA_BROKER_1 docker exec -i $KAFKA_BROKER_1_DOCKER_ID 'bash -c "'"$TOPIC_CREATE_CMD"'"' 21 | ssh -oStrictHostKeyChecking=no core@$KAFKA_BROKER_1 docker exec -i $KAFKA_BROKER_1_DOCKER_ID 'bash -c "'"$METRICS_TOPIC_CREATE_CMD"'"' 22 | -------------------------------------------------------------------------------- /deployment/automation_scripts/describe-kafka-topic.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export TOPICNAME=$1 3 | 4 | 5 | # configure Kafka manager 6 | # First find the node on which the Kafka manager is running 7 | KAFKA_BROKER_1=$(dcos task kafka-brokers | awk '{ print $2 }' | grep 10 | head -n1 | awk '{print $1;}') 8 | echo "KAFKA_BROKER_1=$KAFKA_BROKER_1" 9 | 10 | export KAFKA_BROKER_1_DOCKER_ID=$(ssh -oStrictHostKeyChecking=no core@$KAFKA_BROKER_1 docker ps | grep kafka-cluster | awk '{print $1}') 11 | echo "KAFKA_BROKER_1_DOCKER_ID=$KAFKA_BROKER_1_DOCKER_ID" 12 | 13 | 14 | export TOPIC_DESCRIBE_CMD="(/opt/kafka_2.11-2.0.0/bin/kafka-topics.sh --describe --topic $TOPICNAME --zookeeper zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka)" 15 | ssh -oStrictHostKeyChecking=no core@$KAFKA_BROKER_1 docker exec -i $KAFKA_BROKER_1_DOCKER_ID 'bash -c "'"$TOPIC_DESCRIBE_CMD"'"' 16 | -------------------------------------------------------------------------------- /deployment/automation_scripts/initiate-dcos-tunnel.sh: -------------------------------------------------------------------------------- 1 | gnome-terminal -e 'bash -c "sudo killall openvpn; ssh-add ~/.ssh/id_rsa_benchmark; sudo dcos auth login; sudo dcos package install tunnel-cli --cli --yes; sudo SSH_AUTH_SOCK=$SSH_AUTH_SOCK dcos tunnel --verbose vpn; bash"' 2 | -------------------------------------------------------------------------------- /deployment/automation_scripts/remove-flink-cluster.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export FLINK_SERVICES=$(dcos marathon app list | grep flink- | awk '{ print $1 }') 3 | 4 | for SERVICE in $FLINK_SERVICES 5 | do 6 | echo "dcos marathon app stop $SERVICE" 7 | echo "dcos marathon app remove $SERVICE" 8 | dcos marathon app stop $SERVICE 9 | dcos marathon app remove $SERVICE 10 | done 11 | -------------------------------------------------------------------------------- /deployment/automation_scripts/remove-jmx-and-publisher.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | dcos marathon app remove jmx-exporter 3 | 4 | export PUBLISHER_SERVICES=$(dcos marathon app list | grep /benchmark/ | awk '{ print $1 }') 5 | 6 | for SERVICE in $PUBLISHER_SERVICES 7 | do 8 | echo "dcos marathon app stop $SERVICE" 9 | echo "dcos marathon app remove $SERVICE" 10 | dcos marathon app stop $SERVICE 11 | dcos marathon app remove $SERVICE 12 | done 13 | -------------------------------------------------------------------------------- /deployment/automation_scripts/remove-kafka-streams-job.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export KAFKA_STREAMS_SERVICES=$(dcos marathon app list | grep kafka-streams/kafka-thread | awk '{ print $1 }') 3 | 4 | for SERVICE in $KAFKA_STREAMS_SERVICES 5 | do 6 | echo "dcos marathon app stop $SERVICE" 7 | echo "dcos marathon app remove $SERVICE" 8 | dcos marathon app stop $SERVICE 9 | dcos marathon app remove $SERVICE 10 | done 11 | -------------------------------------------------------------------------------- /deployment/automation_scripts/remove-spark-cluster.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export SPARK_SERVICES=$(dcos marathon app list | grep spark- | awk '{ print $1 }') 3 | 4 | for SERVICE in $SPARK_SERVICES 5 | do 6 | echo "dcos marathon app stop $SERVICE" 7 | echo "dcos marathon app remove $SERVICE" 8 | dcos marathon app stop $SERVICE 9 | dcos marathon app remove $SERVICE 10 | done 11 | -------------------------------------------------------------------------------- /deployment/automation_scripts/run-output-consumer.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export FRAMEWORK=$1 3 | export MODE=$2 4 | export JOBUUID=$3 5 | export AMT_WORKERS=${4:-5} 6 | export WORKER_CPU=${5:-4} 7 | export WORKER_MEM=${6:-20} 8 | export AWS_ACCESS_KEY=`cat AWS_ACCESS_KEY` 9 | export AWS_SECRET_KEY=`cat AWS_SECRET_KEY` 10 | export JAR_PATH=`cat benchmark-jars-path` 11 | export JAR_NAME=$JAR_PATH/benchmark-output-consumer-assembly-3.1.jar; echo "- JAR_NAME = $JAR_NAME" 12 | export OUTPUT_METRICS_PATH=`cat benchmark-metrics-path` 13 | 14 | # Requesting the values for the required environment variables for ndw publisher and output consumer 15 | ## get the Kafka brokers 16 | BOOTSTRAP_SERVER_LIST=($(dcos task kafka-brokers | awk '{ print $2 }' | grep 10)) 17 | BROKER_LIST_STRING="${BOOTSTRAP_SERVER_LIST[*]}" 18 | export KAFKA_BOOTSTRAP_SERVERS=$(echo "${BROKER_LIST_STRING//${IFS:0:1}/,}" | sed -E "s/([^,]+)/\1:10000/g") 19 | echo "Kafka bootstrap servers configured as: $KAFKA_BOOTSTRAP_SERVERS" 20 | 21 | export SPARK_MASTER_IP_ADDR=$(dcos task | grep spark-master | awk '{print $2}') 22 | echo "SPARK_MASTER_IP_ADDR=$SPARK_MASTER_IP_ADDR" 23 | export SPARK_MASTER_DOCKER_ID=$(ssh -oStrictHostKeyChecking=no core@$SPARK_MASTER_IP_ADDR docker ps | grep spark-master | awk '{print $1}') 24 | echo "SPARK_MASTER_DOCKER_ID=$SPARK_MASTER_DOCKER_ID" 25 | 26 | #driver host 27 | export SPARK_WORKER2_HOST=$(dcos task | grep spark-worker-2 | awk '{print $2}') 28 | 29 | echo "starting output consumer for $JOBUUID" 30 | # Submit output consumer 31 | export OUTPUT_CONSUMER_SUBMIT_JOB_CMD="(./output-consumer-submit-job.sh $FRAMEWORK $MODE $KAFKA_BOOTSTRAP_SERVERS $AWS_ACCESS_KEY $AWS_SECRET_KEY $JOBUUID $SPARK_WORKER2_HOST $AMT_WORKERS $WORKER_CPU $WORKER_MEM $JAR_NAME $OUTPUT_METRICS_PATH)" 32 | echo $OUTPUT_CONSUMER_SUBMIT_JOB_CMD 33 | ssh -oStrictHostKeyChecking=no core@$SPARK_MASTER_IP_ADDR docker exec -i $SPARK_MASTER_DOCKER_ID 'bash -c "'"$OUTPUT_CONSUMER_SUBMIT_JOB_CMD"'"' 34 | 35 | # stay in this script while the output consumer is running 36 | sleep 1m 37 | ACTIVE_DRIVER_ID=$(ssh -oStrictHostKeyChecking=no core@$SPARK_MASTER_IP_ADDR docker exec -i $SPARK_MASTER_DOCKER_ID 'bash -c "'"(./active-driver-check.sh $SPARK_MASTER_IP_ADDR)"'"') 38 | 39 | k=1 40 | while [[ $ACTIVE_DRIVER_ID == *"driver"* ]]; 41 | do 42 | sleep 1m 43 | k=$(( $k + 1 )) 44 | if ! ((k % 5)); then 45 | echo "output consumer running for $k minutes for $JOBUUID" 46 | fi 47 | ACTIVE_DRIVER_ID=$(ssh -oStrictHostKeyChecking=no core@$SPARK_MASTER_IP_ADDR docker exec -i $SPARK_MASTER_DOCKER_ID 'bash -c "'"(./active-driver-check.sh $SPARK_MASTER_IP_ADDR)"'"') 48 | done 49 | 50 | echo "output consumer finished in $k minutes" 51 | -------------------------------------------------------------------------------- /deployment/automation_scripts/scrape_metrics.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export DAY=4 3 | export DAY1=$(($DAY+1)) 4 | echo $DAY 5 | echo $DAY1 6 | export BEGINTIME=2020-06-0${DAY}T01:00:00Z 7 | export ENDTIME=2020-06-0${DAY1}T01:00:00Z 8 | export BEGINDATE=2020-06-04 9 | export ENDDATE=2020-06-05 10 | 11 | #export the network metrics of the run 12 | AWS_INSTANCES_WITH_ID=(i-07f1a3e6ce1f0ff21 i-0701daf9df9cbebc4 i-039066e4556bd23c6 i-0e919e070ab450d2d i-0a6205b0d14716f5c i-01154a7ed1f20809d i-00cb3cc924dbe239d i-0fa6a2d39d1d3c828 i-0ddfcf7e382ab6559 i-049037d7ecf57c959 i-0a29d4d232dc99e61 i-0c2c97b3413f86291 i-0a0d39e01a10f63b1 i-0f4727f9a9061d3f6 i-01a862689d37c8aab i-0620481f2701b876b i-0dab73255c8b499b0 i-0f3ccedd7c7632733 i-055c929f428ec5284 i-0a3c9807c4e69eb1a i-0de7c538fe8fb2c20 i-0673a3e0864e2e486 i-07f354d53ba51c473) 13 | METRICS_PATH="~/networkdata" 14 | for instance_id in "${AWS_INSTANCES_WITH_ID[@]}" 15 | do 16 | echo $(aws cloudwatch get-metric-statistics --metric-name NetworkIn --region eu-west-1 --start-time $BEGINTIME --end-time $ENDTIME --period 60 --statistics Average --namespace AWS/EC2 --dimensions Name=InstanceId,Value=$instance_id | jq '.["Datapoints"]') > "${BEGINDATE}_${ENDDATE}_${instance_id}_networkin_average.json" 17 | echo $(aws cloudwatch get-metric-statistics --metric-name NetworkOut --region eu-west-1 --start-time $BEGINTIME --end-time $ENDTIME --period 60 --statistics Average --namespace AWS/EC2 --dimensions Name=InstanceId,Value=$instance_id | jq '.["Datapoints"]') > "${BEGINDATE}_${ENDDATE}_${instance_id}_networkout_average.json" 18 | done 19 | -------------------------------------------------------------------------------- /deployment/automation_scripts/set-up-kafka-manager.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export KAFKA_PARTITIONS=${1:-20} 3 | # configure Kafka manager 4 | # First find the node on which the Kafka manager is running 5 | KAFKA_MANAGER_HOST=$(dcos task kafka-manager | awk '{ print $2 }' | grep 10) 6 | # Add Kafka cluster to Kafka manager 7 | curl "http://$KAFKA_MANAGER_HOST:9000/clusters" -H "Host: $KAFKA_MANAGER_HOST:9000" -H "Referer: http://$KAFKA_MANAGER_HOST:9000/addCluster" --data "name=benchmark&zkHosts=zk-1.zk%3A2181%2Czk-2.zk%3A2181%2Czk-3.zk%3A2181%2Czk-4.zk%3A2181%2Czk-5.zk%3A2181%2Fkafka&kafkaVersion=0.10.2.1&jmxEnabled=true&jmxUser=&jmxPass=&pollConsumers=true&tuning.brokerViewUpdatePeriodSeconds=30&tuning.clusterManagerThreadPoolSize=2&tuning.clusterManagerThreadPoolQueueSize=100&tuning.kafkaCommandThreadPoolSize=2&tuning.kafkaCommandThreadPoolQueueSize=100&tuning.logkafkaCommandThreadPoolSize=2&tuning.logkafkaCommandThreadPoolQueueSize=100&tuning.logkafkaUpdatePeriodSeconds=30&tuning.partitionOffsetCacheTimeoutSecs=5&tuning.brokerViewThreadPoolSize=4&tuning.brokerViewThreadPoolQueueSize=1000&tuning.offsetCacheThreadPoolSize=4&tuning.offsetCacheThreadPoolQueueSize=1000&tuning.kafkaAdminClientThreadPoolSize=4&tuning.kafkaAdminClientThreadPoolQueueSize=1000&securityProtocol=PLAINTEXT" 8 | 9 | # create all kafka topics 10 | ./create-kafka-topic.sh ndwflow $KAFKA_PARTITIONS CreateTime 11 | ./create-kafka-topic.sh ndwspeed $KAFKA_PARTITIONS CreateTime 12 | ./create-kafka-topic.sh ndwspeedburst $KAFKA_PARTITIONS CreateTime 13 | ./create-kafka-topic.sh ndwflowburst $KAFKA_PARTITIONS CreateTime 14 | ./create-kafka-topic.sh ndwflowburst $KAFKA_PARTITIONS CreateTime #used by Kafka streams non incremental window after parsing tumbling window 15 | ./create-kafka-topic.sh aggregation-data-topic $KAFKA_PARTITIONS CreateTime #used by 
Kafka streams custom tumbling window 16 | ./create-kafka-topic.sh relative-change-data-topic $KAFKA_PARTITIONS CreateTime #used by Kafka Streams custom sliding window 17 | ./create-kafka-topic.sh lane-aggregator-state-store $KAFKA_PARTITIONS CreateTime #used by Kafka streams custom tumbling window 18 | ./create-kafka-topic.sh relative-change-state-store $KAFKA_PARTITIONS CreateTime #used by Kafka Streams custom sliding window 19 | ./create-kafka-topic.sh flow-through-topic $KAFKA_PARTITIONS CreateTime #used by Kafka streams custom tumbling window 20 | ./create-kafka-topic.sh speed-through-topic $KAFKA_PARTITIONS CreateTime #used by Kafka Streams custom sliding window 21 | -------------------------------------------------------------------------------- /deployment/automation_scripts/start-flink-cluster-HA.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export AMT_WORKERS=${1:-5} 3 | export WORKER_CPU=${2:-4} 4 | export WORKER_MEM=${3:-20} 5 | export WORKER_MEM_MB=$(($WORKER_MEM*1024)) 6 | export DISK_MB=10240 7 | 8 | export SIZE="-standby" 9 | 10 | cd ../aws_marathon_files 11 | echo 'Starting Flink' 12 | envsubst < flink-jobmanager-with-env.json > flink-jobmanager.json 13 | dcos marathon app add flink-jobmanager.json 14 | sleep 20 15 | dcos marathon app add flink-jobmanager-standby.json 16 | echo 'Waiting for Flink jobmanager to start.' 17 | sleep 30 18 | echo 'Starting Flink taskmanagers' 19 | for TASKMANAGER_NB in $(seq 1 $AMT_WORKERS) 20 | do 21 | export TASKMANAGER_NB=$TASKMANAGER_NB 22 | envsubst < flink-taskmanager-with-env.json > flink-taskmanager-${TASKMANAGER_NB}.json 23 | dcos marathon app add flink-taskmanager-${TASKMANAGER_NB}.json 24 | done 25 | 26 | echo "will sleep 1 minutes for Flink to start up" 27 | sleep 60 28 | -------------------------------------------------------------------------------- /deployment/automation_scripts/start-flink-cluster.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export AMT_WORKERS=${1:-5} 3 | export WORKER_CPU=${2:-4} 4 | export WORKER_MEM=$(($WORKER_CPU*5)) 5 | export WORKER_MEM_MB=$(($WORKER_MEM*1024)) 6 | export DISK_MB=20480 7 | 8 | if [[ $WORKER_CPU == 1 ]]; then 9 | export SIZE="-smallest" 10 | elif [[ $WORKER_CPU == 2 ]]; then 11 | export SIZE="-small" 12 | elif [[ $WORKER_CPU == 6 ]]; then 13 | export SIZE="-large" 14 | else 15 | export SIZE="" 16 | fi 17 | echo "Size of the Flink cluster will be $SIZE" 18 | 19 | cd ../aws_marathon_files 20 | echo "Starting Flink with $AMT_WORKERS workers and $WORKER_CPU cpus" 21 | envsubst < flink-jobmanager-with-env.json > flink-jobmanager.json 22 | dcos marathon app add flink-jobmanager.json 23 | echo 'Waiting for Flink jobmanager to start.' 24 | sleep 30 25 | echo 'Starting Flink taskmanagers' 26 | for TASKMANAGER_NB in $(seq 1 $AMT_WORKERS) 27 | do 28 | export TASKMANAGER_NB=$TASKMANAGER_NB 29 | envsubst < flink-taskmanager-with-env.json > flink-taskmanager-without-env-${TASKMANAGER_NB}.json 30 | dcos marathon app add flink-taskmanager-without-env-${TASKMANAGER_NB}.json 31 | done 32 | 33 | echo "will sleep 1 minutes for Flink to start up" 34 | sleep 60 35 | -------------------------------------------------------------------------------- /deployment/automation_scripts/start-spark-cluster.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export AMT_WORKERS=${1:-5} 3 | export WORKER_CPU=${2:-4} 4 | export WORKER_MEM=$(($WORKER_CPU*5)) 5 | export WORKER_MEM_MB=$(($WORKER_MEM*1024)) 6 | export DISK_MB=20480 7 | 8 | cd ../aws_marathon_files 9 | echo 'Starting Spark' 10 | dcos marathon app add spark-master.json 11 | echo 'Waiting for Spark master to start.' 12 | sleep 20 13 | echo 'Starting Spark workers' 14 | 15 | for WORKER_NB in $(seq 1 $AMT_WORKERS) 16 | do 17 | export WORKER_NB=$WORKER_NB 18 | envsubst < spark-worker-with-env.json > spark-worker-without-env-$WORKER_NB.json 19 | dcos marathon app add spark-worker-without-env-$WORKER_NB.json 20 | done 21 | echo "will sleep 1 minutes for spark to start up" 22 | sleep 60 23 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/aws-influx-db.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/influxdb", 3 | "cmd": null, 4 | "cpus": 1, 5 | "mem": 2024, 6 | "disk": 0, 7 | "instances": 1, 8 | "container": { 9 | "type": "DOCKER", 10 | "docker": { 11 | "forcePullImage": true, 12 | "image": "influxdb:0.13", 13 | "parameters": [], 14 | "privileged": false 15 | }, 16 | "volumes": [ 17 | { 18 | "containerPath": "/data", 19 | "hostPath": "/tmp/influxdb", 20 | "mode": "RW" 21 | } 22 | ], 23 | "portMappings": [ 24 | { 25 | "containerPort": 8083, 26 | "hostPort": 8083, 27 | "labels": { 28 | "VIP_0": "/influxdb:8083" 29 | }, 30 | "name": "influxdb-admin", 31 | "protocol": "tcp", 32 | "servicePort": 8083 33 | }, 34 | { 35 | "containerPort": 8086, 36 | "hostPort": 8086, 37 | "labels": { 38 | "VIP_0": "/influxdb:8086", 39 | "VIP_1": "/influxdb:8086" 40 | }, 41 | "name": "influxdb-api", 42 | "protocol": "tcp", 43 | "servicePort": 8086 44 | } 45 | ] 46 | }, 47 | "env": { 48 | "PRE_CREATE_DB": "cadvisor", 49 | "INFLUXDB_ADMIN_ENABLED": "true" 50 | }, 51 | "networks": [ 52 | { 53 | "name": "dcos", 54 | "mode": "container" 55 | } 56 | ], 57 | "portDefinitions": [], 58 | "upgradeStrategy": { 59 | "maximumOverCapacity": 0, 60 | "minimumHealthCapacity": 0 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/aws-kafka-brokers-2.1-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/kafka/kafka-brokers", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-kafka-broker:2.1.0", 14 | "forcePullImage": false, 15 | "privileged": true, 16 | "parameters": [] 17 | } 18 | }, 19 | "cpus": 4, 20 | "disk": 30000, 21 | "env": { 22 | "KAFKA_ZOOKEEPER_CONNECT": "zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka", 23 | "KAFKA_HEAP_OPTS":"-Xmx9G -Xms9G" 24 | }, 25 | "instances": $KAFKA_BROKER_COUNT, 26 | "maxLaunchDelaySeconds": 3600, 27 | "mem": 10240, 28 | "gpus": 0, 29 | "networks": [ 30 | { 31 | "mode": "host" 32 | } 33 | ], 34 | "portDefinitions": [ 35 | { 36 | "name": "kafka", 37 | "protocol": "udp,tcp", 38 | "port": 10000 39 | }, 40 | { 41 | "name": "jmx", 42 | "protocol": "udp,tcp", 43 | "port": 10001 44 | } 45 | ], 46 | "requirePorts": true, 47 | "upgradeStrategy": { 48 | "maximumOverCapacity": 1, 49 | "minimumHealthCapacity": 1 50 | }, 51 | "killSelection": "YOUNGEST_FIRST", 52 | "unreachableStrategy": { 53 | "inactiveAfterSeconds": 0, 54 | 
"expungeAfterSeconds": 0 55 | }, 56 | "healthChecks": [], 57 | "constraints": [] 58 | } 59 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/aws-publisher-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/benchmark/$MODE-publisher-$PUBLISHER_NB", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-data-stream-generator:3.0", 14 | "forcePullImage": true, 15 | "privileged": false, 16 | "parameters": [] 17 | } 18 | }, 19 | "cpus": 3, 20 | "disk": 0, 21 | "env": { 22 | "PUBLISHER_NB": "$PUBLISHER_NB", 23 | "MODE": "$MODE", 24 | "LAST_STAGE": "$LAST_STAGE", 25 | "KAFKA_BOOTSTRAP_SERVERS": "$KAFKA_BOOTSTRAP_SERVERS", 26 | "DATA_VOLUME": "$VOLUME_PER_PUBLISHER", 27 | "FLOWTOPIC": "$FLOWTOPIC", 28 | "SPEEDTOPIC": "$SPEEDTOPIC", 29 | "NUM_PARTITIONS": "$NUM_PARTITIONS", 30 | "S3_ACCESS_KEY": "$AWS_ACCESS_KEY", 31 | "S3_SECRET_KEY": "$AWS_SECRET_KEY", 32 | "RUNS_LOCAL":"false", 33 | "INPUT_DATA_PATH": "$INPUT_DATA_PATH" 34 | }, 35 | "instances": 0, 36 | "maxLaunchDelaySeconds": 200, 37 | "mem": 6000, 38 | "gpus": 0, 39 | "networks": [ 40 | { 41 | "mode": "host" 42 | } 43 | ], 44 | "requirePorts": true, 45 | "upgradeStrategy": { 46 | "maximumOverCapacity": 1, 47 | "minimumHealthCapacity": 1 48 | }, 49 | "killSelection": "YOUNGEST_FIRST", 50 | "unreachableStrategy": { 51 | "inactiveAfterSeconds": 0, 52 | "expungeAfterSeconds": 0 53 | }, 54 | "healthChecks": [], 55 | "constraints": [] 56 | } 57 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/aws_with_env.conf: -------------------------------------------------------------------------------- 1 | #AWS CONFIG 2 | #_______________________________________________________________________________________________________________ 3 | environment { 4 | mode = "$MODE" 5 | is.running.in.docker = "false" 6 | } 7 | 8 | general { 9 | last.stage = "$LAST_STAGE" 10 | stream.source { 11 | volume = "$DATA_VOLUME" 12 | } 13 | buffer.timeout = "$BUFFER_TIMEOUT" 14 | } 15 | 16 | kafka { 17 | groupid = "ndwgroup" 18 | bootstrap.servers = "$KAFKA_BOOTSTRAP_SERVERS" 19 | zookeeper.server = "zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181" 20 | output.topic = "$TOPICNAME" 21 | flow.topic: "$FLOWTOPIC", 22 | speed.topic: "$SPEEDTOPIC", 23 | auto.offset.reset.strategy = "$KAFKA_AUTO_OFFSET_RESET_STRATEGY" 24 | } 25 | 26 | hdfs { 27 | active.name.node = "$ACTIVE_HDFS_NAME_NODE" 28 | } 29 | 30 | monitoring { 31 | graphite.host = "localhost" 32 | graphite.port = 2003 33 | print.output = false 34 | } 35 | 36 | spark { 37 | master = "spark://spark-master.marathon.mesos:7077" 38 | checkpoint.dir = "/checkpointdir/" 39 | } 40 | 41 | storm { 42 | workers = 4 43 | } 44 | 45 | flink { 46 | checkpoint.dir = "hdfs://$ACTIVE_HDFS_NAME_NODE/checkpointDirStructured" 47 | partitions = "$NUM_PARTITIONS" 48 | } 49 | 50 | kafkastreams { 51 | checkpoint.dir = "./kafka-logs/" 52 | } 53 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/cadvisor-benchmark-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/cadvisor", 3 | "cmd": "/usr/bin/cadvisor -storage_driver=influxdb 
-storage_driver_db=cadvisor -storage_driver_host=influxdb.marathon.l4lb.thisdcos.directory:8086", 4 | "cpus": 0.25, 5 | "mem": 256, 6 | "disk": 0, 7 | "instances": $AMT_SLAVES, 8 | "constraints": [ 9 | [ 10 | "hostname", 11 | "UNIQUE" 12 | ] 13 | ], 14 | "acceptedResourceRoles": [ 15 | "*", 16 | "slave_public" 17 | ], 18 | "container": { 19 | "type": "DOCKER", 20 | "docker": { 21 | "forcePullImage": true, 22 | "image": "gisellevd/ospbench-cadvisor:1.0", 23 | "parameters": [], 24 | "privileged": true 25 | }, 26 | "volumes": [ 27 | { 28 | "containerPath": "/rootfs", 29 | "hostPath": "/", 30 | "mode": "RO" 31 | }, 32 | { 33 | "containerPath": "/var/run", 34 | "hostPath": "/var/run", 35 | "mode": "RW" 36 | }, 37 | { 38 | "containerPath": "/sys", 39 | "hostPath": "/sys", 40 | "mode": "RO" 41 | }, 42 | { 43 | "containerPath": "/var/lib/docker", 44 | "hostPath": "/var/lib/docker", 45 | "mode": "RO" 46 | } 47 | ], 48 | "portMappings": [ 49 | { 50 | "containerPort": 8080, 51 | "hostPort": 8888, 52 | "labels": {}, 53 | "name": "cadvisor", 54 | "protocol": "tcp", 55 | "servicePort": 18080 56 | } 57 | ] 58 | }, 59 | "healthChecks": [ 60 | { 61 | "gracePeriodSeconds": 300, 62 | "ignoreHttp1xx": false, 63 | "intervalSeconds": 60, 64 | "maxConsecutiveFailures": 3, 65 | "path": "/", 66 | "portIndex": 0, 67 | "protocol": "HTTP", 68 | "ipProtocol": "IPv4", 69 | "timeoutSeconds": 20, 70 | "delaySeconds": 15 71 | } 72 | ], 73 | "labels": { 74 | "HAPROXY_0_STICKY": "true", 75 | "HAPROXY_GROUP": "external" 76 | }, 77 | "networks": [ 78 | { 79 | "mode": "container/bridge" 80 | } 81 | ], 82 | "portDefinitions": [] 83 | } 84 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/flink-jobmanager-standby.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/flink-jobmanager-standby", 3 | "backoffFactor": 1.15, 4 | "backoffSeconds": 1, 5 | "container": { 6 | "type": "DOCKER", 7 | "volumes": [], 8 | "docker": { 9 | "image": "gisellevd/ospbench-flink-jobmanager:1.11.1-standby", 10 | "forcePullImage": true, 11 | "privileged": false, 12 | "parameters": [ 13 | { 14 | "key": "label", 15 | "value": "application=flink-jobmanager" 16 | } 17 | ] 18 | } 19 | }, 20 | "cpus": 2, 21 | "disk": 10240, 22 | "env": { 23 | "APP_ID": "flink" 24 | }, 25 | "instances": 1, 26 | "maxLaunchDelaySeconds": 3600, 27 | "mem": 8192, 28 | "gpus": 0, 29 | "networks": [ 30 | { 31 | "mode": "host" 32 | } 33 | ], 34 | "portDefinitions": [ 35 | { 36 | "name": "rpc-job-manager", 37 | "protocol": "udp,tcp", 38 | "port": 6123 39 | }, 40 | { 41 | "name": "rpc-job-manager-ha-1", 42 | "protocol": "udp,tcp", 43 | "port": 9500 44 | }, 45 | { 46 | "name": "rpc-job-manager-ha-2", 47 | "protocol": "udp,tcp", 48 | "port": 9501 49 | }, 50 | { 51 | "name": "flink-ui", 52 | "protocol": "udp,tcp", 53 | "port": 8089 54 | } 55 | ], 56 | "requirePorts": true, 57 | "upgradeStrategy": { 58 | "maximumOverCapacity": 1, 59 | "minimumHealthCapacity": 1 60 | }, 61 | "killSelection": "YOUNGEST_FIRST", 62 | "unreachableStrategy": { 63 | "inactiveAfterSeconds": 0, 64 | "expungeAfterSeconds": 0 65 | }, 66 | "healthChecks": [], 67 | "constraints": [] 68 | } 69 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/flink-jobmanager-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/flink-jobmanager", 3 | "backoffFactor": 1.15, 4 | "backoffSeconds": 1, 5 | 
"container": { 6 | "type": "DOCKER", 7 | "volumes": [], 8 | "docker": { 9 | "image": "gisellevd/ospbench-flink-jobmanager:1.11.1${SIZE}", 10 | "forcePullImage": true, 11 | "privileged": false, 12 | "parameters": [ 13 | { 14 | "key": "label", 15 | "value": "application=flink-jobmanager" 16 | } 17 | ] 18 | } 19 | }, 20 | "cpus": 2, 21 | "disk": 10240, 22 | "env": { 23 | "APP_ID": "flink" 24 | }, 25 | "instances": 1, 26 | "maxLaunchDelaySeconds": 3600, 27 | "mem": 8192, 28 | "gpus": 0, 29 | "networks": [ 30 | { 31 | "mode": "host" 32 | } 33 | ], 34 | "portDefinitions": [ 35 | { 36 | "name": "rpc-job-manager", 37 | "protocol": "udp,tcp", 38 | "port": 6123 39 | }, 40 | { 41 | "name": "rpc-job-manager-ha-1", 42 | "protocol": "udp,tcp", 43 | "port": 9500 44 | }, 45 | { 46 | "name": "rpc-job-manager-ha-2", 47 | "protocol": "udp,tcp", 48 | "port": 9501 49 | }, 50 | { 51 | "name": "flink-ui", 52 | "protocol": "udp,tcp", 53 | "port": 8089 54 | } 55 | ], 56 | "requirePorts": true, 57 | "upgradeStrategy": { 58 | "maximumOverCapacity": 1, 59 | "minimumHealthCapacity": 1 60 | }, 61 | "killSelection": "YOUNGEST_FIRST", 62 | "unreachableStrategy": { 63 | "inactiveAfterSeconds": 0, 64 | "expungeAfterSeconds": 0 65 | }, 66 | "healthChecks": [], 67 | "constraints": [] 68 | } 69 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/flink-taskmanager-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/flink-taskmanager-${TASKMANAGER_NB}", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-flink-taskmanager:1.11.1${SIZE}", 14 | "forcePullImage": true, 15 | "privileged": false, 16 | "parameters": [ 17 | { 18 | "key": "label", 19 | "value": "application=flink-taskmanager-${TASKMANAGER_NB}" 20 | } 21 | ] 22 | } 23 | }, 24 | "cpus": $WORKER_CPU, 25 | "disk": $DISK_MB, 26 | "instances": 1, 27 | "maxLaunchDelaySeconds": 3600, 28 | "mem": $WORKER_MEM_MB, 29 | "gpus": 0, 30 | "networks": [ 31 | { 32 | "mode": "host" 33 | } 34 | ], 35 | "portDefinitions": [ 36 | { 37 | "name": "data", 38 | "protocol": "udp,tcp", 39 | "port": 6121 40 | }, 41 | { 42 | "name": "rpc", 43 | "protocol": "udp,tcp", 44 | "port": 6122 45 | } 46 | ], 47 | "requirePorts": true, 48 | "upgradeStrategy": { 49 | "maximumOverCapacity": 1, 50 | "minimumHealthCapacity": 1 51 | }, 52 | "killSelection": "YOUNGEST_FIRST", 53 | "unreachableStrategy": { 54 | "inactiveAfterSeconds": 0, 55 | "expungeAfterSeconds": 0 56 | }, 57 | "healthChecks": [], 58 | "constraints": [] 59 | } 60 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/jmx-exporter-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": 1, 3 | "portDefinitions": [], 4 | "container": { 5 | "type": "DOCKER", 6 | "volumes": [], 7 | "docker": { 8 | "forcePullImage": true, 9 | "image": "gisellevd/ospbench-metrics-exporter:3.0" 10 | } 11 | }, 12 | "requirePorts": false, 13 | "networks": [], 14 | "healthChecks": [], 15 | "constraints": [], 16 | "cpus": 1, 17 | "mem": 2024, 18 | "id": "jmx-exporter", 19 | "env": { 20 | "FRAMEWORK": "$FRAMEWORK", 21 | "TOPICNAME": "$TOPICNAME", 22 | "JMX_HOSTS": "$JMX_HOSTS", 23 | "CADVISOR_HOSTS": "$CADVISOR_HOSTS", 24 | "CLUSTER_URL": "$CLUSTER_URL", 25 
| "DCOS_ACCESS_TOKEN": "$DCOS_ACCESS_TOKEN", 26 | "CADVISOR_HOSTS": "$CADVISOR_HOSTS", 27 | "KAFKA_BOOTSTRAP_SERVERS": "$KAFKA_BOOTSTRAP_SERVERS" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/kafka-thread-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/kafka-streams/kafka-thread-${KAFKA_THREAD_NB}", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-kafka-benchmark:3.0", 14 | "forcePullImage": true, 15 | "privileged": false, 16 | "parameters": [ 17 | { 18 | "key": "label", 19 | "value": "application=kafka-thread-${KAFKA_THREAD_NB}" 20 | } 21 | ] 22 | } 23 | }, 24 | "cpus": $WORKER_CPU, 25 | "disk": 20480, 26 | "env": { 27 | "JAVA_OPTS":"-Xms${WORKER_HEAP_MEM_MB}m -Xmx${WORKER_HEAP_MEM_MB}m", 28 | "MODE":"$MODE", 29 | "KAFKA_BOOTSTRAP_SERVERS": "$KAFKA_BOOTSTRAP_SERVERS", 30 | "LAST_STAGE": "$LAST_STAGE", 31 | "KAFKA_AUTO_OFFSET_RESET_STRATEGY": "$KAFKA_AUTO_OFFSET_RESET_STRATEGY", 32 | "METRICS_TOPIC":"$TOPICNAME", 33 | "FLOWTOPIC": "$FLOWTOPIC", 34 | "SPEEDTOPIC": "$SPEEDTOPIC", 35 | "VOLUME": "$DATA_VOLUME", 36 | "BUFFER_TIMEOUT": "$BUFFER_TIMEOUT", 37 | "AMT_WORKERS":"$AMT_WORKERS", 38 | "WORKER_CPU":"$WORKER_CPU", 39 | "WORKER_MEM":"$WORKER_MEM", 40 | "NUM_THREADS_PER_INSTANCE":"$NUM_THREADS_PER_INSTANCE" 41 | }, 42 | "instances": 1, 43 | "maxLaunchDelaySeconds": 3600, 44 | "mem": $WORKER_MEM_MB, 45 | "gpus": 0, 46 | "networks": [ 47 | { 48 | "mode": "host" 49 | } 50 | ], 51 | "portDefinitions": [ 52 | { 53 | "name": "jmx-reporter", 54 | "protocol": "udp,tcp", 55 | "port": 8500 56 | } 57 | ], 58 | "requirePorts": true, 59 | "upgradeStrategy": { 60 | "maximumOverCapacity": 1, 61 | "minimumHealthCapacity": 1 62 | }, 63 | "killSelection": "YOUNGEST_FIRST", 64 | "unreachableStrategy": { 65 | "inactiveAfterSeconds": 0, 66 | "expungeAfterSeconds": 0 67 | }, 68 | "healthChecks": [], 69 | "constraints": [] 70 | } 71 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/spark-master.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/spark-master", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-spark-master:3.0.0", 14 | "forcePullImage": true, 15 | "privileged": false, 16 | "parameters": [ 17 | { 18 | "key": "label", 19 | "value": "application=spark-master" 20 | }, 21 | { 22 | "key": "user", 23 | "value": "root" 24 | } 25 | ] 26 | } 27 | }, 28 | "cpus": 1, 29 | "disk": 10240, 30 | "instances": 1, 31 | "maxLaunchDelaySeconds": 3600, 32 | "mem": 4096, 33 | "gpus": 0, 34 | "networks": [ 35 | { 36 | "mode": "host" 37 | } 38 | ], 39 | "portDefinitions": [ 40 | { 41 | "name": "master-web-ui", 42 | "protocol": "udp,tcp", 43 | "port": 7777 44 | }, 45 | { 46 | "name": "rest-url", 47 | "protocol": "udp,tcp", 48 | "port": 6060 49 | }, 50 | { 51 | "name": "master", 52 | "protocol": "udp,tcp", 53 | "port": 7077 54 | } 55 | ], 56 | "requirePorts": true, 57 | "upgradeStrategy": { 58 | "maximumOverCapacity": 1, 59 | "minimumHealthCapacity": 1 60 | }, 61 | "user": "root", 62 | "killSelection": 
"YOUNGEST_FIRST", 63 | "unreachableStrategy": { 64 | "inactiveAfterSeconds": 0, 65 | "expungeAfterSeconds": 0 66 | }, 67 | "healthChecks": [], 68 | "constraints": [] 69 | } 70 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/spark-submit-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/spark-submit", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "instances": 1, 8 | "container": { 9 | "type": "DOCKER", 10 | "volumes": [], 11 | "docker": { 12 | "image": "gisellevd/ospbench-spark-submit:3.0.0", 13 | "forcePullImage": true, 14 | "privileged": false, 15 | "parameters": [ 16 | { 17 | "key": "label", 18 | "value": "application=spark-submit" 19 | }, 20 | { 21 | "key": "user", 22 | "value": "root" 23 | } 24 | ] 25 | } 26 | }, 27 | "cpus": 2, 28 | "mem": 6144, 29 | "disk": 6144, 30 | "requirePorts": true, 31 | "networks": [ 32 | { 33 | "mode": "host" 34 | } 35 | ], 36 | "healthChecks": [], 37 | "constraints": [], 38 | "env": { 39 | "FRAMEWORK": "$FRAMEWORK", 40 | "JAR_NAME": "$JAR_NAME", 41 | "LAST_STAGE": "$LAST_STAGE", 42 | "KAFKA_AUTO_OFFSET_RESET_STRATEGY": "$KAFKA_AUTO_OFFSET_RESET_STRATEGY", 43 | "KAFKA_BOOTSTRAP_SERVERS": "$KAFKA_BOOTSTRAP_SERVERS", 44 | "ACTIVE_HDFS_NAME_NODE": "$ACTIVE_HDFS_NAME_NODE", 45 | "TOPICNAME": "$TOPICNAME", 46 | "VOLUME": "$DATA_VOLUME", 47 | "MODE": "$MODE", 48 | "FLOWTOPIC": "$FLOWTOPIC", 49 | "SPEEDTOPIC": "$SPEEDTOPIC", 50 | "AMT_WORKERS": "$AMT_WORKERS", 51 | "WORKER_CPU": "$WORKER_CPU", 52 | "WORKER_MEM": "$WORKER_MEM", 53 | "SPARK_EXECUTOR_MEMORY": "$SPARK_EXECUTOR_MEMORY", 54 | "SPARK_DEFAULT_PARALLELISM": "$NUM_PARTITIONS", 55 | "SPARK_SQL_SHUFFLE_PARTITIONS": "$NUM_SQL_PARTITIONS", 56 | "SPARK_CORES_MAX": "$SPARK_CORES_MAX", 57 | "CONC_GC_THREADS": "$CONC_GC_THREADS", 58 | "AWS_ACCESS_KEY_ID": "$AWS_ACCESS_KEY", 59 | "AWS_SECRET_ACCESS_KEY": "$AWS_SECRET_KEY" 60 | }, 61 | "portDefinitions": [ 62 | { 63 | "name": "driver-web-ui", 64 | "protocol": "udp,tcp", 65 | "port": 4040 66 | }, 67 | { 68 | "name": "driver", 69 | "protocol": "udp,tcp", 70 | "port": 7008 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /deployment/aws_marathon_files/spark-worker-with-env.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/spark-worker-${WORKER_NB}", 3 | "acceptedResourceRoles": [ 4 | "*", 5 | "slave_public" 6 | ], 7 | "backoffFactor": 1.15, 8 | "backoffSeconds": 1, 9 | "container": { 10 | "type": "DOCKER", 11 | "volumes": [], 12 | "docker": { 13 | "image": "gisellevd/ospbench-spark-worker:3.0.0", 14 | "forcePullImage": true, 15 | "privileged": false, 16 | "parameters": [ 17 | { 18 | "key": "label", 19 | "value": "application=spark-worker-${WORKER_NB}" 20 | }, 21 | { 22 | "key": "user", 23 | "value": "root" 24 | } 25 | ] 26 | } 27 | }, 28 | "cpus": $WORKER_CPU, 29 | "disk": $DISK_MB, 30 | "env": { 31 | "CORES": "$WORKER_CPU", 32 | "MEMORY": "${WORKER_MEM}g" 33 | }, 34 | "instances": 1, 35 | "maxLaunchDelaySeconds": 3600, 36 | "mem": $WORKER_MEM_MB, 37 | "gpus": 0, 38 | "networks": [ 39 | { 40 | "mode": "host" 41 | } 42 | ], 43 | "portDefinitions": [ 44 | { 45 | "name": "worker-web-ui", 46 | "protocol": "udp,tcp", 47 | "port": 7778 48 | }, 49 | { 50 | "name": "driver-web-ui", 51 | "protocol": "udp,tcp", 52 | "port": 4041 53 | }, 54 | { 55 | "name": "driver", 56 | "protocol": "udp,tcp", 57 | "port": 7001 58 | }, 
59 | { 60 | "name": "fileserver", 61 | "protocol": "udp,tcp", 62 | "port": 7002 63 | }, 64 | { 65 | "name": "broadcast", 66 | "protocol": "udp,tcp", 67 | "port": 7003 68 | }, 69 | { 70 | "name": "replclassserver", 71 | "protocol": "udp,tcp", 72 | "port": 7004 73 | }, 74 | { 75 | "name": "blockmanager", 76 | "protocol": "udp,tcp", 77 | "port": 7005 78 | }, 79 | { 80 | "name": "executor", 81 | "protocol": "udp,tcp", 82 | "port": 7006 83 | } 84 | ], 85 | "requirePorts": true, 86 | "upgradeStrategy": { 87 | "maximumOverCapacity": 1, 88 | "minimumHealthCapacity": 1 89 | }, 90 | "user": "root", 91 | "killSelection": "YOUNGEST_FIRST", 92 | "unreachableStrategy": { 93 | "inactiveAfterSeconds": 0, 94 | "expungeAfterSeconds": 0 95 | }, 96 | "healthChecks": [], 97 | "constraints": [] 98 | } 99 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/README.md: -------------------------------------------------------------------------------- 1 | This code has been based on [https://github.com/apache/flink-docker](https://github.com/apache/flink-docker) by Apache Software Foundation. 2 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/jobmanager/cancel-job.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | flink cancel `cat job.txt` 3 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/jobmanager/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################################### 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | ############################################################################### 20 | 21 | # If unspecified, the hostname of the container is taken as the JobManager address 22 | JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)} 23 | 24 | drop_privs_cmd() { 25 | if [ -x /sbin/su-exec ]; then 26 | # Alpine 27 | echo su-exec 28 | else 29 | # Others 30 | echo gosu 31 | fi 32 | } 33 | 34 | if [ "$1" = "help" ]; then 35 | echo "Usage: $(basename "$0") (jobmanager|taskmanager|help)" 36 | exit 0 37 | elif [ "$1" = "jobmanager" ]; then 38 | echo "Starting Job Manager" 39 | sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 40 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 41 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 42 | 43 | #### HIGH AVAILABILITY 44 | # echo "high-availability.storageDir: hdfs://10.0.3.33:9001//flink/ha/" >> "$FLINK_HOME/conf/flink-conf.yaml" 45 | 46 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 47 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/jobmanager.sh" start-foreground 48 | elif [ "$1" = "taskmanager" ]; then 49 | # TASK_MANAGER_NUMBER_OF_TASK_SLOTS=${TASK_MANAGER_NUMBER_OF_TASK_SLOTS:-$(grep -c ^processor /proc/cpuinfo)} 50 | # 51 | # sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 52 | # sed -i -e "s/taskmanager.numberOfTaskSlots: 1/taskmanager.numberOfTaskSlots: $TASK_MANAGER_NUMBER_OF_TASK_SLOTS/g" "$FLINK_HOME/conf/flink-conf.yaml" 53 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 54 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 55 | 56 | #### HIGH AVAILABILITY 57 | # echo "high-availability.storageDir: hdfs://10.0.3.221:9001//flink/ha/" >> "$FLINK_HOME/conf/flink-conf.yaml" 58 | 59 | echo "Starting Task Manager" 60 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 61 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/taskmanager.sh" start-foreground 62 | fi 63 | 64 | exec "$@" 65 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/jobmanager/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/jobmanager/submit-job.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | export JAR_NAME=$1 3 | 4 | curl $JAR_NAME > flink-assembly.jar 5 | sleep 15 6 | 7 | export jobid=$(flink run -d flink-assembly.jar) 8 | echo ${jobid##* } > job.txt 9 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/jobmanager/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-flink-jobmanager:1.11.1 2 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-large/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################################### 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # If unspecified, the hostname of the container is taken as the JobManager address 22 | JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)} 23 | 24 | drop_privs_cmd() { 25 | if [ -x /sbin/su-exec ]; then 26 | # Alpine 27 | echo su-exec 28 | else 29 | # Others 30 | echo gosu 31 | fi 32 | } 33 | 34 | if [ "$1" = "help" ]; then 35 | echo "Usage: $(basename "$0") (jobmanager|taskmanager|help)" 36 | exit 0 37 | elif [ "$1" = "jobmanager" ]; then 38 | echo "Starting Job Manager" 39 | sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 40 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 41 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 42 | 43 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 44 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/jobmanager.sh" start-foreground 45 | elif [ "$1" = "taskmanager" ]; then 46 | # TASK_MANAGER_NUMBER_OF_TASK_SLOTS=${TASK_MANAGER_NUMBER_OF_TASK_SLOTS:-$(grep -c ^processor /proc/cpuinfo)} 47 | # 48 | # sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 49 | # sed -i -e "s/taskmanager.numberOfTaskSlots: 1/taskmanager.numberOfTaskSlots: $TASK_MANAGER_NUMBER_OF_TASK_SLOTS/g" "$FLINK_HOME/conf/flink-conf.yaml" 50 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 51 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 52 | 53 | echo "Starting Task Manager" 54 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 55 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/taskmanager.sh" start-foreground 56 | fi 57 | 58 | exec "$@" 59 | 
-------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-large/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-large/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-flink-taskmanager:1.11.1-large 2 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-small/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################################### 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | ############################################################################### 20 | 21 | # If unspecified, the hostname of the container is taken as the JobManager address 22 | JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)} 23 | 24 | drop_privs_cmd() { 25 | if [ -x /sbin/su-exec ]; then 26 | # Alpine 27 | echo su-exec 28 | else 29 | # Others 30 | echo gosu 31 | fi 32 | } 33 | 34 | if [ "$1" = "help" ]; then 35 | echo "Usage: $(basename "$0") (jobmanager|taskmanager|help)" 36 | exit 0 37 | elif [ "$1" = "jobmanager" ]; then 38 | echo "Starting Job Manager" 39 | sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 40 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 41 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 42 | 43 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 44 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/jobmanager.sh" start-foreground 45 | elif [ "$1" = "taskmanager" ]; then 46 | # TASK_MANAGER_NUMBER_OF_TASK_SLOTS=${TASK_MANAGER_NUMBER_OF_TASK_SLOTS:-$(grep -c ^processor /proc/cpuinfo)} 47 | # 48 | # sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 49 | # sed -i -e "s/taskmanager.numberOfTaskSlots: 1/taskmanager.numberOfTaskSlots: $TASK_MANAGER_NUMBER_OF_TASK_SLOTS/g" "$FLINK_HOME/conf/flink-conf.yaml" 50 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 51 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 52 | 53 | echo "Starting Task Manager" 54 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 55 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/taskmanager.sh" start-foreground 56 | fi 57 | 58 | exec "$@" 59 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-small/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-small/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-flink-taskmanager:1.11.1-small 2 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-smallest/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################################### 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. 
You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # If unspecified, the hostname of the container is taken as the JobManager address 22 | JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)} 23 | 24 | drop_privs_cmd() { 25 | if [ -x /sbin/su-exec ]; then 26 | # Alpine 27 | echo su-exec 28 | else 29 | # Others 30 | echo gosu 31 | fi 32 | } 33 | 34 | if [ "$1" = "help" ]; then 35 | echo "Usage: $(basename "$0") (jobmanager|taskmanager|help)" 36 | exit 0 37 | elif [ "$1" = "jobmanager" ]; then 38 | echo "Starting Job Manager" 39 | sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 40 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 41 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 42 | 43 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 44 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/jobmanager.sh" start-foreground 45 | elif [ "$1" = "taskmanager" ]; then 46 | # TASK_MANAGER_NUMBER_OF_TASK_SLOTS=${TASK_MANAGER_NUMBER_OF_TASK_SLOTS:-$(grep -c ^processor /proc/cpuinfo)} 47 | # 48 | # sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 49 | # sed -i -e "s/taskmanager.numberOfTaskSlots: 1/taskmanager.numberOfTaskSlots: $TASK_MANAGER_NUMBER_OF_TASK_SLOTS/g" "$FLINK_HOME/conf/flink-conf.yaml" 50 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 51 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 52 | 53 | echo "Starting Task Manager" 54 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 55 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/taskmanager.sh" start-foreground 56 | fi 57 | 58 | exec "$@" 59 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-smallest/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager-smallest/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-flink-taskmanager:1.11.1-smallest 2 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ############################################################################### 4 | # Licensed to the Apache Software Foundation (ASF) under one 5 | # or more contributor license agreements. See the NOTICE file 6 | # distributed with this work for additional information 7 | # regarding copyright ownership. 
The ASF licenses this file 8 | # to you under the Apache License, Version 2.0 (the 9 | # "License"); you may not use this file except in compliance 10 | # with the License. You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # If unspecified, the hostname of the container is taken as the JobManager address 22 | JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)} 23 | 24 | drop_privs_cmd() { 25 | if [ -x /sbin/su-exec ]; then 26 | # Alpine 27 | echo su-exec 28 | else 29 | # Others 30 | echo gosu 31 | fi 32 | } 33 | 34 | if [ "$1" = "help" ]; then 35 | echo "Usage: $(basename "$0") (jobmanager|taskmanager|help)" 36 | exit 0 37 | elif [ "$1" = "jobmanager" ]; then 38 | echo "Starting Job Manager" 39 | sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 40 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 41 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 42 | 43 | #### HIGH AVAILABILITY 44 | #### HDFS name nodes 45 | # echo "high-availability.storageDir: hdfs://10.0.3.221:9001/flink/ha/" >> "$FLINK_HOME/conf/flink-conf.yaml" 46 | 47 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 48 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/jobmanager.sh" start-foreground 49 | elif [ "$1" = "taskmanager" ]; then 50 | # TASK_MANAGER_NUMBER_OF_TASK_SLOTS=${TASK_MANAGER_NUMBER_OF_TASK_SLOTS:-$(grep -c ^processor /proc/cpuinfo)} 51 | # 52 | # sed -i -e "s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: ${JOB_MANAGER_RPC_ADDRESS}/g" "$FLINK_HOME/conf/flink-conf.yaml" 53 | # sed -i -e "s/taskmanager.numberOfTaskSlots: 1/taskmanager.numberOfTaskSlots: $TASK_MANAGER_NUMBER_OF_TASK_SLOTS/g" "$FLINK_HOME/conf/flink-conf.yaml" 54 | echo "blob.server.port: 6124" >> "$FLINK_HOME/conf/flink-conf.yaml" 55 | echo "query.server.port: 6125" >> "$FLINK_HOME/conf/flink-conf.yaml" 56 | 57 | #### HIGH AVAILABILITY 58 | #### HDFS name nodes 59 | # echo "high-availability.storageDir: hdfs://10.0.3.33:9001/flink/ha/" >> "$FLINK_HOME/conf/flink-conf.yaml" 60 | 61 | echo "Starting Task Manager" 62 | echo "config file: " && grep '^[^\n#]' "$FLINK_HOME/conf/flink-conf.yaml" 63 | exec $(drop_privs_cmd) flink "$FLINK_HOME/bin/taskmanager.sh" start-foreground 64 | fi 65 | 66 | exec "$@" 67 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 
3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/flink_cluster_1.11.1/taskmanager/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-flink-taskmanager:1.11.1 2 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/README.md: -------------------------------------------------------------------------------- 1 | This Spark cluster has been based on [https://github.com/actionml/docker-spark](https://github.com/actionml/docker-spark) by Denis Baryshev (dennybaa@gmail.com). 2 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/.travis.yml: -------------------------------------------------------------------------------- 1 | language: bash 2 | services: docker 3 | 4 | env: 5 | global: 6 | - PROJECT=spark 7 | - NAMESPACE=actionml 8 | - PUSH_NAMESPACES=actionml 9 | 10 | install: 11 | - curl -sSL https://github.com/stackfeed/ci-scripts/raw/master/install.sh | sh -s 12 | - git clone https://github.com/docker-library/official-images.git ~/official-images 13 | 14 | before_script: 15 | - | 16 | export PATH=$PATH:~/ci-scripts 17 | [ ${TRAVIS_BRANCH} == master ] || export variant=${TRAVIS_BRANCH} 18 | 19 | script: 20 | - docker-build -v "${variant}" $NAMESPACE/$PROJECT . 21 | - ~/official-images/test/run.sh $(docker images -q | head -n1) 22 | 23 | after_success: 24 | - | 25 | # Check if not PR and namespace (org or user) is in the push list PUSH_NAMESPACES. 26 | [ "${TRAVIS_PULL_REQUEST}" = "false" ] && ( echo "${PUSH_NAMESPACES}" | grep -qw "${TRAVIS_REPO_SLUG%/*}" ) 27 | [ $? -eq 0 ] || exit 0 28 | 29 | # List of newly created images 30 | export images=$(docker images | grep "^$NAMESPACE/$PROJECT" | tr -s '[:space:]' | cut -f1,2 -d' ' | sed 's/ /:/') 31 | 32 | # Push to docker when DEPLOY is true 33 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 34 | for image in $images; do docker push $image; done 35 | 36 | after_script: 37 | - docker images 38 | 39 | notifications: 40 | webhooks: https://hooks.microbadger.com/images/actionml/spark/${MICROBADGER_TOKEN} 41 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:8-jre-alpine 2 | 3 | ENV GOSU_VERSION 1.10 4 | ENV SPARK_VERSION 3.0.0 5 | ENV SPARK_HOME /usr/local/spark 6 | ENV SPARK_LOCAL_DIRS /usr/local/spark/work 7 | ENV SPARK_USER aml 8 | ARG GLIBC_APKVER=2.27-r0 9 | ENV LANG=en_US.UTF-8 10 | 11 | LABEL vendor=ActionML \ 12 | version_tags="[\"2.1\",\"2.1.2\"]" 13 | 14 | # Update alpine and install required tools 15 | RUN echo "@community http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \ 16 | apk add --update --no-cache bash curl shadow@community 17 | 18 | # Glibc compatibility 19 | RUN curl -sSL https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/sgerrand.rsa.pub \ 20 | -o /etc/apk/keys/sgerrand.rsa.pub && \ 21 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-i18n-$GLIBC_APKVER.apk && \ 22 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-$GLIBC_APKVER.apk && \ 23 | curl -sSLO 
https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-bin-$GLIBC_APKVER.apk && \ 24 | apk add --no-cache glibc-$GLIBC_APKVER.apk glibc-bin-$GLIBC_APKVER.apk glibc-i18n-$GLIBC_APKVER.apk && \ 25 | echo "export LANG=$LANG" > /etc/profile.d/locale.sh && \ 26 | rm /etc/apk/keys/sgerrand.rsa.pub glibc-*.apk 27 | 28 | 29 | 30 | # Get gosu 31 | RUN apk add --update --no-cache --virtual .deps gnupg && \ 32 | curl -sSL https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64 \ 33 | -o /usr/local/bin/gosu && chmod 755 /usr/local/bin/gosu \ 34 | && curl -sSL -o /tmp/gosu.asc https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc \ 35 | && export GNUPGHOME=/tmp \ 36 | && gpg --keyserver keyserver.ubuntu.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \ 37 | && gpg --batch --verify /tmp/gosu.asc /usr/local/bin/gosu \ 38 | && rm -r /tmp/* && apk del .deps 39 | 40 | # Fetch and unpack spark dist 41 | RUN curl -L https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz \ 42 | | tar -xzp -C /usr/local/ && \ 43 | ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 ${SPARK_HOME} 44 | 45 | # Create users (to go "non-root") and set directory permissions 46 | RUN useradd -mU -d /home/hadoop hadoop && passwd -d hadoop && \ 47 | useradd -mU -d /home/$SPARK_USER -G hadoop $SPARK_USER && passwd -d $SPARK_USER && \ 48 | chown -R $SPARK_USER:hadoop $SPARK_HOME 49 | 50 | ADD entrypoint.sh spark-defaults.conf / 51 | ADD output-consumer-submit-job.sh evaluator-submit-job.sh active-driver-check.sh / 52 | COPY spark-defaults.conf /usr/local/spark/conf/ 53 | RUN chmod +x evaluator-submit-job.sh 54 | RUN chmod +x output-consumer-submit-job.sh 55 | RUN chmod +x active-driver-check.sh 56 | RUN chmod +x /entrypoint.sh 57 | RUN apk add jq 58 | 59 | ## Scratch directories can be passed as volumes 60 | # SPARK_HOME/work directory used on worker for scratch space and job output logs. 61 | # /tmp - Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored on disk. 62 | VOLUME [ "/usr/local/spark/work", "/tmp" ] 63 | RUN chown -R $SPARK_USER:hadoop /tmp 64 | 65 | EXPOSE 8080 8081 6066 7077 4040 7001 7002 7003 7004 7005 7006 66 | ENTRYPOINT [ "/entrypoint.sh" ] 67 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/active-driver-check.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export SPARK_MASTER_IP=$1 3 | cd /usr/local/spark 4 | export APPS=$(curl --silent http://$SPARK_MASTER_IP:7777/json/applications) 5 | export ACTIVE_DRIVER_ID=$(echo $APPS | jq -c '.activedrivers[0].id') 6 | echo $ACTIVE_DRIVER_ID 7 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ## Defaults 5 | # 6 | : ${SPARK_HOME:?must be set!} 7 | default_opts="--properties-file $SPARK_HOME/conf/spark-defaults.conf" 8 | 9 | # Check if CLI args list containes bind address key. 
10 | cli_bind_address() { 11 | echo "$*" | grep -qE -- "--host\b|-h\b|--ip\b|-i\b" 12 | } 13 | 14 | # Set permissions on the scratch volumes 15 | scratch_volumes_permissions() { 16 | mkdir -p $SPARK_HOME/work && chown $SPARK_USER:hadoop $SPARK_HOME/work 17 | chmod 1777 /tmp 18 | } 19 | 20 | 21 | ## Configuration sourcing 22 | . $SPARK_HOME/sbin/spark-config.sh 23 | . $SPARK_HOME/bin/load-spark-env.sh 24 | 25 | 26 | ## Entrypoint 27 | 28 | scratch_volumes_permissions 29 | 30 | instance=master 31 | CLASS="org.apache.spark.deploy.$instance.${instance^}" 32 | 33 | # Handle custom bind address set via ENV or CLI 34 | eval bind_address=\$SPARK_${instance^^}_IP 35 | if ( ! cli_bind_address $@ ) && [ ! -z $bind_address ] ; then 36 | default_opts="${default_opts} --host ${bind_address} " 37 | fi 38 | 39 | echo "==> spark-class invocation arguments: $CLASS $default_opts $@" 40 | 41 | cd /tmp 42 | exec gosu $SPARK_USER:hadoop $SPARK_HOME/bin/spark-class $CLASS $default_opts --webui-port 7777 $@ 43 | ;; 44 | shell) 45 | shift 46 | echo "==> spark-shell invocation arguments: $default_opts $@" 47 | 48 | cd /tmp 49 | exec gosu $SPARK_USER:hadoop $SPARK_HOME/bin/spark-shell $default_opts $@ 50 | ;; 51 | *) 52 | cmdline="$@" 53 | exec ${cmdline:-/bin/bash} 54 | ;; 55 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/evaluator-submit-job.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export FRAMEWORK=$1 3 | export MODE=$2 4 | export AWS_ACCESS_KEY=$3 5 | export AWS_SECRET_KEY=$4 6 | export LAST_STAGE=$5 7 | export CLUSTER_URL=$6 8 | export INFLUXDB_URL=$7 9 | export DCOS_ACCESS_TOKEN=$8 10 | export FILEPATH=$9 11 | export AMT_WORKERS=${10} 12 | export WORKER_CPU=${11} 13 | export WORKER_MEM=${12} 14 | export BEGINTIME=${13} 15 | export JAR_NAME=${14} 16 | export INPUT_METRICS_PATH=${15} 17 | export RESULTS_PATH=${16} 18 | 19 | export SPARK_CORES_MAX=$(($AMT_WORKERS*$WORKER_CPU)) # executors*cores_per_executor + one core for driver 20 | export SPARK_DRIVER_MEMORY="4096m" 21 | export SPARK_DRIVER_CORES=1 22 | export SPARK_DEFAULT_PARALLELISM=$SPARK_CORES_MAX 23 | 24 | cd /usr/local/spark/ 25 | ./bin/spark-submit --master spark://spark-master.marathon.mesos:7077 \ 26 | --deploy-mode cluster \ 27 | --driver-memory $SPARK_DRIVER_MEMORY --driver-cores $SPARK_DRIVER_CORES --total-executor-cores 20 --executor-memory 17408m \ 28 | --driver-java-options="-Dspark.driver.host=$SPARK_WORKER2_HOST" \ 29 | --properties-file /usr/local/spark/conf/spark-defaults.conf \ 30 | --jars https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar \ 31 | --conf "spark.executor.extraJavaOptions=-Dcom.amazonaws.services.s3.enableV4" --conf "spark.driver.extraJavaOptions=-Dcom.amazonaws.services.s3.enableV4" \ 32 | --conf spark.FRAMEWORK=$FRAMEWORK \ 33 | --conf spark.MODE=$MODE \ 34 | --conf spark.AWS_ACCESS_KEY=$AWS_ACCESS_KEY \ 35 | --conf spark.AWS_SECRET_KEY=$AWS_SECRET_KEY \ 36 | --conf spark.FILEPATH=$FILEPATH \ 37 | --conf spark.LAST_STAGE=$LAST_STAGE \ 38 | --conf spark.CLUSTER_URL=$CLUSTER_URL \ 39 | --conf spark.INFLUXDB_URL=$INFLUXDB_URL \ 40 | --conf spark.AMT_WORKERS=$AMT_WORKERS \ 41 | --conf spark.WORKER_CPU=$WORKER_CPU \ 42 | --conf spark.WORKER_MEM=$WORKER_MEM \ 43 | --conf spark.DCOS_ACCESS_TOKEN=$DCOS_ACCESS_TOKEN \ 44 | --conf spark.BEGINTIME=$BEGINTIME \ 45 | --conf spark.INPUT_METRICS_PATH=$INPUT_METRICS_PATH \ 46 | --conf 
spark.RESULTS_PATH=$RESULTS_PATH \ 47 | --conf spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM \ 48 | --conf spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM \ 49 | --class evaluation.EvaluationMain \ 50 | $JAR_NAME 0 51 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/output-consumer-submit-job.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export FRAMEWORK=$1 3 | export MODE=$2 4 | export KAFKA_BOOTSTRAP_SERVERS=$3 5 | export AWS_ACCESS_KEY=$4 6 | export AWS_SECRET_KEY=$5 7 | export JOBUUID=$6 8 | export SPARK_WORKER2_HOST=$7 9 | export AMT_WORKERS=${8} 10 | export WORKER_CPU=${9} 11 | export WORKER_MEM=${10} 12 | export JAR_NAME=${11} 13 | export OUTPUT_METRICS_PATH=${12} 14 | 15 | export SPARK_CORES_MAX=$(($AMT_WORKERS*$WORKER_CPU)) # executors*cores_per_executor + one core for driver 16 | export SPARK_DRIVER_MEMORY="4096m" 17 | export SPARK_DRIVER_CORES=1 18 | export SPARK_DEFAULT_PARALLELISM=$SPARK_CORES_MAX 19 | 20 | cd /usr/local/spark/ 21 | DRIVER_ID=$(./bin/spark-submit --master spark://spark-master.marathon.mesos:7077 \ 22 | --deploy-mode cluster \ 23 | --driver-memory $SPARK_DRIVER_MEMORY --driver-cores $SPARK_DRIVER_CORES --total-executor-cores 20 --executor-memory 17408m \ 24 | --driver-java-options="-Dspark.driver.host=$SPARK_WORKER2_HOST" \ 25 | --properties-file /usr/local/spark/conf/spark-defaults.conf \ 26 | --jars https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar \ 27 | --conf "spark.executor.extraJavaOptions=-Dcom.amazonaws.services.s3.enableV4" --conf "spark.driver.extraJavaOptions=-Dcom.amazonaws.services.s3.enableV4" \ 28 | --conf spark.FRAMEWORK=$FRAMEWORK \ 29 | --conf spark.JOBUUID=$JOBUUID \ 30 | --conf spark.MODE=$MODE \ 31 | --conf spark.KAFKA_BOOTSTRAP_SERVERS=$KAFKA_BOOTSTRAP_SERVERS \ 32 | --conf spark.AWS_ACCESS_KEY=$AWS_ACCESS_KEY \ 33 | --conf spark.AWS_SECRET_KEY=$AWS_SECRET_KEY \ 34 | --conf spark.OUTPUT_METRICS_PATH=$OUTPUT_METRICS_PATH \ 35 | --conf spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM \ 36 | --conf spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM \ 37 | --conf spark.driver.host=$SPARK_WORKER2_HOST \ 38 | --class output.consumer.OutputConsumer \ 39 | $JAR_NAME 0 | grep -o 'driver-\w*-\w*' | head -n1) 40 | 41 | 42 | echo $DRIVER_ID > /driver-output-consumer.txt 43 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 
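# Note: `cat version` resolves to the image tag kept in the adjacent "version" file
# (gisellevd/ospbench-spark-master:3.0.0 for this image), so build and push use the same tag.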
3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.driver.port 7001 2 | spark.fileserver.port 7002 3 | spark.broadcast.port 7003 4 | spark.replClassServer.port 7004 5 | spark.blockManager.port 7005 6 | spark.executor.port 7006 7 | spark.ui.port 4040 8 | spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory 9 | spark.local.dir /usr/local/spark/work 10 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem 11 | spark.hadoop.fs.s3a.endpoint s3.eu-central-1.amazonaws.com 12 | spark.sql.streaming.minBatchesToRetain 2 13 | #spark.deploy.recoveryMode ZOOKEEPER 14 | #spark.deploy.zookeeper.url zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka 15 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/spark-kill-job.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export SPARK_MASTER_IP=$1 3 | cd /usr/local/spark 4 | export APPS=$(curl http://$SPARK_MASTER_IP:7777/json/applications) 5 | export ACTIVE_DRIVER_ID=$(echo $APPS | jq -c '.activedrivers[0].id'| tr -d '"') 6 | 7 | ./bin/spark-class org.apache.spark.deploy.Client kill spark://spark-master.marathon.mesos:7077 $ACTIVE_DRIVER_ID 8 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-master/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-spark-master:3.0.0 2 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-submit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:8-jre-alpine 2 | 3 | ENV GOSU_VERSION 1.10 4 | ENV SPARK_VERSION 3.0.0 5 | ENV SPARK_HOME /usr/local/spark 6 | ENV SPARK_LOCAL_DIRS /usr/local/spark/work 7 | ENV SPARK_USER aml 8 | ARG GLIBC_APKVER=2.27-r0 9 | ENV LANG=en_US.UTF-8 10 | 11 | LABEL vendor=ActionML \ 12 | version_tags="[\"2.1\",\"2.1.2\"]" 13 | 14 | # Update alpine and install required tools 15 | RUN echo "@community http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \ 16 | apk add --update --no-cache bash curl shadow@community 17 | 18 | # Glibc compatibility 19 | RUN curl -sSL https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/sgerrand.rsa.pub \ 20 | -o /etc/apk/keys/sgerrand.rsa.pub && \ 21 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-i18n-$GLIBC_APKVER.apk && \ 22 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-$GLIBC_APKVER.apk && \ 23 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-bin-$GLIBC_APKVER.apk && \ 24 | apk add --no-cache glibc-$GLIBC_APKVER.apk glibc-bin-$GLIBC_APKVER.apk glibc-i18n-$GLIBC_APKVER.apk && \ 25 | echo "export LANG=$LANG" > /etc/profile.d/locale.sh && \ 26 | rm /etc/apk/keys/sgerrand.rsa.pub glibc-*.apk 27 | 28 | # Get gosu 29 | RUN apk add --update --no-cache --virtual .deps gnupg && \ 30 | curl -sSL https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64 \ 31 | -o /usr/local/bin/gosu && chmod 755 
/usr/local/bin/gosu \ 32 | && curl -sSL -o /tmp/gosu.asc https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc \ 33 | && export GNUPGHOME=/tmp \ 34 | && gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \ 35 | && gpg --batch --verify /tmp/gosu.asc /usr/local/bin/gosu \ 36 | && rm -r /tmp/* && apk del .deps 37 | 38 | # Fetch and unpack spark dist 39 | RUN curl -L https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz \ 40 | | tar -xzp -C /usr/local/ && \ 41 | ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 ${SPARK_HOME} 42 | 43 | # Create users (to go "non-root") and set directory permissions 44 | RUN useradd -mU -d /home/hadoop hadoop && passwd -d hadoop && \ 45 | useradd -mU -d /home/$SPARK_USER -G hadoop $SPARK_USER && passwd -d $SPARK_USER && \ 46 | chown -R $SPARK_USER:hadoop $SPARK_HOME 47 | 48 | 49 | COPY executejar.sh / 50 | 51 | RUN apk add gettext 52 | 53 | RUN mkdir -p /opt/docker/tmp 54 | 55 | RUN mkdir -p /spark/logs 56 | 57 | ADD spark-defaults.conf $SPARK_HOME/spark-defaults.conf 58 | 59 | ENV SPARK_LOCAL_DIRS /opt/docker/tmp 60 | 61 | RUN chmod +x executejar.sh 62 | 63 | 64 | ## Scratch directories can be passed as volumes 65 | # SPARK_HOME/work directory used on worker for scratch space and job output logs. 66 | # /tmp - Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored on disk. 67 | VOLUME [ "/usr/local/spark/work", "/tmp" ] 68 | RUN chown -R $SPARK_USER:hadoop /tmp 69 | 70 | EXPOSE 8081 71 | USER $SPARK_USER:hadoop 72 | CMD ["/executejar.sh"] 73 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-submit/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` . 
3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-submit/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.driver.port 7008 2 | spark.fileserver.port 7002 3 | spark.broadcast.port 7003 4 | spark.replClassServer.port 7004 5 | spark.blockManager.port 7005 6 | spark.executor.port 7006 7 | spark.ui.port 4040 8 | spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory 9 | spark.local.dir /usr/local/spark/work 10 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem 11 | spark.hadoop.fs.s3a.endpoint s3.eu-central-1.amazonaws.com 12 | spark.sql.streaming.minBatchesToRetain 2 13 | #spark.deploy.recoveryMode ZOOKEEPER 14 | #spark.deploy.zookeeper.url zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka 15 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-submit/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-spark-submit:3.0.0 2 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/.travis.yml: -------------------------------------------------------------------------------- 1 | language: bash 2 | services: docker 3 | 4 | env: 5 | global: 6 | - PROJECT=spark 7 | - NAMESPACE=actionml 8 | - PUSH_NAMESPACES=actionml 9 | 10 | install: 11 | - curl -sSL https://github.com/stackfeed/ci-scripts/raw/master/install.sh | sh -s 12 | - git clone https://github.com/docker-library/official-images.git ~/official-images 13 | 14 | before_script: 15 | - | 16 | export PATH=$PATH:~/ci-scripts 17 | [ ${TRAVIS_BRANCH} == master ] || export variant=${TRAVIS_BRANCH} 18 | 19 | script: 20 | - docker-build -v "${variant}" $NAMESPACE/$PROJECT . 21 | - ~/official-images/test/run.sh $(docker images -q | head -n1) 22 | 23 | after_success: 24 | - | 25 | # Check if not PR and namespace (org or user) is in the push list PUSH_NAMESPACES. 26 | [ "${TRAVIS_PULL_REQUEST}" = "false" ] && ( echo "${PUSH_NAMESPACES}" | grep -qw "${TRAVIS_REPO_SLUG%/*}" ) 27 | [ $? 
-eq 0 ] || exit 0 28 | 29 | # List of newly created images 30 | export images=$(docker images | grep "^$NAMESPACE/$PROJECT" | tr -s '[:space:]' | cut -f1,2 -d' ' | sed 's/ /:/') 31 | 32 | # Push to docker when DEPLOY is true 33 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 34 | for image in $images; do docker push $image; done 35 | 36 | after_script: 37 | - docker images 38 | 39 | notifications: 40 | webhooks: https://hooks.microbadger.com/images/actionml/spark/${MICROBADGER_TOKEN} 41 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:8-jre-alpine 2 | 3 | ENV GOSU_VERSION 1.10 4 | ENV SPARK_VERSION 3.0.0 5 | ENV SPARK_HOME /usr/local/spark 6 | ENV SPARK_LOCAL_DIRS /usr/local/spark/work 7 | ENV SPARK_USER aml 8 | ARG GLIBC_APKVER=2.27-r0 9 | ENV LANG=en_US.UTF-8 10 | 11 | LABEL vendor=ActionML \ 12 | version_tags="[\"2.1\",\"2.1.2\"]" 13 | 14 | # Update alpine and install required tools 15 | RUN echo "@community http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \ 16 | apk add --update --no-cache bash curl shadow@community 17 | 18 | # Glibc compatibility 19 | RUN curl -sSL https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/sgerrand.rsa.pub \ 20 | -o /etc/apk/keys/sgerrand.rsa.pub && \ 21 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-i18n-$GLIBC_APKVER.apk && \ 22 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-$GLIBC_APKVER.apk && \ 23 | curl -sSLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/$GLIBC_APKVER/glibc-bin-$GLIBC_APKVER.apk && \ 24 | apk add --no-cache glibc-$GLIBC_APKVER.apk glibc-bin-$GLIBC_APKVER.apk glibc-i18n-$GLIBC_APKVER.apk && \ 25 | echo "export LANG=$LANG" > /etc/profile.d/locale.sh && \ 26 | rm /etc/apk/keys/sgerrand.rsa.pub glibc-*.apk 27 | 28 | # Get gosu 29 | RUN apk add --update --no-cache --virtual .deps gnupg && \ 30 | curl -sSL https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64 \ 31 | -o /usr/local/bin/gosu && chmod 755 /usr/local/bin/gosu \ 32 | && curl -sSL -o /tmp/gosu.asc https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc \ 33 | && export GNUPGHOME=/tmp \ 34 | && gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \ 35 | && gpg --batch --verify /tmp/gosu.asc /usr/local/bin/gosu \ 36 | && rm -r /tmp/* && apk del .deps 37 | 38 | # Fetch and unpack spark dist 39 | RUN curl -L https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz \ 40 | | tar -xzp -C /usr/local/ && \ 41 | ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 ${SPARK_HOME} 42 | 43 | # Create users (to go "non-root") and set directory permissions 44 | RUN useradd -mU -d /home/hadoop hadoop && passwd -d hadoop && \ 45 | useradd -mU -d /home/$SPARK_USER -G hadoop $SPARK_USER && passwd -d $SPARK_USER && \ 46 | chown -R $SPARK_USER:hadoop $SPARK_HOME 47 | 48 | ADD entrypoint.sh spark-defaults.conf / 49 | COPY spark-defaults.conf /usr/local/spark/conf/ 50 | RUN chmod +x /entrypoint.sh 51 | 52 | ## Scratch directories can be passed as volumes 53 | # SPARK_HOME/work directory used on worker for scratch space and job output logs. 
54 | # /tmp - Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored on disk. 55 | VOLUME [ "/usr/local/spark/work", "/tmp" ] 56 | RUN chown -R $SPARK_USER:hadoop /tmp 57 | 58 | EXPOSE 8080 8081 6066 7077 4040 7001 7002 7003 7004 7005 7006 59 | ENTRYPOINT [ "/entrypoint.sh" ] 60 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ## Defaults 5 | # 6 | : ${SPARK_HOME:?must be set!} 7 | default_opts="--properties-file /spark-defaults.conf" 8 | 9 | 10 | # Check if CLI args list contains bind address key. 11 | cli_bind_address() { 12 | echo "$*" | grep -qE -- "--host\b|-h\b|--ip\b|-i\b" 13 | } 14 | 15 | # Set permissions on the scratch volumes 16 | scratch_volumes_permissions() { 17 | mkdir -p $SPARK_HOME/work && chown $SPARK_USER:hadoop $SPARK_HOME/work 18 | chmod 1777 /tmp 19 | } 20 | 21 | 22 | ## Configuration sourcing 23 | . $SPARK_HOME/sbin/spark-config.sh 24 | . $SPARK_HOME/bin/load-spark-env.sh 25 | 26 | 27 | 28 | 29 | 30 | ## Entrypoint 31 | 32 | scratch_volumes_permissions 33 | 34 | instance=worker 35 | CLASS="org.apache.spark.deploy.$instance.${instance^}" 36 | 37 | # Handle custom bind address set via ENV or CLI 38 | eval bind_address=$SPARK_MASTER_IP 39 | echo $bind_address 40 | if ( ! cli_bind_address $@ ) && [ ! -z $bind_address ] ; then 41 | default_opts="${default_opts} --host ${bind_address}" 42 | fi 43 | 44 | echo "==> spark-class invocation arguments: $CLASS $default_opts $@" 45 | echo "==> spark-class invocation arguments: $CLASS " 46 | echo "==> spark-class invocation arguments: $default_opts" 47 | echo "==> spark-class invocation arguments: $@" 48 | 49 | cd /tmp 50 | 51 | exec gosu $SPARK_USER:hadoop $SPARK_HOME/bin/spark-class $CLASS --webui-port 7778 --cores $CORES --memory $MEMORY --properties-file $SPARK_HOME/conf/spark-defaults.conf spark://spark-master.marathon.mesos:7077 52 | ;; 53 | shell) 54 | shift 55 | echo "==> spark-shell invocation arguments: $default_opts $@" 56 | 57 | cd /tmp 58 | exec gosu $SPARK_USER:hadoop $SPARK_HOME/bin/spark-shell $default_opts $@ 59 | ;; 60 | *) 61 | cmdline="$@" 62 | exec ${cmdline:-/bin/bash} 63 | ;; 64 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t `cat version` .
3 | docker push `cat version` 4 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.driver.port 7001 2 | spark.fileserver.port 7002 3 | spark.broadcast.port 7003 4 | spark.replClassServer.port 7004 5 | spark.blockManager.port 7005 6 | spark.executor.port 7006 7 | spark.ui.port 4041 8 | spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory 9 | spark.local.dir /tmp 10 | spark.sql.streaming.minBatchesToRetain 2 11 | #spark.deploy.recoveryMode ZOOKEEPER 12 | #spark.deploy.zookeeper.url zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181,zk-4.zk:2181,zk-5.zk:2181/kafka 13 | -------------------------------------------------------------------------------- /deployment/spark_cluster_3.0.0/docker-spark-worker/version: -------------------------------------------------------------------------------- 1 | gisellevd/ospbench-spark-worker:3.0.0 2 | -------------------------------------------------------------------------------- /evaluator/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | project/target 3 | project/project 4 | target -------------------------------------------------------------------------------- /evaluator/build.sbt: -------------------------------------------------------------------------------- 1 | enablePlugins(JavaAppPackaging) 2 | name := "stream-processing-evaluator" 3 | 4 | scalaVersion := "2.12.8" 5 | 6 | libraryDependencies ++= Dependencies.evaluation ++ Dependencies.typeSafe 7 | //dependencyOverrides ++= Dependencies.jacksonDependencyOverrides 8 | 9 | mainClass in(Compile, run) := Some("evaluation.EvaluationMain") 10 | mainClass in assembly := Some("evaluation.EvaluationMain") 11 | assemblyMergeStrategy in assembly := { 12 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 13 | case _ => MergeStrategy.first 14 | } 15 | -------------------------------------------------------------------------------- /evaluator/project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Keys._ 3 | 4 | object Versions { 5 | val asyncHttpClient = "2.1.0-alpha22" 6 | 7 | val httpClient = "4.2" 8 | val hadoopAws = "2.7.3" 9 | 10 | val spark = "3.0.0" 11 | val scalaBinary = "2.12" 12 | 13 | val typeSafe = "1.3.1" 14 | val typeSafePlay = "2.9.0" 15 | 16 | val scalaInfluxDBconnector = "0.5.2" 17 | } 18 | 19 | 20 | object Dependencies { 21 | val evaluation: Seq[ModuleID] = Seq( 22 | "org.apache.spark" % s"spark-core_${Versions.scalaBinary}" % Versions.spark % "provided", 23 | "org.apache.spark" % s"spark-sql_${Versions.scalaBinary}" % Versions.spark % "provided", 24 | "org.apache.spark" % s"spark-mllib_${Versions.scalaBinary}" % Versions.spark % "provided" 25 | ) 26 | 27 | 28 | val typeSafe: Seq[ModuleID] = Seq("com.typesafe.play" % s"play-json_${Versions.scalaBinary}" % Versions.typeSafePlay, 29 | "com.typesafe" % "config" % Versions.typeSafe, 30 | 31 | "org.asynchttpclient" % "async-http-client" % Versions.asyncHttpClient, 32 | "com.paulgoldbaum" %% "scala-influxdb-client" % Versions.scalaInfluxDBconnector exclude("org.asynchttpclient", "async-http-client"), 33 | 34 | "org.apache.httpcomponents" % "httpclient" % Versions.httpClient, 35 | 36 | //"org.apache.hadoop" % "hadoop-aws" % Versions.hadoopAws, 37 | "com.amazonaws" % "aws-java-sdk" % "1.7.4", 38 | 
"io.netty" % "netty-all" % "4.1.17.Final" 39 | 40 | ).map(_.exclude("com.fasterxml.jackson.core", "jackson-core") 41 | .exclude("com.fasterxml.jackson.core", "jackson-annotations") 42 | .exclude("com.fasterxml.jackson.core", "jackson-databind")) 43 | 44 | val jacksonDependencyOverrides = Set("com.fasterxml.jackson.core" % "jackson-core" % "2.6.7", 45 | "com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7", 46 | "com.fasterxml.jackson.module" % "jackson-module-scala_2.11" % "2.6.7") 47 | } -------------------------------------------------------------------------------- /evaluator/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.13 -------------------------------------------------------------------------------- /evaluator/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | resolvers := Seq( 3 | "otto-bintray" at "https://dl.bintray.com/ottogroup/maven", 4 | "Sbt plugins" at "https://dl.bintray.com/sbt/sbt-plugin-releases" 5 | ) 6 | 7 | addSbtPlugin("com.typesafe.sbt" %% "sbt-native-packager" % "1.3.2") 8 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") 9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 10 | -------------------------------------------------------------------------------- /evaluator/src/main/resources/resources.conf: -------------------------------------------------------------------------------- 1 | mode = "aws" #local or aws 2 | 3 | aws { 4 | endpoint = "s3.eu-central-1.amazonaws.com" 5 | } 6 | -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/EvaluationMain.scala: -------------------------------------------------------------------------------- 1 | package evaluation 2 | 3 | import com.amazonaws.SDKGlobalConfiguration 4 | import evaluation.config.EvaluationConfig 5 | import evaluation.modes._ 6 | import evaluation.utils.IOUtils 7 | import org.apache.spark.sql.SparkSession 8 | 9 | /** 10 | * Main entrypoint of application. 
11 | * Computes key metrics of a benchmark run and does pre-processing for visualization in Notebooks 12 | * - Initializes Spark 13 | * - Chooses the right evaluator based on the mode 14 | * - Computes all metrics for mode and writes them to S3 15 | * 16 | * There are seven modes: 17 | * - constant-rate: used for sustainable throughput workload and scalability workloads of OSPBench 18 | * - latency-constant-rate: used for latency workload of OSPBench 19 | * - single-burst: used for single burst at startup workload of OSPBench 20 | * - periodic-burst: used for workload with periodic bursts of OSPBench 21 | * - worker-failure: used for workload with worker failure of OSPBench 22 | * - master-failure: used for workload with master failure of OSPBench 23 | * - faulty-event: used for workload with job, stage or task failure of OSPBench 24 | */ 25 | object EvaluationMain { 26 | def main(args: Array[String]): Unit = { 27 | System.setProperty(SDKGlobalConfiguration.ENABLE_S3_SIGV4_SYSTEM_PROPERTY, "true") 28 | val sparkSession = initSpark 29 | val evaluationConfig = EvaluationConfig(sparkSession) 30 | 31 | // Configuration to read and write to S3 32 | sparkSession.sparkContext.hadoopConfiguration.set("fs.s3a.access.key", evaluationConfig.awsAccessKey) 33 | sparkSession.sparkContext.hadoopConfiguration.set("fs.s3a.secret.key", evaluationConfig.awsSecretKey) 34 | sparkSession.sparkContext.hadoopConfiguration.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") 35 | sparkSession.sparkContext.hadoopConfiguration.set("fs.s3a.endpoint", evaluationConfig.awsEndpoint) 36 | 37 | val ioUtils = new IOUtils(sparkSession, evaluationConfig) 38 | val (data, runTimes) = ioUtils.readDataAndComputeRunTimes() 39 | 40 | if (evaluationConfig.mode == "constant-rate") 41 | new ConstantRateEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 42 | else if (evaluationConfig.mode == "latency-constant-rate") 43 | new LatencyConstantRateEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 44 | else if (evaluationConfig.mode =="single-burst") 45 | new SingleBurstEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 46 | else if (evaluationConfig.mode == "periodic-burst") 47 | new PeriodicBurstEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 48 | else if (evaluationConfig.mode == "worker-failure") 49 | new WorkerFailureEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 50 | else if (evaluationConfig.mode == "master-failure") 51 | new MasterFailureEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 52 | else if (evaluationConfig.mode == "faulty-event") 53 | new FaultyEventEvaluator(data, runTimes, evaluationConfig, sparkSession).run() 54 | } 55 | 56 | def initSpark: SparkSession = { 57 | SparkSession.builder() 58 | .appName("benchmark-evaluator") 59 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 60 | .getOrCreate() 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/config/EvaluationConfig.scala: -------------------------------------------------------------------------------- 1 | package evaluation.config 2 | 3 | import java.sql.Timestamp 4 | 5 | import com.typesafe.config.{Config, ConfigFactory} 6 | import org.apache.spark.sql.SparkSession 7 | 8 | case class EvaluationConfig(sparkSession: SparkSession) extends Serializable { 9 | 10 | val systemTime: String = new Timestamp(System.currentTimeMillis()).toString.replaceAll(" ", 
"_").replaceAll(":", "_").replaceAll("\\.", "_") 11 | 12 | // get configurations from the spark submit arguments 13 | val framework: String = sparkSession.conf.get("spark.FRAMEWORK") 14 | val clusterUrl: String = sparkSession.conf.get("spark.CLUSTER_URL") 15 | val influxDbUrl: String = sparkSession.conf.get("spark.INFLUXDB_URL") 16 | val dcosAccessToken: String = sparkSession.conf.get("spark.DCOS_ACCESS_TOKEN") 17 | val mode: String = sparkSession.conf.get("spark.MODE") 18 | val jobuuid: String = sparkSession.conf.get("spark.FILEPATH") 19 | val lastStage: String = sparkSession.conf.get("spark.LAST_STAGE") 20 | val beginTime: String = sparkSession.conf.get("spark.BEGINTIME") 21 | val inputMetricsPath: String = sparkSession.conf.get("spark.INPUT_METRICS_PATH") 22 | val resultsPath: String = sparkSession.conf.get("spark.RESULTS_PATH") 23 | 24 | val memoryPerWorker: String = sparkSession.conf.get("spark.WORKER_MEM") 25 | val cpuPerWorker: Int = sparkSession.conf.get("spark.WORKER_CPU").toInt 26 | val amountOfWorkers: Int = sparkSession.conf.get("spark.AMT_WORKERS").toInt 27 | val totalCpuResources: Int = cpuPerWorker*amountOfWorkers 28 | 29 | // Config file variables 30 | lazy val configProperties: Config = ConfigFactory.load("resources.conf") 31 | lazy val awsEndpoint: String = configProperties.getString("aws.endpoint") 32 | lazy val awsAccessKey: String = sparkSession.conf.get("spark.AWS_ACCESS_KEY") 33 | lazy val awsSecretKey: String = sparkSession.conf.get("spark.AWS_SECRET_KEY") 34 | 35 | lazy val dataFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/observations-log-" + jobuuid + "*" 36 | lazy val resourceMetricsFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/metrics-log-" + jobuuid + "*" 37 | lazy val gcMetricsFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/gc-log-" + jobuuid + "*" 38 | lazy val cadvisorMetricsFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/cadvisor-log-" + jobuuid + "*" 39 | lazy val cadvisorHdfsMetricsFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/hdfs-cadvisor-log-" + jobuuid + "*" 40 | lazy val cadvisorKafkaMetricsFrameworkPath: String = inputMetricsPath + "/" + framework + "/" + mode + "/kafka-cadvisor-log-" + jobuuid + "*" 41 | 42 | def dataOutputPath(dataContent: String): String = { 43 | resultsPath + "/" + framework + "/" + mode + "/stage" + lastStage + "/" + amountOfWorkers + "x-" + cpuPerWorker + "cpu-" + memoryPerWorker + "gb/" + beginTime + "-" + jobuuid + "/" + dataContent + ".csv" 44 | } 45 | 46 | 47 | val listOfTaskNames: List[String] = if (framework.contains("FLINK")) { 48 | List("flink-jobmanager") ++ (1 to amountOfWorkers).map{ el => "flink-taskmanager-" + el}.toList 49 | } else if (framework.contains("KAFKASTREAMS")) { 50 | (1 to amountOfWorkers).map{ el => "kafka-thread-" + el}.toList 51 | } else { 52 | List("spark-master") ++ (1 to amountOfWorkers).map{ el => "spark-worker-" + el}.toList 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/metrics/cadvisor/CadvisorQueryClient.scala: -------------------------------------------------------------------------------- 1 | package evaluation.metrics.cadvisor 2 | 3 | import evaluation.utils.QueryResponse 4 | import org.apache.http.client.methods.HttpGet 5 | import org.apache.http.impl.client.DefaultHttpClient 6 | import org.apache.http.util.EntityUtils 7 | 8 | import 
scala.util.parsing.json.JSON 9 | 10 | object CadvisorQueryClient { 11 | val client = new DefaultHttpClient 12 | 13 | def getResponse(url: String): Map[String, Any] = { 14 | val httpGet = new HttpGet(url) 15 | 16 | val response = client.execute(httpGet) 17 | 18 | val in = response.getEntity 19 | val encoding = response.getEntity.getContentEncoding 20 | 21 | val decodedResponse = EntityUtils.toString(in, "UTF-8") 22 | 23 | val jsonResponse = JSON.parseFull(decodedResponse).get.asInstanceOf[Map[String, Any]] 24 | jsonResponse 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/ConstantRateEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.{CadvisorResourceComputer, HdfsResourceComputer, KafkaResourceComputer} 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Analyzes high-throughput runs and therefore computes less detailed metrics on latency and throughput. 13 | * It needs to compute a lot of detailed metrics to identify the bottleneck of the job. 14 | * We also include metrics on HDFS and Kafka to check if they are not the bottleneck. 15 | * 16 | * @param data: data of the run to analyze 17 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
18 | * @param evaluationConfig: configuration object 19 | * @param sparkSession 20 | */ 21 | class ConstantRateEvaluator(data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 22 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 23 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 24 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 25 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 26 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 27 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 28 | val cadvisorResourceComputer = new CadvisorResourceComputer(runTimes, sparkSession, evaluationConfig) 29 | val hdfsResourceComputer = new HdfsResourceComputer(runTimes, sparkSession, evaluationConfig) 30 | val kafkaResourceComputer = new KafkaResourceComputer(runTimes, sparkSession, evaluationConfig) 31 | 32 | def run(): Unit = { 33 | // Source: Cadvisor 34 | val cpuMetrics = cadvisorResourceUtils.processCpuMetricsAndComputeStats() 35 | cadvisorResourceUtils.processNetworkMetrics() 36 | cadvisorResourceUtils.processMemoryMetrics() 37 | 38 | cadvisorResourceComputer.wrangleCadvisorMetrics() 39 | hdfsResourceComputer.wrangleHdfsCadvisorMetrics() 40 | kafkaResourceComputer.wrangleKafkaCadvisorMetrics() 41 | 42 | // Source: JMX 43 | val jmxAggregatedMetrics = jmxResourceUtils.compute() 44 | jmxGcUtils.compute() 45 | 46 | // Source: Others 47 | val latency = latencyUtils.computeForSustainableThroughput(data) 48 | val throughput = throughputUtils.computeForSustainableTP(data) 49 | 50 | metricUtils.computeJobOverview(runTimes, latency, throughput, jmxAggregatedMetrics, cpuMetrics) 51 | } 52 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/FaultyEventEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Computes metrics to analyze the job, stage, task failure workload. 13 | * 14 | * @param data: data of the run to analyze 15 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
16 | * @param evaluationConfig: configuration object 17 | * @param sparkSession 18 | */ 19 | class FaultyEventEvaluator (data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 20 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 21 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 22 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 23 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 24 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 25 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 26 | 27 | def run(): Unit = { 28 | // Source: Cadvisor 29 | val cpuMetrics = cadvisorResourceUtils.processCpuMetricsAndComputeStats() 30 | cadvisorResourceUtils.processNetworkMetrics() 31 | cadvisorResourceUtils.processMemoryMetrics() 32 | 33 | // Source: JMX 34 | val jmxAggregatedMetrics = jmxResourceUtils.compute() 35 | jmxGcUtils.compute() 36 | 37 | // Source: Others 38 | val latency = latencyUtils.compute(data) 39 | val throughput = throughputUtils.compute(data) 40 | 41 | metricUtils.computeJobOverview(runTimes, latency, throughput, jmxAggregatedMetrics, cpuMetrics) 42 | } 43 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/LatencyConstantRateEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Needs to give a detailed view of the latency distribution of the job. 13 | * 14 | * @param data: data of the run to analyze 15 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
16 | * @param evaluationConfig: configuration object 17 | * @param sparkSession 18 | */ 19 | class LatencyConstantRateEvaluator(data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 20 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 21 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 22 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 23 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 24 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 25 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 26 | 27 | def run(): Unit = { 28 | // Source: Cadvisor 29 | val cpuMetrics = cadvisorResourceUtils.processCpuMetricsAndComputeStats() 30 | cadvisorResourceUtils.processNetworkMetrics() 31 | cadvisorResourceUtils.processMemoryMetrics() 32 | 33 | // Source: JMX 34 | val jmxAggregatedMetrics = jmxResourceUtils.compute() 35 | jmxGcUtils.compute() 36 | 37 | // Source: Others 38 | val latency = latencyUtils.compute(data) 39 | val throughput = throughputUtils.compute(data) 40 | 41 | metricUtils.computeJobOverview(runTimes, latency, throughput, jmxAggregatedMetrics, cpuMetrics) 42 | } 43 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/MasterFailureEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Computes metrics that are used to analyze the master failure workload 13 | * 14 | * @param data: data of the run to analyze 15 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
16 | * @param evaluationConfig: configuration object 17 | * @param sparkSession 18 | */ 19 | class MasterFailureEvaluator (data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 20 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 21 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 22 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 23 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 24 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 25 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 26 | 27 | def run(): Unit = { 28 | // Source: Cadvisor 29 | val cpuMetrics = cadvisorResourceUtils.processCpuMetricsAndComputeStats() 30 | 31 | // Source: JMX 32 | val jmxAggregatedMetrics = jmxResourceUtils.compute() 33 | jmxGcUtils.compute() 34 | 35 | // Source: Others 36 | val latency = latencyUtils.computeForMasterFailure(data) 37 | val throughput = throughputUtils.compute(data) 38 | 39 | metricUtils.computeJobOverview(runTimes, latency, throughput, jmxAggregatedMetrics, cpuMetrics) 40 | } 41 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/PeriodicBurstEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Computes metrics that are important to analyze the periodic burst workload. 13 | * The focus here is mainly on timeseries data because we focus on the behavior throughout a burst. 14 | * 15 | * @param data: data of the run to analyze 16 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
17 | * @param evaluationConfig: configuration object 18 | * @param sparkSession 19 | */ 20 | class PeriodicBurstEvaluator(data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 21 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 22 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 23 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 24 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 25 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 26 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 27 | 28 | def run(): Unit = { 29 | // Source: Cadvisor 30 | cadvisorResourceUtils.processCpuMetricsAndComputeStats() 31 | cadvisorResourceUtils.processNetworkMetrics() 32 | cadvisorResourceUtils.processMemoryMetrics() 33 | 34 | // Source: JMX 35 | jmxResourceUtils.compute() 36 | jmxGcUtils.compute() 37 | 38 | // Source: Others 39 | latencyUtils.computeForPeriodicBurst(data) 40 | throughputUtils.computeForPeriodicBurst(data) 41 | } 42 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/SingleBurstEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Computes metrics that are important for the single burst workload. 13 | * This workload does not compute latency metrics because it does not make sense. 14 | * 15 | * @param data: data of the run to analyze 16 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
17 | * @param evaluationConfig: configuration object 18 | * @param sparkSession 19 | */ 20 | class SingleBurstEvaluator(data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig,sparkSession: SparkSession) { 21 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 22 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 23 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 24 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 25 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 26 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 27 | 28 | def run(): Unit = { 29 | // Source: Cadvisor 30 | val cpuMetrics = cadvisorResourceUtils.processCpuMetricsAndComputeStats() 31 | cadvisorResourceUtils.processNetworkMetrics() 32 | cadvisorResourceUtils.processMemoryMetrics() 33 | 34 | // Source: JMX 35 | val jmxAggregatedMetrics = jmxResourceUtils.compute() 36 | jmxGcUtils.compute() 37 | 38 | // Source: Others 39 | val throughput = throughputUtils.compute(data) 40 | 41 | metricUtils.computeSingleBurstJobOverview(throughput, jmxAggregatedMetrics, cpuMetrics) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/modes/WorkerFailureEvaluator.scala: -------------------------------------------------------------------------------- 1 | package evaluation.modes 2 | 3 | import evaluation.config.EvaluationConfig 4 | import evaluation.metrics.cadvisor.CadvisorResourceUtils 5 | import evaluation.metrics.cadvisorextended.CadvisorResourceComputer 6 | import evaluation.metrics.jmx.{JmxGCUtils, JmxMemoryUtils} 7 | import evaluation.metrics.{LatencyUtils, ThroughputUtils} 8 | import evaluation.utils.{MetricObservation, MetricUtils} 9 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 10 | 11 | /** 12 | * Computes metrics for worker failure workloads 13 | * 14 | * @param data: data of the run to analyze 15 | * @param runTimes: dataframe which contains some key information on the run such as framework, start and end times, etc. 
16 | * @param evaluationConfig: configuration object 17 | * @param sparkSession 18 | */ 19 | class WorkerFailureEvaluator (data: Dataset[MetricObservation], runTimes: DataFrame, evaluationConfig: EvaluationConfig, sparkSession: SparkSession) { 20 | val metricUtils = new MetricUtils(sparkSession, evaluationConfig) 21 | val latencyUtils = new LatencyUtils(sparkSession, evaluationConfig) 22 | val throughputUtils = new ThroughputUtils(sparkSession, evaluationConfig) 23 | val jmxResourceUtils = new JmxMemoryUtils(runTimes, sparkSession, evaluationConfig) 24 | val jmxGcUtils = new JmxGCUtils(runTimes, sparkSession, evaluationConfig) 25 | val cadvisorResourceUtils = new CadvisorResourceUtils(runTimes, sparkSession, evaluationConfig) 26 | 27 | def run(): Unit = { 28 | // Source: Cadvisor 29 | cadvisorResourceUtils.processCpuMetricsAndComputeStats() 30 | cadvisorResourceUtils.processNetworkMetrics() 31 | cadvisorResourceUtils.processMemoryMetrics() 32 | 33 | // Source: JMX 34 | jmxResourceUtils.compute() 35 | jmxGcUtils.compute() 36 | 37 | // Source: Others 38 | latencyUtils.computeForWorkerFailure(data) 39 | throughputUtils.computeForWorkerFailure(data) 40 | } 41 | } -------------------------------------------------------------------------------- /evaluator/src/main/scala/evaluation/utils/DataTypes.scala: -------------------------------------------------------------------------------- 1 | package evaluation.utils 2 | 3 | /** 4 | * Data Types for evaluation purposes. 5 | */ 6 | 7 | case class RunConfiguration( 8 | framework: String, 9 | phase: Int, 10 | scale: Int, //volume inflation factor 11 | bufferTimeout: Int, //linger time of kafka 12 | shortLb: Int, // short lookback 13 | longLb: Int, // long lookback, 14 | startTime: Long 15 | ) 16 | 17 | case class MetricObservation( 18 | runConfiguration: RunConfiguration, 19 | inputKafkaTimestamp: Long, 20 | outputKafkaTimestamp: Long 21 | ) 22 | 23 | case class QueryResponse( 24 | containerId: String, 25 | taskName: String, 26 | runConfig: RunConfiguration, 27 | queryResponse: Map[String, Any] 28 | ) 29 | 30 | case class CpuUsage( 31 | containerName: String, 32 | containerTaskId: String, 33 | runConfiguration: RunConfiguration, 34 | time: Long, 35 | cpuUsage: Float, 36 | cpuUsagePct: Double 37 | ) 38 | 39 | case class MemUsage( 40 | containerName: String, 41 | runConfiguration: RunConfiguration, 42 | time: Long, 43 | memUsageMB: Double 44 | ) 45 | 46 | case class BytesTransferred( 47 | containerName: String, 48 | runConfiguration: RunConfiguration, 49 | time: Long, 50 | mbits: Double 51 | ) 52 | 53 | 54 | case class PgFaults( 55 | containerName: String, 56 | runConfiguration: RunConfiguration, 57 | time: Long, 58 | pgFaults: Double 59 | ) 60 | 61 | case class PacketsDropped( 62 | containerName: String, 63 | runConfiguration: RunConfiguration, 64 | time: Long, 65 | dropped: Long 66 | ) 67 | 68 | 69 | 70 | case class OperationTime( 71 | containerName: String, 72 | runConfiguration: RunConfiguration, 73 | time: Long, 74 | operationTime: Long 75 | ) -------------------------------------------------------------------------------- /evaluator/version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "4.1" -------------------------------------------------------------------------------- /kafka-cluster-tools/read-from-topic.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export TOPIC_NAME=${1:-metrics} 3 | 4 | export KAFKA_ADVERTISED_HOST_NAME=$(hostname -I | head -n1 | awk '{print $1;}') 5 | 6 | docker run --rm -it \ 7 | wurstmeister/kafka:2.12-2.1.1 ./opt/kafka_2.12-2.1.1/bin/kafka-console-consumer.sh \ 8 | --bootstrap-server $KAFKA_ADVERTISED_HOST_NAME:9092 \ 9 | --topic $TOPIC_NAME 10 | -------------------------------------------------------------------------------- /kafka-cluster-tools/setup-kafka.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Start up Zookeeper 4 | docker run -d \ 5 | -p 2181:2181 \ 6 | jplock/zookeeper 7 | 8 | # Start up Kafka 9 | export IP_ADDR=$(hostname -I | head -n1 | awk '{print $1;}') 10 | docker run -d \ 11 | -p 9092:9092 \ 12 | -e KAFKA_ADVERTISED_HOST_NAME=$IP_ADDR \ 13 | -e KAFKA_ADVERTISED_PORT="9092" \ 14 | -e KAFKA_ZOOKEEPER_CONNECT=${IP_ADDR}:2181 \ 15 | -e KAFKA_CREATE_TOPICS="ndwspeed:1:1,ndwflow:1:1,metrics:1:1,aggregation-data-topic:1:1,speed-through-topic:1:1,flow-through-topic:1:1" \ 16 | wurstmeister/kafka:2.12-2.1.1 17 | 18 | docker run -d \ 19 | -p 82:80 \ 20 | -p 2003:2003 \ 21 | -p 3000:3000 \ 22 | -p 7002:7002 \ 23 | -v /var/lib/gmonitor/graphite:/var/lib/graphite/storage/whisper \ 24 | -v /var/lib/gmonitor/grafana/data:/usr/share/grafana/data \ 25 | kamon/grafana_graphite 26 | -------------------------------------------------------------------------------- /metrics-exporter/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | project/target 3 | project/project 4 | target -------------------------------------------------------------------------------- /metrics-exporter/README.md: -------------------------------------------------------------------------------- 1 | # OSPBench Metrics Exporter 2 | 3 | This component exports JMX and cAdvisor metrics from containers and writes them to Kafka. 4 | It collects metrics from the framework cluster containers, Kafka brokers and HDFS components. 5 | 6 | 7 | ## Deployment 8 | This component requires the following environment variables: 9 | 10 | - `FRAMEWORK`: either FLINK/KAFKASTREAMS/SPARK/STRUCTUREDSTREAMING 11 | - `TOPICNAME`: name of the topic currently used for the benchmark run. This component will publish to the topic `metrics-$TOPICNAME`. 12 | - `JMX_HOSTS`: the hosts from which JMX metrics should be scraped, i.e. the host of each framework cluster component. 13 | - `CLUSTER_URL`: IP of the DC/OS cluster. We retrieve this in our scripts with: 14 | 15 | DCOS_DNS_ADDRESS=$(aws cloudformation describe-stacks --region eu-west-1 --stack-name=streaming-benchmark | jq '.Stacks[0].Outputs | .[] | select(.Description=="Master") | .OutputValue' | awk '{print tolower($0)}') 16 | export CLUSTER_URL=http://${DCOS_DNS_ADDRESS//\"} 17 | echo $CLUSTER_URL 18 | 19 | - `DCOS_ACCESS_TOKEN`: token to access DC/OS. We retrieve this in our scripts with: 20 | 21 | dcos config show core.dcos_acs_token 22 | 23 | - `CADVISOR_HOSTS`: the cAdvisor hosts to scrape container metrics from 24 | - `KAFKA_BOOTSTRAP_SERVERS`: the Kafka brokers to which the collected metrics are written 25 | 26 | 27 | You can run this component as a Docker container next to cAdvisor and a framework cluster.
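As an illustration only (the image name and the host lists below are placeholders, not values taken from this repository), a run on a Docker host could look like:

    docker run -d \
      -e FRAMEWORK=FLINK \
      -e TOPICNAME=benchmark-run-1 \
      -e JMX_HOSTS="10.0.0.11:5555,10.0.0.12:5555" \
      -e CADVISOR_HOSTS="10.0.0.11:8888,10.0.0.12:8888" \
      -e KAFKA_BOOTSTRAP_SERVERS="broker-0:9092,broker-1:9092" \
      -e CLUSTER_URL=$CLUSTER_URL \
      -e DCOS_ACCESS_TOKEN=$DCOS_ACCESS_TOKEN \
      <metrics-exporter-image>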
28 | -------------------------------------------------------------------------------- /metrics-exporter/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt.Keys.javaOptions 2 | 3 | name := "metrics-exporter" 4 | version := "3.0" 5 | scalaVersion := "2.11.8" 6 | 7 | val extJvmOpts = Seq( 8 | "-Xmx6g", 9 | "-Xms6g" 10 | ) 11 | 12 | libraryDependencies ++= Dependencies.rootDependencies 13 | 14 | assemblyMergeStrategy in assembly := { 15 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 16 | case _ => MergeStrategy.first 17 | } 18 | mainClass in assembly := Some("benchmark.metrics.exporter.ExporterMain") 19 | mainClass in(Compile, run) := Some("benchmark.metrics.exporter.ExporterMain") 20 | 21 | // JVM options 22 | javaOptions in Universal ++= extJvmOpts.map(opt => s"-J$opt") 23 | javaOptions in Test ++= extJvmOpts 24 | // Docker configs 25 | javaOptions in Docker ++= extJvmOpts.map(opt => s"-J$opt") 26 | packageName in Docker := "metrics-exporter" 27 | 28 | enablePlugins(JavaAppPackaging) 29 | -------------------------------------------------------------------------------- /metrics-exporter/project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Versions { 4 | val asyncHttpClient = "2.1.0-alpha22" 5 | val circe = "0.11.1" 6 | val httpClient = "4.2" 7 | val typeSafe = "1.3.1" 8 | val scalaBinary = "2.11" 9 | val kafka = "0.10.2.1" 10 | val logback = "1.2.2" 11 | } 12 | 13 | object Dependencies { 14 | val rootDependencies: Seq[ModuleID] = Seq( 15 | "com.typesafe" % "config" % Versions.typeSafe, 16 | "org.apache.kafka" % s"kafka_${Versions.scalaBinary}" % Versions.kafka, 17 | "ch.qos.logback" % "logback-classic" % Versions.logback, 18 | "io.circe" %% "circe-core" % Versions.circe, 19 | "io.circe" %% "circe-generic" % Versions.circe, 20 | "io.circe" %% "circe-parser" % Versions.circe, 21 | "org.asynchttpclient" % "async-http-client" % Versions.asyncHttpClient, 22 | "org.apache.httpcomponents" % "httpclient" % Versions.httpClient 23 | ).map(_.exclude("log4j", "log4j") 24 | .exclude("org.slf4j", "slf4j-log4j12")) 25 | } -------------------------------------------------------------------------------- /metrics-exporter/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.13 -------------------------------------------------------------------------------- /metrics-exporter/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | resolvers := Seq( 3 | "otto-bintray" at "https://dl.bintray.com/ottogroup/maven", 4 | "Sbt plugins" at "https://dl.bintray.com/sbt/sbt-plugin-releases" 5 | ) 6 | 7 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") 8 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") 9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 10 | addSbtPlugin("com.typesafe.sbt" %% "sbt-native-packager" % "1.3.2") -------------------------------------------------------------------------------- /metrics-exporter/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- 
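A minimal build-and-run sketch for the metrics exporter defined above (assuming sbt-assembly's default jar naming and that the environment variables listed in the README are exported beforehand):

    cd metrics-exporter
    sbt assembly
    java -jar target/scala-2.11/metrics-exporter-assembly-3.0.jar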
/metrics-exporter/src/main/scala/benchmark/metrics/exporter/ConfigUtils.scala: -------------------------------------------------------------------------------- 1 | package benchmark.metrics.exporter 2 | 3 | object ConfigUtils { 4 | val framework: String = sys.env("FRAMEWORK") 5 | 6 | val kafkaBootstrapServers: String = sys.env("KAFKA_BOOTSTRAP_SERVERS") 7 | val outputKafkaTopic: String = "metrics-" + sys.env("TOPICNAME") 8 | 9 | val jmxHosts: String = sys.env("JMX_HOSTS") 10 | 11 | val cadvisorHosts: String = sys.env("CADVISOR_HOSTS") 12 | 13 | val clusterUrl: String = sys.env("CLUSTER_URL") 14 | val dcosAccessToken: String = sys.env("DCOS_ACCESS_TOKEN") 15 | } 16 | -------------------------------------------------------------------------------- /metrics-exporter/src/main/scala/benchmark/metrics/exporter/MetricTypes.scala: -------------------------------------------------------------------------------- 1 | package benchmark.metrics.exporter 2 | 3 | case class ResourceStats( 4 | containerName: String, 5 | time: Long, 6 | nonHeapUsed: Long, 7 | nonHeapCommitted: Long, 8 | heapUsed: Long, 9 | heapCommitted: Long, 10 | cpuLoad: Double, 11 | sunProcessCpuLoad: Double, 12 | javaLangCpu: Double 13 | ) 14 | 15 | case class GCBeanStats( 16 | containerName: String, 17 | time: Long, 18 | name: String, 19 | collectionTime: Long, 20 | collectionCount: Long, 21 | memoryPoolNames: String, 22 | lastGcDuration: Option[Long], 23 | lastGcEndTime: Option[Long], 24 | lastGcMemoryBefore: Option[Map[String, Long]], 25 | lastGcMemoryAfter: Option[Map[String, Long]] 26 | ) 27 | 28 | case class LastGCInformation( 29 | name: String, 30 | lastGcDuration: Long, 31 | lastGcEndTime: Long, 32 | lastGcMemoryBefore: Map[String, Long], 33 | lastGcMemoryAfter: Map[String, Long] 34 | ) 35 | 36 | case class ContainerInfo( 37 | taskName: String, 38 | containerId: String, 39 | cadvisorHost: String 40 | ) 41 | -------------------------------------------------------------------------------- /output-consumer/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | project/target 3 | project/project 4 | target -------------------------------------------------------------------------------- /output-consumer/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt.Keys.javaOptions 2 | 3 | name := "benchmark-output-consumer" 4 | 5 | scalaVersion := "2.12.8" 6 | 7 | val extJvmOpts = Seq( 8 | "-Xmx8g", 9 | "-Xms8g" 10 | ) 11 | 12 | libraryDependencies ++= Dependencies.rootDependencies 13 | 14 | assemblyMergeStrategy in assembly := { 15 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 16 | case _ => MergeStrategy.first 17 | } 18 | mainClass in assembly := Some("output.consumer.OutputConsumer") 19 | mainClass in(Compile, run) := Some("output.consumer.OutputConsumer") 20 | 21 | // JVM options 22 | javaOptions in Universal ++= extJvmOpts.map(opt => s"-J$opt") 23 | javaOptions in Test ++= extJvmOpts 24 | // Docker configs 25 | javaOptions in Docker ++= extJvmOpts.map(opt => s"-J$opt") 26 | maintainer in Docker := "Giselle van Dongen " 27 | packageSummary in Docker := "Metrics consumer for stream processing benchmark" 28 | packageDescription := "output-consumer" 29 | 30 | 31 | enablePlugins(JavaAppPackaging) 32 | 33 | 34 | -------------------------------------------------------------------------------- /output-consumer/project/Dependencies.scala: -------------------------------------------------------------------------------- 
1 | import sbt._ 2 | 3 | object Versions { 4 | val dropwizardMetrics = "3.2.2" 5 | val typeSafe = "1.3.1" 6 | val typeSafePlayVersion = "2.9.0" 7 | val scalaBinary = "2.12" 8 | val scalaAwsSDK = "1.7.4" 9 | val kafka = "0.10.2.1" 10 | val logback = "1.2.2" 11 | val spark = "3.0.0" 12 | } 13 | 14 | object Dependencies { 15 | val sparkDependencies = Seq( 16 | "org.apache.spark" % s"spark-core_${Versions.scalaBinary}" % Versions.spark % "provided", 17 | "org.apache.spark" % s"spark-sql_${Versions.scalaBinary}" % Versions.spark % "provided", 18 | "org.apache.spark" % s"spark-streaming_${Versions.scalaBinary}" % Versions.spark, 19 | "org.apache.spark" % s"spark-streaming-kafka-0-10_${Versions.scalaBinary}" % Versions.spark, 20 | "org.apache.spark" % s"spark-sql-kafka-0-10_${Versions.scalaBinary}" % Versions.spark 21 | ).map(_.exclude("org.slf4j", "slf4j-log4j12")) 22 | 23 | 24 | val rootDependencies: Seq[ModuleID] = Seq( 25 | "com.typesafe" % "config" % Versions.typeSafe, 26 | "com.typesafe.play" % s"play-json_${Versions.scalaBinary}" % Versions.typeSafePlayVersion, 27 | "io.dropwizard.metrics" % "metrics-core" % Versions.dropwizardMetrics, 28 | "org.apache.kafka" % s"kafka_${Versions.scalaBinary}" % Versions.kafka, 29 | "ch.qos.logback" % "logback-classic" % Versions.logback, 30 | "com.amazonaws" % "aws-java-sdk" % Versions.scalaAwsSDK exclude("joda-time", "joda-time") 31 | ).map(_.exclude("log4j", "log4j") 32 | .exclude("org.slf4j", "slf4j-log4j12").exclude("com.fasterxml.jackson.core", "jackson-core") 33 | .exclude("com.fasterxml.jackson.core", "jackson-annotations") 34 | .exclude("com.fasterxml.jackson.core", "jackson-databind")) ++ sparkDependencies 35 | } -------------------------------------------------------------------------------- /output-consumer/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.16 -------------------------------------------------------------------------------- /output-consumer/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | 3 | resolvers := Seq( 4 | "otto-bintray" at "https://dl.bintray.com/ottogroup/maven", 5 | "Sbt plugins" at "https://dl.bintray.com/sbt/sbt-plugin-releases" 6 | ) 7 | 8 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") 9 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") 10 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 11 | addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.2") -------------------------------------------------------------------------------- /output-consumer/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <configuration> 3 | <!-- XML markup lost in extraction; appender name and root level reconstructed as assumptions, encoder pattern is original --> 4 | <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> 5 | <encoder> 6 | <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> 7 | </encoder> 8 | </appender> 9 | 10 | <root level="INFO"> 11 | <appender-ref ref="STDOUT"/> 12 | </root> 13 | </configuration> -------------------------------------------------------------------------------- /output-consumer/src/main/resources/resources.conf: -------------------------------------------------------------------------------- 1 | mode = "aws" #Can be local or aws 2 | 3 | metricsTopic = "metrics" 4 | 5 | aws { 6 | endpoint = "s3-eu-central-1.amazonaws.com" 7 | } 8 | 9 | local { 10 | kafka { 11 | bootstrap.servers = "localhost:9092" 12 | } 13 | graphite { 14 | enabled = true 15 | host = "localhost" 16 | port = 2003 17 | } 18 | stage = "3" 19 | } --------------------------------------------------------------------------------
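A note on the `mode` switch above: because resources.conf is read through Typesafe Config's `ConfigFactory.load` (see the ConfigUtils class that follows), a JVM system property such as `-Dmode=local` takes precedence over the value in the file, so the mode can be flipped per run without editing the config. A minimal sketch, assuming resources.conf is on the classpath and using a hypothetical `ModeOverrideCheck` object that is not part of the repository:

    import com.typesafe.config.ConfigFactory

    object ModeOverrideCheck extends App {
      // Simulate passing -Dmode=local on the command line.
      System.setProperty("mode", "local")
      // Typesafe Config caches system properties; invalidate to pick up the change.
      ConfigFactory.invalidateCaches()
      val config = ConfigFactory.load("resources.conf")
      // Prints "local" even though resources.conf says mode = "aws",
      // because system properties sit above the file in the fallback chain.
      println(config.getString("mode"))
    }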
/output-consumer/src/main/scala/output/consumer/ConfigUtils.scala: -------------------------------------------------------------------------------- 1 | package output.consumer 2 | 3 | import java.sql.Timestamp 4 | 5 | import com.typesafe.config.{Config, ConfigFactory} 6 | 7 | import scala.util.Try 8 | 9 | /** 10 | * Configuration of the output consumer 11 | * Comes from two sources: 12 | * - from the settings within the Spark context 13 | * - from the src/main/resources.conf file 14 | */ 15 | class ConfigUtils { 16 | val configProperties: Config = ConfigFactory.load("resources.conf") 17 | val systemTime: String = new Timestamp(System.currentTimeMillis()).toString.replaceAll(" ", "_").replaceAll(":", "_").replaceAll("\\.", "_") 18 | 19 | val local: Boolean = configProperties.getString("mode").contains("local") 20 | } 21 | 22 | class LocalConfigUtils extends ConfigUtils { 23 | val kafkaBootstrapServers: String = configProperties.getString("local.kafka.bootstrap.servers") 24 | val graphiteEnabled: Boolean = configProperties.getBoolean("local.graphite.enabled") 25 | val graphiteHost: String = configProperties.getString("local.graphite.host") 26 | val graphitePort: Int = configProperties.getInt("local.graphite.port") 27 | val stage: Int = configProperties.getInt("local.stage") 28 | } 29 | 30 | class ClusterConfigUtils(extraKeys: Map[String, String]) extends ConfigUtils { 31 | val framework: String = extraKeys("spark.FRAMEWORK") 32 | val mode: String = extraKeys("spark.MODE") 33 | val JOBUUID: String = extraKeys("spark.JOBUUID") 34 | val kafkaBootstrapServers: String = extraKeys("spark.KAFKA_BOOTSTRAP_SERVERS") 35 | 36 | val awsEndpoint: String = configProperties.getString("aws.endpoint") 37 | val awsAccessKey: String = extraKeys("spark.AWS_ACCESS_KEY") 38 | val awsSecretKey: String = extraKeys("spark.AWS_SECRET_KEY") 39 | 40 | val pathPrefix: String = extraKeys("spark.OUTPUT_METRICS_PATH") 41 | val path: String = pathPrefix + "/" + framework + "/" + mode + "/observations-log-" + JOBUUID 42 | val metricsPath: String = pathPrefix + "/" + framework + "/" + mode + "/metrics-log-" + JOBUUID 43 | val gcNotificationsPath: String = pathPrefix + "/" + framework + "/" + mode + "/gc-log-" + JOBUUID 44 | val cadvisorPath: String = pathPrefix + "/" + framework + "/" + mode + "/cadvisor-log-" + JOBUUID 45 | val cadvisorHdfsPath: String = pathPrefix + "/" + framework + "/" + mode + "/hdfs-cadvisor-log-" + JOBUUID 46 | val cadvisorKafkaPath: String = pathPrefix + "/" + framework + "/" + mode + "/kafka-cadvisor-log-" + JOBUUID 47 | } -------------------------------------------------------------------------------- /output-consumer/src/main/scala/output/consumer/LocalModeWriter.scala: -------------------------------------------------------------------------------- 1 | package output.consumer 2 | 3 | import java.net.InetSocketAddress 4 | import java.util.Properties 5 | import java.util.concurrent.TimeUnit 6 | 7 | import com.codahale.metrics.graphite.{Graphite, GraphiteReporter} 8 | import com.codahale.metrics.jmx.JmxReporter 9 | import com.codahale.metrics.{MetricFilter, MetricRegistry, Slf4jReporter} 10 | import org.apache.kafka.clients.consumer.KafkaConsumer 11 | import org.slf4j.LoggerFactory 12 | import play.api.libs.json.Json 13 | 14 | import scala.collection.JavaConverters._ 15 | 16 | /** 17 | * For local development! 
18 | * - Reads data from Kafka continuously 19 | * - Updates latency histogram as data comes in 20 | * - Sends histograms to Graphite and console logs 21 | */ 22 | object LocalModeWriter { 23 | def run: Unit = { 24 | val logger = LoggerFactory.getLogger(this.getClass) 25 | val registry: MetricRegistry = new MetricRegistry 26 | val localConfigUtils = new LocalConfigUtils 27 | 28 | val jmxReporter: JmxReporter = JmxReporter.forRegistry(registry).build() 29 | jmxReporter.start() 30 | 31 | val slf4jReporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) 32 | .outputTo(logger) 33 | .convertRatesTo(TimeUnit.MILLISECONDS) 34 | .convertDurationsTo(TimeUnit.MILLISECONDS).build() 35 | slf4jReporter.start(10, TimeUnit.SECONDS) 36 | 37 | if(localConfigUtils.graphiteEnabled) { 38 | val graphite: Graphite = new Graphite(new InetSocketAddress(localConfigUtils.graphiteHost, localConfigUtils.graphitePort)) 39 | val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) 40 | .prefixedWith("benchmark") 41 | .convertRatesTo(TimeUnit.SECONDS) 42 | .convertDurationsTo(TimeUnit.MILLISECONDS) 43 | .filter(MetricFilter.ALL) 44 | .build(graphite) 45 | reporter.start(1, TimeUnit.SECONDS) 46 | } else { 47 | logger.warn("Could not start a connection to Graphite. Will continue with only publishing in logs.") 48 | } 49 | 50 | //properties of metric consumer 51 | val props = new Properties() 52 | props.put("bootstrap.servers", localConfigUtils.kafkaBootstrapServers) 53 | props.put("group.id", "output-consumer") 54 | props.put("enable.auto.commit", "true") 55 | props.put("auto.commit.interval.ms", "1000") 56 | props.put("session.timeout.ms", "30000") 57 | props.put("auto.offset.reset", "latest") 58 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") 59 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") 60 | 61 | //metric consumer 62 | val consumer = new KafkaConsumer[String, String](props) 63 | 64 | //subscribe to metrics topic 65 | consumer.subscribe(java.util.Collections.singletonList("metrics")) 66 | 67 | while (true) { 68 | val metrics = consumer.poll(10000) 69 | for (metric <- metrics.asScala) { 70 | val kafkaPublishTimestampResult = metric.timestamp() 71 | val metricValue = Json.parse(metric.value()) 72 | val publishTimestampInput = (metricValue \ "publishTimestamp").as[Long] 73 | val jobProfile = (metricValue \ "jobProfile").as[String] 74 | val durationPublishToPublish = kafkaPublishTimestampResult - publishTimestampInput 75 | registry.histogram(jobProfile).update(durationPublishToPublish) 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /output-consumer/src/main/scala/output/consumer/OutputConsumer.scala: -------------------------------------------------------------------------------- 1 | package output.consumer 2 | 3 | /** 4 | * Starting point of the application 5 | * Can run in two modes: 6 | * - Local mode with histograms in Graphite/Grafana and logs 7 | * - Cluster mode with writing to S3 8 | */ 9 | object OutputConsumer { 10 | def main(args: Array[String]): Unit = { 11 | System.setProperty("com.amazonaws.services.s3.enableV4", "true") 12 | val configUtils = new ConfigUtils 13 | 14 | if (configUtils.local) { 15 | LocalModeWriter.run 16 | } else { 17 | SingleBatchWriter.run 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /output-consumer/version.sbt: 
-------------------------------------------------------------------------------- 1 | version in ThisBuild := "3.1" -------------------------------------------------------------------------------- /result-analysis/.gitignore: -------------------------------------------------------------------------------- 1 | metastore_db 2 | .ipynb_checkpoints 3 | figures 4 | data 5 | scalability-data 6 | -------------------------------------------------------------------------------- /result-analysis/README.md: -------------------------------------------------------------------------------- 1 | # OSPBench Result Analysis - Jupyter Notebooks 2 | This guide walks you through the notebooks provided to generate visualizations and do further analysis of the benchmark results. 3 | 4 | This folder contains Jupyter notebooks that use PySpark for the analysis. 5 | 6 | You need Jupyter and Spark installed. Follow one of the many installation guides online. 7 | 8 | The package versions we used are somewhat dated: 9 | 10 | - Jupyter 4.3.0 11 | - Python 3.6.6 12 | - Spark 2.3.0 13 | 14 | Some syntax changes may be required for the latest versions of these tools. 15 | 16 | Run Jupyter from the root of the result-analysis folder so that the relative paths in the notebooks resolve correctly. 17 | For us this meant: 18 | 19 | cd result-analysis 20 | pyspark 21 | 22 | Then open the notebooks at [localhost:8888](http://localhost:8888). 23 | 24 | ## Preparations 25 | Initially, we used the input data structure found in the `data` folder. The latency, periodic burst and single burst notebooks make use of this structure. 26 | 27 | Later on, we switched to a different structure: the one used in the `scalability-data` folder. The scalability and failure notebooks make use of this structure. 28 | 29 | ## The notebooks 30 | There is one notebook per workload. The names of the notebooks match the names of the corresponding workloads. 31 | 32 | The data we used in our papers on OSPBench and the notebooks used to visualize it are both available here. 33 | --------------------------------------------------------------------------------
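The published notebooks are PySpark, but the same kind of latency exploration can be reproduced from a plain spark-shell session on the Scala side of the repository. The sketch below is only an illustration: the input path and the column names other than `publishTimestamp` (which appears in the local-mode consumer above) are assumptions about the log layout, not something this README fixes.

    // Hypothetical spark-shell exploration; adjust the path and column names
    // to the actual layout of the data folder.
    import org.apache.spark.sql.functions._

    val observations = spark.read.json("data/latency/observations-log-example")

    observations
      .withColumn("latencyMs", col("resultPublishTimestamp") - col("publishTimestamp"))
      .select(expr("percentile_approx(latencyMs, array(0.5, 0.95, 0.99))").as("p50_p95_p99"))
      .show(truncate = false)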