├── warez
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── test-data
│ ├── product-black-hole-price-update.json
│ ├── product-black-hole-stock-update.json
│ ├── product-black-hole.json
│ └── product-singing-sword.json
├── data
│ ├── values
│ │ ├── keywords.txt
│ │ └── uuids.txt
│ ├── generate-stock-update.sh
│ ├── generate-price-update.sh
│ ├── generate-product.sh
│ └── README.md
├── akka-streamlets
│ └── src
│ │ └── main
│ │ └── scala
│ │ ├── warez
│ │ ├── ProductIngress.scala
│ │ ├── PriceUpdateIngress.scala
│ │ ├── StockUpdateIngress.scala
│ │ ├── JsonFormats.scala
│ │ ├── RecommenderModelIngress.scala
│ │ └── ElasticSearchClient.scala
│ │ ├── wip
│ │ ├── ProductLogger.scala
│ │ ├── PriceUpdateLogger.scala
│ │ └── StockUpdateLogger.scala
│ │ └── dsl
│ │ ├── HttpIngress.scala
│ │ ├── FlowEgress.scala
│ │ └── HttpServer.scala
├── spark-streamlets
│ └── src
│ │ ├── test
│ │ └── scala
│ │ │ └── pipelines
│ │ │ └── example
│ │ │ └── warez
│ │ │ ├── TestUtils.scala
│ │ │ ├── SparkProductJoinerKitSpec.scala
│ │ │ └── SparkProductOperationsSpec.scala
│ │ └── main
│ │ └── scala
│ │ └── pipelines
│ │ └── example
│ │ └── warez
│ │ └── SparkProductJoiner.scala
├── .gitignore
├── datamodel
│ └── src
│ │ └── main
│ │ └── avro
│ │ ├── PriceUpdate.avsc
│ │ ├── StockUpdate.avsc
│ │ ├── RecommenderModel.avsc
│ │ └── Product.avsc
├── blueprint
│ └── src
│ │ └── main
│ │ ├── blueprint
│ │ └── blueprint.conf
│ │ └── resources
│ │ └── logback.xml
├── ml-training
│ ├── avro
│ │ └── RecommenderModel.avsc
│ └── README.md
├── target-env.sbt.example
└── build.sbt
├── mixed-sensors
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── src
│ └── main
│ │ ├── scala
│ │ ├── README.md
│ │ └── pipelines
│ │ │ └── example
│ │ │ ├── TimeOps.scala
│ │ │ ├── IdentityAkkaStreamsProcessor0.scala
│ │ │ ├── IdentityAkkaStreamsProcessor1.scala
│ │ │ ├── IdentityAkkaStreamsProcessor2.scala
│ │ │ ├── IdentitySparkProcessor0.scala
│ │ │ ├── IdentitySparkProcessor2.scala
│ │ │ ├── IdentitySparkProcessor1.scala
│ │ │ ├── SparkRandomGenDataIngress.scala
│ │ │ └── SparkConsoleEgress.scala
│ │ ├── blueprint
│ │ ├── akka-spark-single-processor.conf
│ │ ├── t0-t1-blueprint.conf
│ │ ├── t0-process-t1-blueprint.conf
│ │ ├── blueprint.conf
│ │ └── parallel-100ms-delay.conf
│ │ └── avro
│ │ └── data.avsc
├── target-env.sbt.example
└── build.sbt
├── spark-sensors
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── src
│ └── main
│ │ ├── scala
│ │ ├── README.md
│ │ └── pipelines
│ │ │ └── example
│ │ │ ├── SparkConsoleEgress.scala
│ │ │ ├── MovingAverageSparklet.scala
│ │ │ └── SparkRandomGenDataIngress.scala
│ │ ├── blueprint
│ │ └── blueprint.conf
│ │ └── avro
│ │ ├── agg.avsc
│ │ └── data.avsc
├── .gitignore
├── target-env.sbt.example
└── build.sbt
├── flink-taxi-ride
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── test-data
│ ├── send-data-rides.sh
│ ├── send-data-fares.sh
│ ├── send-data-small.sh
│ ├── nycTaxiFares-small.json
│ └── nycTaxiRides-small.json
├── datamodel
│ └── src
│ │ └── main
│ │ └── avro
│ │ ├── taxiridefare.avsc
│ │ ├── taxifare.avsc
│ │ └── taxiride.avsc
├── taxi-ride-pipeline
│ └── src
│ │ └── main
│ │ └── blueprint
│ │ └── blueprint.conf
├── ingestor
│ └── src
│ │ └── main
│ │ ├── scala
│ │ └── pipelines
│ │ │ └── examples
│ │ │ └── ingestor
│ │ │ ├── TaxiFareIngress.scala
│ │ │ ├── TaxiRideIngress.scala
│ │ │ └── JsonFormats.scala
│ │ └── resources
│ │ ├── log4j.properties
│ │ └── logback.xml
├── logger
│ └── src
│ │ └── main
│ │ ├── resources
│ │ ├── log4j.properties
│ │ └── logback.xml
│ │ └── scala
│ │ └── pipelines
│ │ └── examples
│ │ └── logger
│ │ └── FarePerRideLogger.scala
├── target-env.sbt.example
├── README.md
├── build.sbt
└── processor
│ └── src
│ └── main
│ └── scala
│ └── pipelines
│ └── examples
│ └── processor
│ └── TaxiRideProcessor.scala
├── sensor-data-java
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── test-data
│ ├── device-ids.txt
│ ├── 10-storm.json
│ ├── 10-storm-1.json
│ ├── 12-hurricane.json
│ ├── invalid-metric.json
│ ├── 04-moderate-breeze.json
│ ├── 11-violent-storm.json
│ ├── wrk-04-moderate-breeze.lua
│ └── future-data.json
├── src
│ ├── main
│ │ ├── java
│ │ │ └── pipelines
│ │ │ │ └── examples
│ │ │ │ └── sensordata
│ │ │ │ ├── SensorDataUtils.java
│ │ │ │ ├── SensorDataIngress.java
│ │ │ │ ├── SensorDataStreamingIngress.java
│ │ │ │ ├── MetricsValidation.java
│ │ │ │ └── SensorDataToMetrics.java
│ │ ├── avro
│ │ │ ├── InvalidMetric.avsc
│ │ │ ├── Measurements.avsc
│ │ │ ├── SensorData.avsc
│ │ │ └── Metric.avsc
│ │ ├── blueprint
│ │ │ └── blueprint.conf
│ │ └── resources
│ │ │ └── logback.xml
│ └── test
│ │ └── java
│ │ └── pipelines
│ │ └── examples
│ │ └── sensordata
│ │ └── MetricsValidationTest.java
├── .gitignore
├── load-data-into-pvc.sh
├── target-env.sbt.example
├── build.sbt
└── README.md
├── sensor-data-scala
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── src
│ └── main
│ │ ├── resources
│ │ ├── local.conf
│ │ └── logback.xml
│ │ ├── scala
│ │ └── pipelines
│ │ │ └── examples
│ │ │ ├── sensordata
│ │ │ ├── SensorDataUtils.scala
│ │ │ ├── SensorDataHttpIngress.scala
│ │ │ ├── SensorDataMerge.scala
│ │ │ ├── RotorSpeedFilter.scala
│ │ │ ├── InvalidMetricLogger.scala
│ │ │ ├── SensorDataStreamingIngress.scala
│ │ │ ├── MetricsValidation.scala
│ │ │ ├── RotorspeedWindowLogger.scala
│ │ │ ├── SensorDataToMetrics.scala
│ │ │ ├── JsonFormats.scala
│ │ │ ├── ValidMetricLogger.scala
│ │ │ └── SensorDataFileIngress.scala
│ │ │ └── package.scala
│ │ ├── avro
│ │ ├── InvalidMetric.avsc
│ │ ├── Measurements.avsc
│ │ ├── SensorData.avsc
│ │ └── Metric.avsc
│ │ └── blueprint
│ │ └── blueprint.conf
├── test-data
│ ├── 10-storm.json
│ ├── 12-hurricane.json
│ ├── 04-moderate-breeze.json
│ ├── 11-violent-storm.json
│ ├── invalid-metric.json
│ ├── wrk-04-moderate-breeze.lua
│ └── future-data.json
├── .gitignore
├── target-env.sbt.example
├── load-data-into-pvc.sh
├── build.sbt
└── README.md
├── call-record-aggregator
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── spark-aggregation
│ └── src
│ │ ├── main
│ │ └── scala
│ │ │ ├── README.md
│ │ │ └── pipelines
│ │ │ └── examples
│ │ │ └── carly
│ │ │ └── aggregator
│ │ │ ├── CallAggregatorConsoleEgress.scala
│ │ │ ├── CallStatsAggregator.scala
│ │ │ └── CallRecordGeneratorIngress.scala
│ │ └── test
│ │ └── scala
│ │ └── pipelines
│ │ └── examples
│ │ └── carly
│ │ └── aggregator
│ │ ├── CallRecordGeneratorIngressSpec.scala
│ │ └── CallStatsAggregatorSpec.scala
├── akka-cdr-ingestor
│ └── src
│ │ ├── main
│ │ └── scala
│ │ │ └── pipelines
│ │ │ └── examples
│ │ │ └── carly
│ │ │ └── ingestor
│ │ │ ├── JsonFormats.scala
│ │ │ ├── CallRecordMerge.scala
│ │ │ ├── CallRecordIngress.scala
│ │ │ ├── CallRecordStreamingIngress.scala
│ │ │ └── CallRecordValidation.scala
│ │ └── test
│ │ └── scala
│ │ └── pipelines
│ │ └── examples
│ │ └── carly
│ │ └── ingestor
│ │ ├── CallRecordValidationSpec.scala
│ │ └── CallRecordMergeSpec.scala
├── datamodel
│ ├── src
│ │ └── main
│ │ │ └── avro
│ │ │ ├── InvalidRecord.avsc
│ │ │ ├── AggregatedCallStats.avsc
│ │ │ └── CallRecord.avsc
│ └── data
│ │ └── data-sample-20.json
├── .gitignore
├── send_data.sh
├── target-env.sbt.example
├── call-record-pipeline
│ └── src
│ │ └── main
│ │ ├── blueprint
│ │ └── blueprint.conf
│ │ └── resources
│ │ └── logback.xml
├── akka-java-aggregation-output
│ └── src
│ │ └── main
│ │ └── java
│ │ └── pipelines
│ │ └── examples
│ │ └── carly
│ │ └── output
│ │ ├── AggregateRecordEgress.java
│ │ └── InvalidRecordEgress.java
└── build.sbt
├── spark-resilience-test
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ └── pipelines-plugins.sbt
├── src
│ ├── main
│ │ ├── scala
│ │ │ ├── pipelines
│ │ │ │ └── example
│ │ │ │ │ ├── SequenceSettings.scala
│ │ │ │ │ ├── SuicidalMonkeyProcessor.scala
│ │ │ │ │ └── SparkSequenceGeneratorIngress.scala
│ │ │ └── README.md
│ │ ├── blueprint
│ │ │ └── blueprint.conf
│ │ └── avro
│ │ │ └── data.avsc
│ └── test
│ │ └── scala
│ │ └── pipelines
│ │ └── example
│ │ ├── SparkSequenceValidatorEgressTest.scala
│ │ ├── DataGroupTest.scala
│ │ └── SparkSequenceGeneratorIngressTest.scala
├── .gitignore
├── target-env.sbt.example
└── build.sbt
├── .gitignore
└── README.md
/warez/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/mixed-sensors/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/spark-sensors/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/flink-taxi-ride/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/sensor-data-java/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/sensor-data-scala/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/call-record-aggregator/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/spark-resilience-test/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/device-ids.txt:
--------------------------------------------------------------------------------
1 | c75cb448-df0e-4692-8e06-0321b7703992
2 |
--------------------------------------------------------------------------------
/warez/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
--------------------------------------------------------------------------------
/mixed-sensors/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
2 |
--------------------------------------------------------------------------------
/sensor-data-scala/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
--------------------------------------------------------------------------------
/spark-sensors/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
2 |
--------------------------------------------------------------------------------
/call-record-aggregator/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
--------------------------------------------------------------------------------
/sensor-data-java/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
2 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/resources/local.conf:
--------------------------------------------------------------------------------
1 | file-ingress {
2 | source-data-mount="/tmp/pipelines"
3 | }
4 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/scala/README.md:
--------------------------------------------------------------------------------
1 | # This is an example project that illustrates the use of Spark support in Pipelines
2 |
--------------------------------------------------------------------------------
/warez/test-data/product-black-hole-price-update.json:
--------------------------------------------------------------------------------
1 | {
2 | "productId":"123456789",
3 | "skuId":"1",
4 | "price":10
5 | }
6 |
--------------------------------------------------------------------------------
/warez/test-data/product-black-hole-stock-update.json:
--------------------------------------------------------------------------------
1 | {
2 | "productId":"123456789",
3 | "skuId":"1",
4 | "diff":10
5 | }
6 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/main/scala/README.md:
--------------------------------------------------------------------------------
1 | # This is an example project that illustrates the use of Spark support in Pipelines
2 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/README.md:
--------------------------------------------------------------------------------
1 | # This is an example project used to compare the execution of Spark and AkkaStreams components in one Pipeline.
2 |
--------------------------------------------------------------------------------
/flink-taxi-ride/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
2 | addSbtPlugin("com.cavorite" % "sbt-avro-1-8" % "1.1.6")
3 |
--------------------------------------------------------------------------------
/spark-resilience-test/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")
2 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2")
3 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/TimeOps.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | object TimeOps {
4 |
5 | def nowAsOption: Option[Long] = Some(System.currentTimeMillis())
6 |
7 | }
8 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataUtils.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | object SensorDataUtils {
4 | def isValidMetric(m: Metric) = m.value >= 0.0
5 | }
6 |
--------------------------------------------------------------------------------
/flink-taxi-ride/test-data/send-data-rides.sh:
--------------------------------------------------------------------------------
1 | for str in $(cat nycTaxiRides.json)
2 | do
3 | echo "Using $str"
4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-ride -H "Content-Type: application/json" --data "$str"
5 | done
6 |
--------------------------------------------------------------------------------
/flink-taxi-ride/test-data/send-data-fares.sh:
--------------------------------------------------------------------------------
1 | for str in $(cat nycTaxiFares.json)
2 | do
3 | echo "Using $str"
4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-fare -H "Content-Type: application/json" --data "$str"
5 | done
6 |
7 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/package.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 |
3 | import java.time.Instant
4 |
5 | package object sensordata {
6 | implicit def toInstant(millis: Long): Instant = Instant.ofEpochMilli(millis)
7 | }
8 |
9 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataUtils.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | public final class SensorDataUtils {
4 | public static boolean isValidMetric(Metric m) {
5 | return m.getValue() >= 0.0;
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/10-storm.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 100.1
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/10-storm-1.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 100.1
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/12-hurricane.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495546546279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 129.4
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/invalid-metric.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": -1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 25.3
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/10-storm.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 23.4,
7 | "windSpeed": 100.1
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/12-hurricane.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495546546279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 78.3,
7 | "windSpeed": 129.4
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/04-moderate-breeze.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "d75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 25.3
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/11-violent-storm.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545646279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 105.9
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/04-moderate-breeze.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 25.3
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/11-violent-storm.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545646279,
4 | "measurements": {
5 | "power": 1.7,
6 | "rotorSpeed": 45.7,
7 | "windSpeed": 105.9
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/invalid-metric.json:
--------------------------------------------------------------------------------
1 | {
2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
3 | "timestamp": 1495545346279,
4 | "measurements": {
5 | "power": -1.7,
6 | "rotorSpeed": 3.9,
7 | "windSpeed": 25.3
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/warez/data/values/keywords.txt:
--------------------------------------------------------------------------------
1 | crude
2 | ivray
3 | hajes
4 | commy
5 | jerky
6 | lanum
7 | miaul
8 | skied
9 | pidan
10 | paled
11 | board
12 | blots
13 | molar
14 | pareu
15 | stong
16 | cadua
17 | dhoti
18 | urutu
19 | claws
20 | tardy
21 | ramed
22 | shuln
23 | boult
24 | brian
25 | ketal
26 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/ProductIngress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import pipelines.streamlets.avro._
5 | import JsonFormats._
6 | import warez.dsl._
7 |
8 | class ProductIngress extends HttpIngress[Product](AvroOutlet[Product]("out", _.id.toString))
9 |
--------------------------------------------------------------------------------
/flink-taxi-ride/datamodel/src/main/avro/taxiridefare.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.flink.avro",
3 | "type": "record",
4 | "name": "TaxiRideFare",
5 | "fields":[
6 | {
7 | "name": "rideId", "type": "long"
8 | },
9 | {
10 | "name": "totalFare", "type": "float"
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/scala/pipelines/example/SequenceSettings.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import scala.concurrent.duration._
4 |
5 | object SequenceSettings {
6 |
7 | val GroupSize: Int = 1500
8 | val FailureProbability: Double = 0.05
9 | val TimeoutDuration: Long = 1.minute.toMillis
10 | val RecordsPerSecond: Int = 50
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 | process = pipelines.example.MovingAverageSparklet
5 | egress = pipelines.example.SparkConsoleEgress
6 | }
7 | connections {
8 | ingress.out = [process.in]
9 | process.out = [egress.in]
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/PriceUpdateIngress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import pipelines.streamlets.avro._
5 | import JsonFormats._
6 | import warez.dsl._
7 |
8 | class PriceUpdateIngress extends HttpIngress[PriceUpdate](AvroOutlet[PriceUpdate]("out", _.productId.toString))
9 |
10 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/StockUpdateIngress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import pipelines.streamlets.avro._
5 | import JsonFormats._
6 | import warez.dsl._
7 |
8 | class StockUpdateIngress extends HttpIngress[StockUpdate](AvroOutlet[StockUpdate]("out", _.productId.toString))
9 |
10 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/avro/InvalidMetric.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "InvalidMetric",
5 | "fields":[
6 | {
7 | "name": "metric", "type": "pipelines.examples.sensordata.Metric"
8 | },
9 | {
10 | "name": "error", "type": "string"
11 | }
12 | ]
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/avro/InvalidMetric.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "InvalidMetric",
5 | "fields":[
6 | {
7 | "name": "metric", "type": "pipelines.examples.sensordata.Metric"
8 | },
9 | {
10 | "name": "error", "type": "string"
11 | }
12 | ]
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/warez/spark-streamlets/src/test/scala/pipelines/example/warez/TestUtils.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example.warez
2 | import java.util.UUID
3 |
4 | import warez.Sku
5 |
6 | import scala.collection.immutable.Seq
7 |
8 | object TestUtils {
9 | def uuid: String = UUID.randomUUID().toString
10 | def genSkus(names: Seq[String] = Seq("small", "med", "large")): Seq[Sku] = names.map(Sku(uuid, _))
11 | }
12 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/JsonFormats.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import spray.json._
4 | import pipelines.examples.carly.data.CallRecord
5 |
6 | case object JsonCallRecord extends DefaultJsonProtocol {
7 | implicit val crFormat = jsonFormat(CallRecord.apply, "user", "other", "direction", "duration", "timestamp")
8 | }
9 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | rs-ingress = pipelines.example.SparkSequenceGeneratorIngress
4 | rs-process = pipelines.example.SuicidalMonkeyProcessor
5 | rs-egress = pipelines.example.SparkSequenceValidatorEgress
6 | }
7 | connections {
8 | rs-ingress.out = [rs-process.in]
9 | rs-process.out = [rs-egress.in]
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/warez/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/warez/datamodel/src/main/avro/PriceUpdate.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "warez",
3 |
4 | "type": "record",
5 | "name": "PriceUpdate",
6 |
7 | "fields": [
8 | {
9 | "name": "productId",
10 | "type": "string"
11 | },
12 | {
13 | "name": "skuId",
14 | "type": "string"
15 | },
16 | {
17 | "name": "price",
18 | "type": "int"
19 | }
20 | ]
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/warez/datamodel/src/main/avro/StockUpdate.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "warez",
3 |
4 | "type": "record",
5 | "name": "StockUpdate",
6 |
7 | "fields": [
8 | {
9 | "name": "productId",
10 | "type": "string"
11 | },
12 | {
13 | "name": "skuId",
14 | "type": "string"
15 | },
16 | {
17 | "name": "diff",
18 | "type": "int"
19 | }
20 | ]
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/call-record-aggregator/datamodel/src/main/avro/InvalidRecord.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.carly.data",
3 | "type": "record",
4 | "name": "InvalidRecord",
5 | "fields":[
6 | {
7 | "name": "record",
8 | "type": "string"
9 | },
10 | {
11 | "name": "error",
12 | "type": "string"
13 | }
14 | ]
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/spark-sensors/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/sensor-data-java/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/sensor-data-scala/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/call-record-aggregator/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/spark-resilience-test/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 |
30 | target-env.sbt
31 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/avro/Measurements.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "Measurements",
5 | "fields":[
6 | {
7 | "name": "power", "type": "double"
8 | },
9 | {
10 | "name": "rotorSpeed", "type": "double"
11 | },
12 | {
13 | "name": "windSpeed", "type": "double"
14 | }
15 | ]
16 | }
17 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/avro/Measurements.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "Measurements",
5 | "fields":[
6 | {
7 | "name": "power", "type": "double"
8 | },
9 | {
10 | "name": "rotorSpeed", "type": "double"
11 | },
12 | {
13 | "name": "windSpeed", "type": "double"
14 | }
15 | ]
16 | }
17 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/avro/agg.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.example",
3 | "type": "record",
4 | "name": "Agg",
5 | "fields":[
6 | {
7 | "name": "src",
8 | "type": "string"
9 | },
10 | {
11 | "name": "gauge",
12 | "type": "string"
13 | },
14 | {
15 | "name": "value",
16 | "type": "double"
17 | }
18 | ]
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/flink-taxi-ride/test-data/send-data-small.sh:
--------------------------------------------------------------------------------
1 | for str in $(cat nycTaxiRides-small.json)
2 | do
3 | echo "Using $str"
4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-ride -H "Content-Type: application/json" --data "$str"
5 | done
6 |
7 | for str in $(cat nycTaxiFares-small.json)
8 | do
9 | echo "Using $str"
10 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-fare -H "Content-Type: application/json" --data "$str"
11 | done
12 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/avro/data.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.example",
3 | "type": "record",
4 | "name": "Data",
5 | "fields":[
6 | {
7 | "name": "timestamp",
8 | "type": "long"
9 | },
10 | {
11 | "name": "key",
12 | "type": "long"
13 | },
14 | {
15 | "name": "value",
16 | "type": "long"
17 | }
18 | ]
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/avro/SensorData.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "SensorData",
5 | "fields":[
6 | {
7 | "name": "deviceId", "type": "string"
8 | },
9 | {
10 | "name": "timestamp", "type": "long"
11 | },
12 | {
13 | "name": "measurements", "type": "pipelines.examples.sensordata.Measurements"
14 | }
15 | ]
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/wip/ProductLogger.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import pipelines.streamlets.avro._
4 | import pipelines.akkastream.scaladsl._
5 | import akka.actor.ActorSystem
6 | import warez.dsl._
7 |
8 | object ProductLogger extends FlowEgress[Product](AvroInlet[Product]("in")) {
9 | def flowWithContext(system: ActorSystem) =
10 | FlowWithOffsetContext[Product].map { product ⇒
11 | system.log.warning(s"Product! $product")
12 | product
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | etc/bootstrap-local.conf
2 |
3 | .class
4 | *.log
5 | .history
6 |
7 | target/
8 | lib_managed/
9 | src_managed/
10 | project/boot/
11 | project/plugins/project/
12 | project/activator-sbt*
13 |
14 | .env
15 |
16 | **/values.sbt
17 |
18 | # IntelliJ
19 | .idea/
20 | *.iml
21 | *.iws
22 |
23 | # Mac
24 | .DS_Store
25 |
26 | # vim swap files
27 | *.swp
28 | .*.swp
29 | .metals/
30 | .vscode/
31 | *.code-workspace
32 | */.bloop/
33 | */project/.bloop/
34 | .gitignore
35 | target-env.sbt
36 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | sensor-data = pipelines.examples.sensordata.SensorDataIngress
4 | filter = pipelines.examples.sensordata.FilterStreamlet
5 | metrics = pipelines.examples.sensordata.SensorDataToMetrics
6 | validation = pipelines.examples.sensordata.MetricsValidation
7 | }
8 |
9 | connections {
10 | sensor-data.out = [metrics.in]
11 | metrics.out = [filter.in]
12 | filter.out = [validation.in]
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/warez/data/generate-stock-update.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | ##########
4 | # Generate a JSON document representing a stock update.
5 | ##########
6 |
7 | #set -x
8 |
9 | ROOTDIR=$(cd $(dirname $0); pwd)
10 |
11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt")
12 | if [ $((RANDOM%2)) -eq 0 ]
13 | then
14 | sku_suffix="aa"
15 | else
16 | sku_suffix="bb"
17 | fi
18 |
19 | cat << EOF
20 | {
21 | "productId": "$uuid",
22 | "skuId": "${uuid%..}$sku_suffix",
23 | "diff": $(((RANDOM%21)-10))
24 | }
25 | EOF
26 |
27 |
--------------------------------------------------------------------------------
/flink-taxi-ride/taxi-ride-pipeline/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | taxi-ride = pipelines.examples.ingestor.TaxiRideIngress
4 | taxi-fare = pipelines.examples.ingestor.TaxiFareIngress
5 | processor = pipelines.examples.processor.TaxiRideProcessor
6 | logger = pipelines.examples.logger.FarePerRideLogger
7 | }
8 | connections {
9 | taxi-ride.out = [processor.in-taxiride]
10 | taxi-fare.out = [processor.in-taxifare]
11 | processor.out = [logger.in]
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/avro/Metric.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "Metric",
5 | "fields":[
6 | {
7 | "name": "deviceId", "type": "string"
8 | },
9 | {
10 | "name": "timestamp", "type": "long"
11 | },
12 | {
13 | "name": "name", "type": "string"
14 | },
15 | {
16 | "name": "value", "type": "double"
17 | }
18 | ]
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/warez/data/generate-price-update.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | ##########
4 | # Generate a JSON document representing a price update.
5 | ##########
6 |
7 | #set -x
8 |
9 | ROOTDIR=$(cd $(dirname $0); pwd)
10 |
11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt")
12 | if [ $((RANDOM%2)) -eq 0 ]
13 | then
14 | sku_suffix="aa"
15 | else
16 | sku_suffix="bb"
17 | fi
18 |
19 | cat << EOF
20 | {
21 | "productId": "$uuid",
22 | "skuId": "${uuid%..}$sku_suffix",
23 | "price": $(((RANDOM%1999) + 1))
24 | }
25 | EOF
26 |
27 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/wip/PriceUpdateLogger.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import pipelines.streamlets.avro._
4 | import pipelines.akkastream.scaladsl._
5 | import akka.actor.ActorSystem
6 | import warez.dsl._
7 |
8 | object PriceUpdateLogger extends FlowEgress[PriceUpdate](AvroInlet[PriceUpdate]("in")) {
9 | def flowWithContext(system: ActorSystem) =
10 | FlowWithOffsetContext[PriceUpdate].map { priceUpdate ⇒
11 | system.log.warning(s"Price Update! $priceUpdate")
12 | priceUpdate
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/wip/StockUpdateLogger.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import pipelines.streamlets.avro._
4 | import pipelines.akkastream.scaladsl._
5 | import akka.actor.ActorSystem
6 | import warez.dsl._
7 |
8 | object StockUpdateLogger extends FlowEgress[StockUpdate](AvroInlet[StockUpdate]("in")) {
9 | def flowWithContext(system: ActorSystem) =
10 | FlowWithOffsetContext[StockUpdate].map { stockUpdate ⇒
11 | system.log.warning(s"Stock Update! $stockUpdate")
12 | stockUpdate
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/blueprint/akka-spark-single-processor.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 |
5 | spark-process1 = pipelines.example.IdentitySparkProcessor1
6 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1
7 |
8 | egress = pipelines.example.SparkConsoleEgress
9 | }
10 | connections {
11 | ingress.out = [spark-process1.in, akka-process1.in]
12 |
13 | spark-process1.out = [egress.in1]
14 | akka-process1.out = [egress.in2]
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/JsonFormats.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import spray.json._
4 |
5 | object JsonFormats extends DefaultJsonProtocol {
6 |
7 | implicit val skuFormat: JsonFormat[Sku] = jsonFormat4(Sku.apply)
8 |
9 | implicit val priceUpdateFormat = jsonFormat3(PriceUpdate.apply)
10 |
11 | implicit val stockUpdateFormat = jsonFormat3(StockUpdate.apply)
12 |
13 | implicit val productFormat = jsonFormat5(Product.apply)
14 |
15 | implicit val recommenderModelFormat = jsonFormat4(RecommenderModel.apply)
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/dsl/HttpIngress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 | package dsl
3 |
4 | import akka.http.scaladsl.unmarshalling._
5 |
6 | import pipelines.streamlets._
7 | import pipelines.akkastream._
8 | import pipelines.akkastream.util.scaladsl.HttpServerLogic
9 |
10 | abstract class HttpIngress[Out: FromByteStringUnmarshaller](val out: CodecOutlet[Out])
11 | extends AkkaServerStreamlet {
12 |
13 | final override val shape = StreamletShape.withOutlets(out)
14 |
15 | override final def createLogic = HttpServerLogic.default(this, out)
16 | }
17 |
--------------------------------------------------------------------------------
/call-record-aggregator/send_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DEFAULT_DATASET="./datamodel/data/data-sample-20.json"
4 | if [ "$1" == "" ]
5 | then
6 | RESOURCE=$DEFAULT_DATASET
7 | else
8 | RESOURCE="$1"
9 | fi
10 |
11 | echo "Using $RESOURCE"
12 |
13 | ROUTE_HOST=$(kubectl pipelines status call-record-pipeline | grep /cdr-ingress | awk '{print $2}')
14 |
15 | for str in $( cat $RESOURCE ); do
16 | echo Sending $str
17 | curl -i \
18 | -X POST $ROUTE_HOST \
19 | -u assassin:4554551n \
20 | -H "Content-Type: application/json" \
21 | --data "$str"
22 | done
--------------------------------------------------------------------------------
/spark-sensors/src/main/avro/data.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.example",
3 | "type": "record",
4 | "name": "Data",
5 | "fields":[
6 | {
7 | "name": "src",
8 | "type": "string"
9 | },
10 | {
11 | "name": "timestamp",
12 | "type": "long"
13 | },
14 | {
15 | "name": "gauge",
16 | "type": "string"
17 | },
18 | {
19 | "name": "value",
20 | "type": "double"
21 | }
22 | ]
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/warez/blueprint/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | products = warez.ProductIngress
4 | stock-updates = warez.StockUpdateIngress
5 | price-updates = warez.PriceUpdateIngress
6 | products-search = warez.ProductSearchApiEgress
7 | product-joiner = pipelines.example.warez.SparkProductJoiner
8 | }
9 | connections {
10 | products.out = [product-joiner.in-0]
11 | stock-updates.out = [product-joiner.in-1]
12 | price-updates.out = [product-joiner.in-2]
13 | product-joiner.out = [products-search.in]
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/wrk-04-moderate-breeze.lua:
--------------------------------------------------------------------------------
1 | function read_txt_file(path)
2 | local file, errorMessage = io.open(path, "r")
3 | if not file then
4 | error("Could not read the file:" .. errorMessage .. "\n")
5 | end
6 |
7 | local content = file:read "*all"
8 | file:close()
9 | return content
10 | end
11 |
12 | init = function(args)
13 | local FileBody = read_txt_file("04-moderate-breeze.json")
14 |
15 | wrk.method = "POST"
16 | wrk.headers["Content-Type"] = "application/json"
17 | wrk.headers["Connection"] = "Keep-Alive"
18 | wrk.body = FileBody
19 |
20 | end
21 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/wrk-04-moderate-breeze.lua:
--------------------------------------------------------------------------------
1 | function read_txt_file(path)
2 | local file, errorMessage = io.open(path, "r")
3 | if not file then
4 | error("Could not read the file:" .. errorMessage .. "\n")
5 | end
6 |
7 | local content = file:read "*all"
8 | file:close()
9 | return content
10 | end
11 |
12 | init = function(args)
13 | local FileBody = read_txt_file("04-moderate-breeze.json")
14 |
15 | wrk.method = "POST"
16 | wrk.headers["Content-Type"] = "application/json"
17 | wrk.headers["Connection"] = "Keep-Alive"
18 | wrk.body = FileBody
19 |
20 | end
21 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/RecommenderModelIngress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import pipelines.streamlets.avro._
5 | import JsonFormats._
6 | import warez.dsl._
7 |
8 | /**
9 | * Ingress that reads the recommender model in base64 string format. We assume that the model
10 |  * file is transferred after being converted to base64. This should be the start of the model serving
11 | * pipeline.
12 | */
13 | class RecommenderModelIngress extends HttpIngress[RecommenderModel](AvroOutlet[RecommenderModel]("out", _.modelId.toString))
14 |
15 |
--------------------------------------------------------------------------------
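The scaladoc above says the recommender model arrives at this HTTP ingress as a base64-encoded string. As a rough, hypothetical sketch (not a file from this repository), the standalone Scala snippet below shows how a client might assemble such a payload: it base64-encodes a saved TensorFlow model file and fills in the four RecommenderModel fields (modelId, tensorFlowModel, productMap, customerMap). The exact JSON shape expected by recommenderModelFormat in JsonFormats.scala, and the example map entries, are assumptions.

import java.nio.file.{ Files, Paths }
import java.util.Base64

object RecommenderModelPayload {
  // Hypothetical client-side helper: reads a saved model file and prints the
  // JSON document one might POST to RecommenderModelIngress. Encoding the Avro
  // `bytes` field as a base64 string is an assumption; verify against
  // JsonFormats.recommenderModelFormat before relying on this shape.
  def fromFile(modelId: String, modelPath: String): String = {
    val bytes   = Files.readAllBytes(Paths.get(modelPath))
    val encoded = Base64.getEncoder.encodeToString(bytes)
    s"""{
       |  "modelId": "$modelId",
       |  "tensorFlowModel": "$encoded",
       |  "productMap": { "black-hole": 1, "singing-sword": 2 },
       |  "customerMap": { "customer-1": 1 }
       |}""".stripMargin
  }

  def main(args: Array[String]): Unit =
    println(fromFile("model-v1", args.headOption.getOrElse("model.pb")))
}

The printed document could then be sent to the ingress route with curl, in the same way the taxi-ride and sensor-data examples post their test data.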
/warez/datamodel/src/main/avro/RecommenderModel.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "warez",
3 |
4 | "type": "record",
5 | "name": "RecommenderModel",
6 |
7 | "fields": [
8 | {
9 | "name": "modelId",
10 | "type": "string"
11 | },
12 | {
13 | "name": "tensorFlowModel",
14 | "type": "bytes"
15 | },
16 | {
17 | "name": "productMap",
18 | "type": {
19 | "type": "map",
20 | "values": "int"
21 | }
22 | },
23 | {
24 | "name": "customerMap",
25 | "type": {
26 | "type": "map",
27 | "values": "int"
28 | }
29 | }
30 | ]
31 | }
32 |
33 |
34 |
--------------------------------------------------------------------------------
/warez/ml-training/avro/RecommenderModel.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "warez",
3 |
4 | "type": "record",
5 | "name": "RecommenderModel",
6 |
7 | "fields": [
8 | {
9 | "name": "modelId",
10 | "type": "string"
11 | },
12 | {
13 | "name": "tensorFlowModel",
14 | "type": "bytes"
15 | },
16 | {
17 | "name": "productMap",
18 | "type": {
19 | "type": "map",
20 | "values": "int"
21 | }
22 | },
23 | {
24 | "name": "customerMap",
25 | "type": {
26 | "type": "map",
27 | "values": "int"
28 | }
29 | }
30 | ]
31 | }
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/flink-taxi-ride/test-data/nycTaxiFares-small.json:
--------------------------------------------------------------------------------
1 | {"driverId":2013000006,"paymentType":"CSH","rideId":6,"startTime":1356998400000,"taxiId":2013000006,"tip":0.0,"tolls":4.800000190734863,"totalFare":34.29999923706055}
2 | {"driverId":2013000011,"paymentType":"CRD","rideId":11,"startTime":1356998400000,"taxiId":2013000011,"tip":4.699999809265137,"tolls":0.0,"totalFare":28.700000762939453}
3 | {"driverId":2013000031,"paymentType":"CSH","rideId":31,"startTime":1356998400000,"taxiId":2013000031,"tip":0.0,"tolls":0.0,"totalFare":20.5}
4 | {"driverId":2013000055,"paymentType":"CSH","rideId":55,"startTime":1356998400000,"taxiId":2013000055,"tip":0.0,"tolls":0.0,"totalFare":26.5}
5 |
--------------------------------------------------------------------------------
/call-record-aggregator/datamodel/src/main/avro/AggregatedCallStats.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.carly.data",
3 | "type": "record",
4 | "name": "AggregatedCallStats",
5 | "fields":[
6 | {
7 | "name": "startTime",
8 | "type": "long"
9 | },
10 | {
11 | "name": "windowDuration",
12 | "type": "long"
13 | },
14 | {
15 | "name": "avgCallDuration",
16 | "type": "double"
17 | },
18 | {
19 | "name": "totalCallDuration",
20 | "type": "long"
21 | }
22 | ]
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataHttpIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import pipelines.akkastream._
5 | import pipelines.akkastream.util.scaladsl._
6 |
7 | import pipelines.streamlets._
8 | import pipelines.streamlets.avro._
9 | import SensorDataJsonSupport._
10 |
11 | class SensorDataHttpIngress extends AkkaServerStreamlet {
12 | val out = AvroOutlet[SensorData]("out").withPartitioner(RoundRobinPartitioner)
13 | def shape = StreamletShape.withOutlets(out)
14 | override def createLogic = HttpServerLogic.default(this, out)
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataMerge.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.streamlets._
4 | import pipelines.streamlets.avro._
5 | import pipelines.akkastream._
6 | import pipelines.akkastream.util.scaladsl.MergeLogic
7 |
8 | class SensorDataMerge extends AkkaStreamlet {
9 | val in0 = AvroInlet[SensorData]("in-0")
10 | val in1 = AvroInlet[SensorData]("in-1")
11 | val out = AvroOutlet[SensorData]("out", _.deviceId.toString)
12 |
13 | final override val shape = StreamletShape.withInlets(in0, in1).withOutlets(out)
14 | final override def createLogic = new MergeLogic(Vector(in0, in1), out)
15 | }
16 |
--------------------------------------------------------------------------------
/warez/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to set up your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
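The comment block above, repeated in every project's pipelines-plugins.sbt, asks you to put Lightbend Commercial download credentials in your global sbt configuration. As a minimal sketch only (the file location, realm and host here are assumptions; the linked getting-started guide is authoritative), a global settings file for sbt 1.x might look like:

// ~/.sbt/1.0/lightbend-commercial.sbt  -- assumed location for global sbt 1.x settings
// Realm and host are placeholders; use the values supplied with your Lightbend subscription.
credentials += Credentials(
  "lightbend-commercial",   // realm (assumption)
  "repo.lightbend.com",     // host of the commercial repository referenced above
  "<your-username>",
  "<your-password>"
)

sbt can also load the same four fields (realm, host, user, password) from a properties file via Credentials(Path.userHome / ".lightbend" / "commercial.credentials") if you prefer to keep secrets out of .sbt files; that path is likewise an assumption.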
/flink-taxi-ride/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to set up your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/mixed-sensors/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to set up your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/sensor-data-java/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to set up your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/sensor-data-scala/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to setup your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/spark-sensors/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to setup your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/call-record-aggregator/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to setup your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/spark-resilience-test/project/pipelines-plugins.sbt:
--------------------------------------------------------------------------------
1 | // Resolver for the pipelines-sbt plugin
2 | //
3 | // NOTE: Lightbend Commercial repository!
4 | // Please add your Lightbend Commercial download credentials to the global SBT config.
5 | //
6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md
7 | // for details on how to setup your Lightbend Commercial download credentials.
8 | //
9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns)
10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/"
11 |
12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2")
13 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/dsl/FlowEgress.scala:
--------------------------------------------------------------------------------
1 | package warez
2 | package dsl
3 |
4 | import akka.actor.ActorSystem
5 |
6 | import pipelines.streamlets._
7 | import pipelines.akkastream._
8 | import pipelines.akkastream.scaladsl._
9 |
10 | abstract class FlowEgress[In](val in: CodecInlet[In])
11 | extends AkkaStreamlet {
12 |
13 | final override val shape = StreamletShape.withInlets(in)
14 | def flowWithContext(system: ActorSystem): FlowWithOffsetContext[In, In]
15 |
16 | override def createLogic = new RunnableGraphStreamletLogic {
17 | def runnableGraph =
18 | sourceWithOffsetContext(in)
19 | .via(flowWithContext(system))
20 | .to(sinkWithOffsetContext)
21 | }
22 | }
23 |
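For orientation, a hypothetical concrete egress built on this small DSL (not a file in the repository) could look like the following; it only logs each `Product` before its offset is committed:

```scala
package warez
package dsl

import akka.actor.ActorSystem

import pipelines.streamlets.avro._
import pipelines.akkastream.scaladsl._

// Illustrative only: logs every incoming Product and passes it on unchanged,
// so the offset context is committed by the sink provided by FlowEgress.
class ProductLoggerEgress extends FlowEgress[Product](AvroInlet[Product]("in")) {
  def flowWithContext(system: ActorSystem): FlowWithOffsetContext[Product, Product] =
    FlowWithOffsetContext[Product].map { product ⇒
      system.log.info(s"Egressing product ${product.id}")
      product
    }
}
```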
--------------------------------------------------------------------------------
/call-record-aggregator/datamodel/src/main/avro/CallRecord.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.carly.data",
3 | "type": "record",
4 | "name": "CallRecord",
5 | "fields":[
6 | {
7 | "name": "user",
8 | "type": "string"
9 | },
10 | {
11 | "name": "other",
12 | "type": "string"
13 | },
14 | {
15 | "name": "direction",
16 | "type": "string"
17 | },
18 | {
19 | "name": "duration",
20 | "type": "long"
21 | },
22 | {
23 | "name": "timestamp",
24 | "type": "long"
25 | }
26 | ]
27 | }
28 |
29 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/avro/SensorData.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "SensorData",
5 | "fields":[
6 | {
7 | "name": "deviceId",
8 | "type": {
9 | "type": "string",
10 | "logicalType": "uuid"
11 | }
12 | },
13 | {
14 | "name": "timestamp",
15 | "type": {
16 | "type": "long",
17 | "logicalType": "timestamp-millis"
18 | }
19 | },
20 | {
21 | "name": "measurements", "type": "pipelines.examples.sensordata.Measurements"
22 | }
23 | ]
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/warez/test-data/product-black-hole.json:
--------------------------------------------------------------------------------
1 | {
2 | "id":"123456789",
3 | "name":"Acme Portable Hole",
4 | "description":"A cartoon hole that can be applied to any surface. https://www.youtube.com/watch?v=znzkdE-QQp0",
5 | "keywords":[
6 | "black",
7 | "hole",
8 | "gag",
9 | "plot device",
10 | "roger rabbit"
11 | ],
12 | "skus":[
13 | {
14 | "id":"1",
15 | "name":"Small Hole",
16 | "stock":10,
17 | "price":5
18 | },
19 | {
20 | "id":"2",
21 | "name":"Medium Hole",
22 | "stock":10,
23 | "price":10
24 | },
25 | {
26 | "id":"3",
27 | "name":"Large Hole",
28 | "stock":15,
29 | "price":20
30 | }
31 | ]
32 | }
--------------------------------------------------------------------------------
/warez/test-data/product-singing-sword.json:
--------------------------------------------------------------------------------
1 | {
2 | "id":"912345678",
3 | "name":"Acme Singing Sword",
4 | "description":"A cartoon singing sword that belts out show tunes. https://www.youtube.com/watch?v=6u8wBfDtZkE",
5 | "keywords":[
6 | "sword",
7 | "sings",
8 | "roger rabbit",
9 | "ineffective",
10 | "weapon"
11 | ],
12 | "skus":[
13 | {
14 | "id":"5",
15 | "name":"Tenor",
16 | "stock":50,
17 | "price":10
18 | },
19 | {
20 | "id":"6",
21 | "name":"Baritone",
22 | "stock":5,
23 | "price":25
24 | },
25 | {
26 | "id":"7",
27 | "name":"Bass",
28 | "stock":12,
29 | "price":50
30 | }
31 | ]
32 | }
33 |
--------------------------------------------------------------------------------
/flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/TaxiFareIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 | package ingestor
3 |
4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
5 |
6 | import pipelines.streamlets.avro._
7 | import pipelines.streamlets._
8 | import pipelines.akkastream._
9 | import pipelines.flink.avro._
10 | import TaxiFareJsonProtocol._
11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic
12 |
13 | class TaxiFareIngress extends AkkaServerStreamlet {
14 | val out = AvroOutlet[TaxiFare]("out", _.rideId.toString)
15 |
16 | final override val shape = StreamletShape.withOutlets(out)
17 | final override def createLogic = HttpServerLogic.default(this, out)
18 | }
19 |
--------------------------------------------------------------------------------
/flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/TaxiRideIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 | package ingestor
3 |
4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
5 |
6 | import pipelines.streamlets.avro._
7 | import pipelines.streamlets._
8 | import pipelines.akkastream._
9 | import pipelines.flink.avro._
10 | import TaxiRideJsonProtocol._
11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic
12 |
13 | class TaxiRideIngress extends AkkaServerStreamlet {
14 | val out = AvroOutlet[TaxiRide]("out", _.rideId.toString)
15 |
16 | final override val shape = StreamletShape.withOutlets(out)
17 | final override def createLogic = HttpServerLogic.default(this, out)
18 | }
19 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/avro/Metric.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.examples.sensordata",
3 | "type": "record",
4 | "name": "Metric",
5 | "fields":[
6 | {
7 | "name": "deviceId",
8 | "type": {
9 | "type": "string",
10 | "logicalType": "uuid"
11 | }
12 | },
13 | {
14 | "name": "timestamp",
15 | "type": {
16 | "type": "long",
17 | "logicalType": "timestamp-millis"
18 | }
19 | },
20 | {
21 | "name": "name", "type": "string"
22 | },
23 | {
24 | "name": "value", "type": "double"
25 | }
26 | ]
27 | }
28 |
29 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/RotorSpeedFilter.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl._
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class RotorSpeedFilter extends AkkaStreamlet {
9 | val in = AvroInlet[Metric]("in")
10 | val out = AvroOutlet[Metric]("out").withPartitioner(RoundRobinPartitioner)
11 | val shape = StreamletShape(in, out)
12 |
13 | override def createLogic = new RunnableGraphStreamletLogic() {
14 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out))
15 | def flow = FlowWithOffsetContext[Metric].filter(_.name == "rotorSpeed")
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/blueprint/t0-t1-blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 |
5 | spark-process1 = pipelines.example.IdentitySparkProcessor1
6 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1
7 |
8 | spark-process2 = pipelines.example.IdentitySparkProcessor2
9 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2
10 |
11 | egress = pipelines.example.SparkConsoleEgress
12 | }
13 | connections {
14 | ingress.out = [spark-process1.in, akka-process1.in]
15 | spark-process1.out = [spark-process2.in]
16 | akka-process1.out = [akka-process2.in]
17 | spark-process2.out = [egress.in1]
18 | akka-process2.out = [egress.in2]
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordMerge.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import pipelines.streamlets._
4 | import pipelines.streamlets.avro._
5 | import pipelines.akkastream._
6 | import pipelines.akkastream.util.scaladsl.MergeLogic
7 |
8 | import pipelines.examples.carly.data._
9 |
10 | class CallRecordMerge extends AkkaStreamlet {
11 | val in0 = AvroInlet[CallRecord]("in-0")
12 | val in1 = AvroInlet[CallRecord]("in-1")
13 | val in2 = AvroInlet[CallRecord]("in-2")
14 | val out = AvroOutlet[CallRecord]("out", _.user)
15 | final override val shape = StreamletShape.withInlets(in0, in1, in2).withOutlets(out)
16 | final override def createLogic = new MergeLogic(Vector(in0, in1, in2), out)
17 | }
18 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor0.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic }
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class IdentityAkkaStreamsProcessor0 extends AkkaStreamlet {
9 | val in = AvroInlet[Data]("in")
10 | val out = AvroOutlet[Data]("out", _.src)
11 |
12 | val shape = StreamletShape(in).withOutlets(out)
13 |
14 | override def createLogic = new RunnableGraphStreamletLogic() {
15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out))
16 | def flow = FlowWithOffsetContext[Data].map { d ⇒ Thread.sleep(100); d }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor1.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic }
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class IdentityAkkaStreamsProcessor1 extends AkkaStreamlet {
9 | val in = AvroInlet[Data]("in")
10 | val out = AvroOutlet[Data]("out", _.src)
11 |
12 | val shape = StreamletShape(in).withOutlets(out)
13 |
14 | override def createLogic = new RunnableGraphStreamletLogic() {
15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out))
16 | def flow = FlowWithOffsetContext[Data].map(d ⇒ d.copy(t1 = TimeOps.nowAsOption))
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor2.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic }
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class IdentityAkkaStreamsProcessor2 extends AkkaStreamlet {
9 | val in = AvroInlet[Data]("in")
10 | val out = AvroOutlet[Data]("out", _.src)
11 |
12 | val shape = StreamletShape(in).withOutlets(out)
13 |
14 | override def createLogic = new RunnableGraphStreamletLogic() {
15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out))
16 | def flow = FlowWithOffsetContext[Data].map(d ⇒ d.copy(t2 = TimeOps.nowAsOption))
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor0.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import org.apache.spark.sql.streaming.OutputMode
4 |
5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
6 | import pipelines.spark.sql.SQLImplicits._
7 | import pipelines.streamlets.StreamletShape
8 | import pipelines.streamlets.avro._
9 |
10 | class IdentitySparkProcessor0 extends SparkStreamlet {
11 |
12 | val in = AvroInlet[Data]("in")
13 | val out = AvroOutlet[Data]("out", _.src)
14 | val shape = StreamletShape(in, out)
15 |
16 | override def createLogic() = new SparkStreamletLogic {
17 | override def buildStreamingQueries = {
18 | writeStream(readStream(in).map { d ⇒ Thread.sleep(200); d }, out, OutputMode.Append).toQueryExecution
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor2.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import org.apache.spark.sql.streaming.OutputMode
4 |
5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
6 | import pipelines.spark.sql.SQLImplicits._
7 | import pipelines.streamlets.StreamletShape
8 | import pipelines.streamlets.avro._
9 |
10 | class IdentitySparkProcessor2 extends SparkStreamlet {
11 |
12 | val in = AvroInlet[Data]("in")
13 | val out = AvroOutlet[Data]("out", _.src)
14 | val shape = StreamletShape(in, out)
15 |
16 | override def createLogic() = new SparkStreamletLogic {
17 | override def buildStreamingQueries = {
18 | writeStream(readStream(in).map(d ⇒ d.copy(t2 = TimeOps.nowAsOption)), out, OutputMode.Append).toQueryExecution
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/avro/data.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.example",
3 | "type": "record",
4 | "name": "Data",
5 | "fields":[
6 | {
7 | "name": "src",
8 | "type": "string"
9 | },
10 | {
11 | "name": "timestamp",
12 | "type": "long"
13 | },
14 | {
15 | "name": "t1",
16 | "type":["null", "long"],
17 | "default": null
18 | },
19 | {
20 | "name": "t2",
21 | "type":["null", "long"],
22 | "default": null
23 | },
24 | {
25 | "name": "gauge",
26 | "type": "string"
27 | },
28 | {
29 | "name": "value",
30 | "type": "double"
31 | }
32 | ]
33 | }
34 |
35 |
--------------------------------------------------------------------------------
/flink-taxi-ride/datamodel/src/main/avro/taxifare.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.flink.avro",
3 | "type": "record",
4 | "name": "TaxiFare",
5 | "fields":[
6 | {
7 | "name": "rideId", "type": "long"
8 | },
9 | {
10 | "name": "taxiId", "type": "long"
11 | },
12 | {
13 | "name": "paymentType", "type": "string"
14 | },
15 | {
16 | "name": "driverId", "type": "long"
17 | },
18 | {
19 | "name": "startTime", "type": "long"
20 | },
21 | {
22 | "name": "tip", "type": "float"
23 | },
24 | {
25 | "name": "tolls", "type": "float"
26 | },
27 | {
28 | "name": "totalFare", "type": "float"
29 | }
30 | ]
31 | }
32 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/InvalidMetricLogger.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl._
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class InvalidMetricLogger extends AkkaStreamlet {
9 | val inlet = AvroInlet[InvalidMetric]("in")
10 | val shape = StreamletShape.withInlets(inlet)
11 |
12 | override def createLogic = new RunnableGraphStreamletLogic() {
13 | val flow = FlowWithOffsetContext[InvalidMetric]
14 | .map { invalidMetric ⇒
15 | system.log.warning(s"Invalid metric detected! $invalidMetric")
16 | invalidMetric
17 | }
18 |
19 | def runnableGraph = {
20 | sourceWithOffsetContext(inlet).via(flow).to(sinkWithOffsetContext)
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 |
5 | import JsonCallRecord._
6 | import pipelines.streamlets.avro._
7 | import pipelines.examples.carly.data._
8 | import pipelines.streamlets._
9 | import pipelines.akkastream._
10 | import pipelines.akkastream.util.scaladsl.HttpServerLogic
11 |
12 | class CallRecordIngress extends AkkaServerStreamlet {
13 |
14 | //tag::docs-outlet-partitioner-example[]
15 | val out = AvroOutlet[CallRecord]("out").withPartitioner(RoundRobinPartitioner)
16 | //end::docs-outlet-partitioner-example[]
17 |
18 | final override val shape = StreamletShape.withOutlets(out)
19 | final override def createLogic = HttpServerLogic.default(this, out)
20 | }
21 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataStreamingIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import akka.http.scaladsl.common.EntityStreamingSupport
4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
5 |
6 | import SensorDataJsonSupport._
7 | import pipelines.akkastream.AkkaServerStreamlet
8 | import pipelines.akkastream.util.scaladsl._
9 | import pipelines.streamlets.{ RoundRobinPartitioner, StreamletShape }
10 | import pipelines.streamlets.avro._
11 |
12 | class SensorDataStreamingIngress extends AkkaServerStreamlet {
13 | val out = AvroOutlet[SensorData]("out", RoundRobinPartitioner)
14 | def shape = StreamletShape.withOutlets(out)
15 |
16 | implicit val entityStreamingSupport = EntityStreamingSupport.json()
17 | override def createLogic = HttpServerLogic.defaultStreaming(this, out)
18 | }
19 |
--------------------------------------------------------------------------------
/sensor-data-java/load-data-into-pvc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script loads the test file containing device ids that should be filtered out by the pipeline.
4 | # For this to work, the application has to be deployed and all pods need to have entered the `running` state.
5 |
6 | streamletName="filter"
7 | podName=$(kubectl get pods -n sensor-data-java -l com.lightbend.pipelines/streamlet-name="$streamletName" --output jsonpath={.items..metadata.name})
8 | if [ $? -ne 0 ] || [ -z "$podName" ]; then
9 | echo "Could not find the streamlet '$streamletName', which contains the mounted PVC this script will copy the filter file into."
10 | echo "Make sure that the application has been deployed and all pods are running."
11 | exit 1
12 | fi
13 |
14 | echo "Copying files to /mnt/data in pod $podName"
15 | kubectl cp test-data/device-ids.txt -n sensor-data-java $podName:/mnt/data
16 |
17 | echo "Done"
18 |
--------------------------------------------------------------------------------
/flink-taxi-ride/ingestor/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, console
2 |
3 | # Uncomment this if you want to _only_ change Flink's logging
4 | #log4j.logger.org.apache.flink=INFO
5 |
6 | # The following lines set the log level of common libraries/connectors to
7 | # WARN. The root logger does not override this; you have to change these log
8 | # levels here manually.
9 | log4j.logger.akka=WARN
10 | log4j.logger.org.apache.kafka=WARN
11 | log4j.logger.org.apache.hadoop=WARN
12 | log4j.logger.org.apache.zookeeper=WARN
13 |
14 | log4j.appender.console=org.apache.log4j.ConsoleAppender
15 | log4j.appender.console.target=System.out
16 | log4j.appender.console.immediateFlush=true
17 | log4j.appender.console.encoding=UTF-8
18 |
19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.console.layout.conversionPattern=%d [%t] %-5p %c - %m%n
21 |
--------------------------------------------------------------------------------
/flink-taxi-ride/logger/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, console
2 |
3 | # Uncomment this if you want to _only_ change Flink's logging
4 | #log4j.logger.org.apache.flink=INFO
5 |
6 | # The following lines set the log level of common libraries/connectors to
7 | # WARN. The root logger does not override this; you have to change these log
8 | # levels here manually.
9 | log4j.logger.akka=WARN
10 | log4j.logger.org.apache.kafka=WARN
11 | log4j.logger.org.apache.hadoop=WARN
12 | log4j.logger.org.apache.zookeeper=WARN
13 |
14 | log4j.appender.console=org.apache.log4j.ConsoleAppender
15 | log4j.appender.console.target=System.out
16 | log4j.appender.console.immediateFlush=true
17 | log4j.appender.console.encoding=UTF-8
18 |
19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.console.layout.conversionPattern=%d [%t] %-5p %c - %m%n
21 |
--------------------------------------------------------------------------------
/warez/data/generate-product.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | ##########
4 | # Generate a JSON document representing a product.
5 | ##########
6 |
7 | #set -x
8 |
9 | ROOTDIR=$(cd $(dirname $0); pwd)
10 |
11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt")
12 | mapfile -t words < <(shuf -n 12 "${ROOTDIR}/values/5-letters-words.txt")
13 | mapfile -t keywords < <(shuf -n 2 "${ROOTDIR}/values/keywords.txt")
14 |
15 | cat << EOF
16 | {
17 | "id": "$uuid",
18 | "name": "${words[0]}-${words[1]}",
19 | "description": "${words[2]} ${words[3]} ${words[4]}, ${words[5]} ${words[6]}.",
20 | "keywords": [
21 | "${keywords[0]}",
22 | "${keywords[1]}"
23 | ],
24 | "skus": [
25 | {
26 | "id": "${uuid%..}aa",
27 | "name": "${words[0]}-${words[1]}-${words[7]}"
28 | },
29 | {
30 | "id": "${uuid%..}bb",
31 | "name": "${words[0]}-${words[1]}-${words[8]}"
32 | }
33 | ]
34 | }
35 | EOF
36 |
37 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor1.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.streamlets.StreamletShape
4 |
5 | import pipelines.streamlets.avro._
6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
7 |
8 | import org.apache.spark.sql.Dataset
9 | import org.apache.spark.sql.functions._
10 | import org.apache.spark.sql.types.TimestampType
11 | import pipelines.spark.sql.SQLImplicits._
12 | import org.apache.spark.sql.streaming.OutputMode
13 |
14 | class IdentitySparkProcessor1 extends SparkStreamlet {
15 |
16 | val in = AvroInlet[Data]("in")
17 | val out = AvroOutlet[Data]("out", _.src)
18 | val shape = StreamletShape(in, out)
19 |
20 | override def createLogic() = new SparkStreamletLogic {
21 | override def buildStreamingQueries = {
22 | writeStream(readStream(in).map(d ⇒ d.copy(t1 = TimeOps.nowAsOption)), out, OutputMode.Append).toQueryExecution
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/scala/pipelines/example/SparkConsoleEgress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.streamlets.StreamletShape
4 |
5 | import pipelines.streamlets.avro._
6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
7 | import pipelines.spark.sql.SQLImplicits._
8 | import org.apache.spark.sql.streaming.OutputMode
9 |
10 | class SparkConsoleEgress extends SparkStreamlet {
11 | val in = AvroInlet[Agg]("in")
12 | val shape = StreamletShape(in)
13 |
14 | override def createLogic() = new SparkStreamletLogic {
15 | //tag::docs-checkpointDir-example[]
16 | override def buildStreamingQueries = {
17 | readStream(in).writeStream
18 | .format("console")
19 | .option("checkpointLocation", context.checkpointDir("console-egress"))
20 | .outputMode(OutputMode.Append())
21 | .start()
22 | .toQueryExecution
23 | }
24 | //end::docs-checkpointDir-example[]
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/scala/README.md:
--------------------------------------------------------------------------------
1 | # Spark Resilience Test
2 |
3 | This example validates that the Spark components in a pipeline are resilient to failure.
4 | It creates three components:
5 | - A data producer ingress,
6 | - A processor, and
7 | - A stateful validator egress.
8 |
9 | The producer emits a monotonically increasing index together with a timestamp.
10 |
11 | The egress keeps track of the indexes it receives and detects any gaps in the stream.
12 |
13 | The processor is an ephemeral pass-through. Like a suicidal monkey, it kills itself at random.
14 | If all resilience features are working properly, it comes back up and resumes its work where it left off, leaving no holes in the data stream.
15 | The egress should detect and report it if this is not the case.
16 |
17 | Note: we have determined that Spark driver pods do not currently report their health correctly.
18 | Until this is resolved, the suicidal monkey stays alive, i.e. the random self-kill is disabled.
19 |
20 |
21 |
22 |
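To make the gap-detection idea above concrete, here is a small, self-contained sketch in plain Scala. It is not the streamlet code itself; `expectedCount` and the sample data are illustrative only.

```scala
// Sketch of the egress's gap check: the missing indexes of a group are the
// difference between the full expected range and the indexes actually received.
object GapCheckSketch {
  def missing(received: Set[Long], expectedCount: Long): Set[Long] =
    (0L until expectedCount).toSet.diff(received)

  def main(args: Array[String]): Unit = {
    val received = Set(0L, 1L, 2L, 4L, 5L)                 // index 3 never arrived
    println(s"missing indexes: ${missing(received, 6L)}")  // prints: missing indexes: Set(3)
  }
}
```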
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordStreamingIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
4 | import akka.http.scaladsl.common.EntityStreamingSupport
5 |
6 | import pipelines.streamlets.avro._
7 | import pipelines.examples.carly.ingestor.JsonCallRecord._
8 | import pipelines.examples.carly.data._
9 | import pipelines.streamlets._
10 | import pipelines.akkastream._
11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic
12 |
13 | class CallRecordStreamingIngress extends AkkaServerStreamlet {
14 | implicit val entityStreamingSupport = EntityStreamingSupport.json()
15 |
16 | val out = AvroOutlet[CallRecord]("out").withPartitioner(RoundRobinPartitioner)
17 |
18 | final override val shape = StreamletShape.withOutlets(out)
19 |
20 | override final def createLogic = HttpServerLogic.defaultStreaming(this, out)
21 | }
22 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/MetricsValidation.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.util.scaladsl._
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class MetricsValidation extends AkkaStreamlet {
9 | val in = AvroInlet[Metric]("in")
10 | val invalid = AvroOutlet[InvalidMetric]("invalid").withPartitioner(metric ⇒ metric.metric.deviceId.toString)
11 | val valid = AvroOutlet[Metric]("valid").withPartitioner(RoundRobinPartitioner)
12 | val shape = StreamletShape(in).withOutlets(invalid, valid)
13 |
14 | override def createLogic = new SplitterLogic(in, invalid, valid) {
15 | def flow = flowWithOffsetContext()
16 | .map { metric ⇒
17 | if (!SensorDataUtils.isValidMetric(metric)) Left(InvalidMetric(metric, "All measurements must be positive numbers!"))
18 | else Right(metric)
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/mixed-sensors/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | // This file contains settings that are specific to a deployment environment.
2 |
3 | /*
4 | * The `pipelinesDockerRegistry` setting specifies the Docker registry
5 | * that the Pipelines sbt plugin uses for pushing application Docker images.
6 | * This registry needs to have been configured as a "pullable" registry on any
7 | * Kubernetes cluster you want to deploy this application to.
8 | *
9 | * The specified Docker registry URL needs to be configured on the cluster that
10 | * the application should be deployed to. There are two common models for setting
11 | * this up:
12 | *
13 | * 1. A central docker registry, used by multiple Kubernetes clusters
14 | * 2. A Docker registry per Kubernetes cluster (common for Openshift)
15 | *
16 | * The sbt plugin expects you to have logged in to the specified registry using
17 | * the `docker login` command.
18 | */
19 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
20 |
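As a purely illustrative value (substitute the registry your cluster actually pulls from), an OpenShift-style per-cluster registry could be configured as:

```scala
// Illustrative value only -- use the Docker registry configured for your cluster.
ThisBuild / pipelinesDockerRegistry := Some("docker-registry-default.apps.example.com")
```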
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/flink-taxi-ride/ingestor/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/flink-taxi-ride/logger/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/blueprint/t0-process-t1-blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 |
5 | spark-process1 = pipelines.example.IdentitySparkProcessor1
6 | spark-process2 = pipelines.example.IdentitySparkProcessor2
7 | spark-process = pipelines.example.IdentitySparkProcessor0
8 |
9 | akka-process = pipelines.example.IdentityAkkaStreamsProcessor0
10 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1
11 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2
12 |
13 | egress = pipelines.example.SparkConsoleEgress
14 | }
15 | connections {
16 | ingress.out = [spark-process1.in, akka-process1.in]
17 | spark-process1.out = [spark-process.in]
18 | akka-process1.out = [akka-process.in]
19 |
20 | spark-process.out = [spark-process2.in]
21 | akka-process.out = [akka-process2.in]
22 |
23 | spark-process2.out = [egress.in1]
24 | akka-process2.out = [egress.in2]
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataIngress.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | import pipelines.akkastream.AkkaServerStreamlet;
4 |
5 | import pipelines.akkastream.StreamletLogic;
6 | import pipelines.akkastream.util.javadsl.HttpServerLogic;
7 |
8 | import pipelines.streamlets.RoundRobinPartitioner;
9 | import pipelines.streamlets.StreamletShape;
10 | import pipelines.streamlets.avro.AvroOutlet;
11 |
12 | import akka.http.javadsl.marshallers.jackson.Jackson;
13 |
14 | public class SensorDataIngress extends AkkaServerStreamlet {
15 | AvroOutlet<SensorData> out = AvroOutlet.<SensorData>create("out", SensorData.class)
16 | .withPartitioner(RoundRobinPartitioner.getInstance());
17 |
18 | public StreamletShape shape() {
19 | return StreamletShape.createWithOutlets(out);
20 | }
21 |
22 | public StreamletLogic createLogic() {
23 | return HttpServerLogic.createDefault(this, out, Jackson.byteStringUnmarshaller(SensorData.class), getStreamletContext());
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/sensor-data-java/test-data/future-data.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
4 | "timestamp": 1495545346279,
5 | "measurements": {
6 | "power": 1.7,
7 | "rotorSpeed": 3.9,
8 | "windSpeed": 25.3
9 | }
10 | },
11 | {
12 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
13 | "timestamp": 1495545346279,
14 | "measurements": {
15 | "power": -1.7,
16 | "rotorSpeed": 3.9,
17 | "windSpeed": 25.3
18 | }
19 | }
20 | ,
21 | {
22 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
23 | "timestamp": 3134584800000,
24 | "measurements": {
25 | "power": 1.7,
26 | "rotorSpeed": 3.9,
27 | "windSpeed": 100.1
28 | }
29 | },
30 | {
31 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
32 | "timestamp": 3134584800000,
33 | "measurements": {
34 | "power": 1.7,
35 | "rotorSpeed": 3.9,
36 | "windSpeed": 100.1
37 | }
38 | }
39 | ]
40 |
41 |
42 |
--------------------------------------------------------------------------------
/sensor-data-scala/test-data/future-data.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
4 | "timestamp": 1495545346279,
5 | "measurements": {
6 | "power": 1.7,
7 | "rotorSpeed": 3.9,
8 | "windSpeed": 25.3
9 | }
10 | },
11 | {
12 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
13 | "timestamp": 1495545346279,
14 | "measurements": {
15 | "power": -1.7,
16 | "rotorSpeed": 3.9,
17 | "windSpeed": 25.3
18 | }
19 | }
20 | ,
21 | {
22 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
23 | "timestamp": 3134584800000,
24 | "measurements": {
25 | "power": 1.7,
26 | "rotorSpeed": 3.9,
27 | "windSpeed": 100.1
28 | }
29 | },
30 | {
31 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992",
32 | "timestamp": 3134584800000,
33 | "measurements": {
34 | "power": 1.7,
35 | "rotorSpeed": 3.9,
36 | "windSpeed": 100.1
37 | }
38 | }
39 | ]
40 |
41 |
42 |
--------------------------------------------------------------------------------
/warez/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/RotorspeedWindowLogger.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl._
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class RotorspeedWindowLogger extends AkkaStreamlet {
9 | val in = AvroInlet[Metric]("in")
10 | val shape = StreamletShape(in)
11 | override def createLogic = new RunnableGraphStreamletLogic() {
12 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext)
13 | def flow = {
14 | FlowWithOffsetContext[Metric]
15 | .grouped(5)
16 | .map { rotorSpeedWindow ⇒
17 | val (avg, _) = rotorSpeedWindow.map(_.value).foldLeft((0.0, 1)) { case ((avg, idx), next) ⇒ (avg + (next - avg) / idx, idx + 1) }
18 |
19 | system.log.info(s"Average rotorspeed is: $avg")
20 |
21 | avg
22 | }
23 | .mapContext(_.last) // grouped(5) turns the offset context into a Seq of offsets; committing only the last (highest) one acknowledges the whole window
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/flink-taxi-ride/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/sensor-data-java/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/spark-sensors/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallAggregatorConsoleEgress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.aggregator
2 |
3 | import pipelines.streamlets._
4 | import pipelines.streamlets.avro._
5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
6 | import pipelines.spark.sql.SQLImplicits._
7 | import org.apache.spark.sql.streaming.OutputMode
8 |
9 | import org.apache.log4j.{ Level, Logger }
10 |
11 | import pipelines.examples.carly.data._
12 |
13 | class CallAggregatorConsoleEgress extends SparkStreamlet {
14 |
15 | val rootLogger = Logger.getRootLogger()
16 | rootLogger.setLevel(Level.ERROR)
17 |
18 | val in = AvroInlet[AggregatedCallStats]("in")
19 | val shape = StreamletShape(in)
20 |
21 | override def createLogic = new SparkStreamletLogic {
22 | override def buildStreamingQueries = {
23 | readStream(in).writeStream
24 | .format("console")
25 | .outputMode(OutputMode.Append())
26 | .start()
27 | .toQueryExecution
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/sensor-data-scala/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/spark-resilience-test/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/warez/blueprint/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/call-record-aggregator/target-env.sbt.example:
--------------------------------------------------------------------------------
1 | /*
2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify
3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host)
4 | * that the Pipelines sbt plugin uses for pushing application Docker images.
5 | *
6 | * Example:
7 | *
8 | * pipelinesDockerRegistry := Some("foo.com")
9 | * pipelinesDockerRepository := Some("bar/baz")
10 | *
11 | * This will cause your application Docker images to be pushed as:
12 | *
13 | * `foo.com/bar/baz/[image name]:[tag]`
14 | *
15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.:
16 | *
17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com")
18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz")
19 | *
20 | * The sbt plugin expects you to have logged in to the specified registry using
21 | * the `docker login` command.
22 | */
23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY")
24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY")
25 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/call-record-aggregator/call-record-pipeline/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | cdr-generator1 = pipelines.examples.carly.aggregator.CallRecordGeneratorIngress
4 | cdr-generator2 = pipelines.examples.carly.aggregator.CallRecordGeneratorIngress
5 | merge = pipelines.examples.carly.ingestor.CallRecordMerge
6 | cdr-ingress = pipelines.examples.carly.ingestor.CallRecordIngress
7 | cdr-validator = pipelines.examples.carly.ingestor.CallRecordValidation
8 | cdr-aggregator = pipelines.examples.carly.aggregator.CallStatsAggregator
9 | console-egress = pipelines.examples.carly.output.AggregateRecordEgress
10 | error-egress = pipelines.examples.carly.output.InvalidRecordEgress
11 |
12 | }
13 | connections {
14 | cdr-generator1.out = [merge.in-0]
15 | cdr-generator2.out = [merge.in-1]
16 | cdr-ingress.out = [merge.in-2]
17 | merge.out = [cdr-validator.in]
18 | cdr-validator.valid = [cdr-aggregator.in]
19 | cdr-aggregator.out = [console-egress.in]
20 | cdr-validator.invalid = [error-egress.in]
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/call-record-aggregator/call-record-pipeline/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <target>System.out</target>
4 |         <encoder>
5 |             <pattern>%d{ISO8601} %-5level [%logger{0}] - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |     <root level="INFO">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 | </configuration>
12 |
--------------------------------------------------------------------------------
/flink-taxi-ride/datamodel/src/main/avro/taxiride.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "pipelines.flink.avro",
3 | "type": "record",
4 | "name": "TaxiRide",
5 | "fields":[
6 | {
7 | "name": "rideId", "type": "long"
8 | },
9 | {
10 | "name": "isStart", "type": "boolean"
11 | },
12 | {
13 | "name": "taxiId", "type": "long"
14 | },
15 | {
16 | "name": "passengerCnt", "type": "int"
17 | },
18 | {
19 | "name": "driverId", "type": "long"
20 | },
21 | {
22 | "name": "startLon", "type": "float"
23 | },
24 | {
25 | "name": "startLat", "type": "float"
26 | },
27 | {
28 | "name": "endLon", "type": "float"
29 | },
30 | {
31 | "name": "endLat", "type": "float"
32 | },
33 | {
34 | "name": "startTime", "type": "long"
35 | },
36 | {
37 | "name": "endTime", "type": "long"
38 | }
39 | ]
40 | }
41 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | http-ingress = pipelines.examples.sensordata.SensorDataHttpIngress
4 | file-ingress = pipelines.examples.sensordata.SensorDataFileIngress
5 | merge = pipelines.examples.sensordata.SensorDataMerge
6 | metrics = pipelines.examples.sensordata.SensorDataToMetrics
7 | validation = pipelines.examples.sensordata.MetricsValidation
8 | valid-logger = pipelines.examples.sensordata.ValidMetricLogger
9 | invalid-logger = pipelines.examples.sensordata.InvalidMetricLogger
10 | rotorizer = pipelines.examples.sensordata.RotorSpeedFilter
11 | rotor-avg-logger = pipelines.examples.sensordata.RotorspeedWindowLogger
12 | }
13 |
14 | connections {
15 | http-ingress.out = [merge.in-0]
16 | file-ingress.out = [merge.in-1]
17 | merge.out = [metrics.in]
18 | metrics.out = [validation.in]
19 | validation.invalid = [invalid-logger.in]
20 | validation.valid = [
21 | valid-logger.in,
22 | rotorizer.in
23 | ]
24 | rotorizer.out = [rotor-avg-logger.in]
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/test/scala/pipelines/example/SparkSequenceValidatorEgressTest.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import scala.collection.immutable.Seq
4 | import scala.concurrent.duration._
5 |
6 | import pipelines.spark.testkit._
7 | import pipelines.spark.sql.SQLImplicits._
8 |
9 | class SparkSequenceValidatorEgressTest extends SparkScalaTestSupport {
10 |
11 | val streamlet = new SparkSequenceValidatorEgress()
12 | val testKit = SparkStreamletTestkit(session)
13 |
14 | "SparkSequenceValidatorEgress" should {
15 | "output streaming data" in {
16 |
17 | // Setup inlet tap on inlet(s) port(s)
18 | val in: SparkInletTap[Data] = testKit.inletAsTap[Data](streamlet.in)
19 |
20 | // Build data and send to inlet tap
21 | val now = System.currentTimeMillis()
22 | val data = (0 until SequenceSettings.GroupSize).map(i ⇒ Data(now + i * 1000, 1, i.toLong)) ++
23 | (0 until SequenceSettings.GroupSize - 1).map(i ⇒ Data(now + i * 1000, 2, i.toLong))
24 | in.addData(data)
25 |
26 | testKit.run(streamlet, Seq(in), Seq.empty, 10.seconds)
27 |
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/test/scala/pipelines/examples/carly/aggregator/CallRecordGeneratorIngressSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.aggregator
2 |
3 | import scala.collection.immutable.Seq
4 | import scala.concurrent.duration._
5 |
6 | import pipelines.examples.carly.data._
7 |
8 | import pipelines.spark.testkit._
9 | import pipelines.spark.sql.SQLImplicits._
10 |
11 | class CallRecordGeneratorIngressSpec extends SparkScalaTestSupport {
12 |
13 | val streamlet = new CallRecordGeneratorIngress()
14 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues(ConfigParameterValue(streamlet.RecordsPerSecond, "50"))
15 |
16 | "CallRecordGeneratorIngress" should {
17 | "produce elements to its outlet" in {
18 |
19 | // setup outlet tap on outlet port
20 | val out = testKit.outletAsTap[CallRecord](streamlet.out)
21 |
22 | testKit.run(streamlet, Seq.empty, Seq(out), 40.seconds)
23 |
24 | // get data from outlet tap
25 | val results = out.asCollection(session)
26 |
27 | // assert
28 | results.size must be > 0
29 |
30 | }
31 | }
32 | }
33 |
34 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataToMetrics.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl._
5 | import pipelines.streamlets.{ RoundRobinPartitioner, StreamletShape }
6 | import pipelines.streamlets.avro._
7 |
8 | class SensorDataToMetrics extends AkkaStreamlet {
9 | val in = AvroInlet[SensorData]("in")
10 | val out = AvroOutlet[Metric]("out").withPartitioner(RoundRobinPartitioner)
11 | val shape = StreamletShape(in, out)
12 | def flow = {
13 | FlowWithOffsetContext[SensorData]
14 | .mapConcat { data ⇒
15 | List(
16 | Metric(data.deviceId, data.timestamp, "power", data.measurements.power),
17 | Metric(data.deviceId, data.timestamp, "rotorSpeed", data.measurements.rotorSpeed),
18 | Metric(data.deviceId, data.timestamp, "windSpeed", data.measurements.windSpeed)
19 | )
20 | }
21 | }
22 | override def createLogic = new RunnableGraphStreamletLogic() {
23 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out))
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordValidation.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import pipelines.streamlets.avro._
4 | import pipelines.streamlets.StreamletShape
5 | import pipelines.akkastream.AkkaStreamlet
6 | import pipelines.akkastream.util.scaladsl.SplitterLogic
7 |
8 | import pipelines.examples.carly.data._
9 |
10 | class CallRecordValidation extends AkkaStreamlet {
11 |
12 | private val oldDataWatermark = java.sql.Timestamp.valueOf("2010-01-01 00:00:00.000").getTime / 1000 //seconds
13 |
14 | val in = AvroInlet[CallRecord]("in")
15 | val left = AvroOutlet[InvalidRecord]("invalid", _.record)
16 | val right = AvroOutlet[CallRecord]("valid", _.user)
17 |
18 | final override val shape = StreamletShape(in).withOutlets(left, right)
19 | final override def createLogic = new SplitterLogic(in, left, right) {
20 | def flow =
21 | flowWithOffsetContext()
22 | .map { record ⇒
23 | if (record.timestamp < oldDataWatermark) Left(InvalidRecord(record.toString, "Timestamp outside range!"))
24 | else Right(record)
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/test/scala/pipelines/example/DataGroupTest.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import org.scalatest.{ Matchers, WordSpec }
4 |
5 | class DataGroupTest extends WordSpec with Matchers {
6 |
7 | "DataGroup" should {
8 | val groupSize = 20
9 | // simulate the behavior of the data producer
10 | val data = (0 to groupSize * 10)
11 | .map(i ⇒ (i.toLong / groupSize, i.toLong))
12 | .groupBy { case (k, _) ⇒ k }
13 | .map { case (k, seqKV) ⇒ (k, seqKV.map { case (_, v) ⇒ v }) }
14 |
15 | "report completed when it has received all data" in {
16 | val dataGroup = DataGroup(3, groupSize, data(3))
17 | assert(dataGroup.isComplete, "dataGroup should be complete with the data sample")
18 | }
19 |
20 | "report missing elements when it doesn't have all data for its group" in {
21 | val dataSubset = data(5).drop(3)
22 | val dataGroup = DataGroup(5, groupSize, dataSubset)
23 | assert(!dataGroup.isComplete, "dataGroup should be incomplete")
24 | dataGroup.missing should be(data(5).take(3).toSet)
25 | dataGroup.missingReport should be("(100,102)")
26 | }
27 |
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataStreamingIngress.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | import akka.http.javadsl.common.EntityStreamingSupport;
4 | import akka.http.javadsl.marshallers.jackson.Jackson;
5 |
6 | import pipelines.akkastream.AkkaServerStreamlet;
7 |
8 | import pipelines.akkastream.util.javadsl.HttpServerLogic;
9 | import pipelines.akkastream.StreamletLogic;
10 | import pipelines.streamlets.RoundRobinPartitioner;
11 | import pipelines.streamlets.StreamletShape;
12 | import pipelines.streamlets.avro.AvroOutlet;
13 |
14 | public class SensorDataStreamingIngress extends AkkaServerStreamlet {
15 |
16 |   AvroOutlet<SensorData> out = AvroOutlet.create("out", SensorData.class)
17 | .withPartitioner(RoundRobinPartitioner.getInstance());
18 |
19 | public StreamletShape shape() {
20 | return StreamletShape.createWithOutlets(out);
21 | }
22 |
23 | public StreamletLogic createLogic() {
24 | EntityStreamingSupport ess = EntityStreamingSupport.json();
25 | return HttpServerLogic.createDefaultStreaming(this, out, Jackson.byteStringUnmarshaller(SensorData.class), ess, getStreamletContext());
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/sensor-data-scala/load-data-into-pvc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script loads the necessary files into the PVC that is mounted by the file-based ingress.
4 | # For this to work, the application has to be deployed and all pods need to have entered the `running` state.
5 |
6 | streamletName="sensor-data-file-ingress"
7 | podName=$(kubectl get pods -n sensor-data-scala -l com.lightbend.pipelines/streamlet-name=$streamletName --output jsonpath={.items..metadata.name})
8 | if [ $? -ne 0 ] || [ -z "$podName" ]; then
9 |   echo "Could not find the streamlet '$streamletName' which contains the mounted PVC this script will copy test files into."
10 | echo "Make sure that the application has been deployed and all pods are running."
11 | exit 1
12 | fi
13 |
14 | echo "Copying files to /mnt/data in pod $podName"
15 | kubectl cp test-data/04-moderate-breeze.json -n sensor-data-scala $podName:/mnt/data
16 | kubectl cp test-data/10-storm.json -n sensor-data-scala $podName:/mnt/data
17 | kubectl cp test-data/11-violent-storm.json -n sensor-data-scala $podName:/mnt/data
18 | kubectl cp test-data/12-hurricane.json -n sensor-data-scala $podName:/mnt/data
19 | kubectl cp test-data/invalid-metric.json -n sensor-data-scala $podName:/mnt/data
20 |
21 | echo "Done"
22 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/blueprint/blueprint.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 |
5 | spark-process1 = pipelines.example.IdentitySparkProcessor1
6 | spark-process2 = pipelines.example.IdentitySparkProcessor2
7 | spark-process-a100 = pipelines.example.IdentitySparkProcessor0
8 | spark-process-b100 = pipelines.example.IdentitySparkProcessor0
9 |
10 | akka-process-a100 = pipelines.example.IdentityAkkaStreamsProcessor0
11 | akka-process-b100 = pipelines.example.IdentityAkkaStreamsProcessor0
12 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1
13 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2
14 |
15 | egress = pipelines.example.SparkConsoleEgress
16 | }
17 | connections {
18 | ingress.out = [spark-process1.in, akka-process1.in]
19 | spark-process1.out = [spark-process-a100.in]
20 | akka-process1.out = [akka-process-a100.in]
21 |
22 | spark-process-a100.out = [spark-process-b100.in]
23 | akka-process-a100.out = [akka-process-b100.in]
24 |
25 | spark-process-b100.out = [spark-process2.in]
26 | akka-process-b100.out = [akka-process2.in]
27 |
28 | spark-process2.out = [egress.in1]
29 | akka-process2.out = [egress.in2]
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/blueprint/parallel-100ms-delay.conf:
--------------------------------------------------------------------------------
1 | blueprint {
2 | streamlets {
3 | ingress = pipelines.example.SparkRandomGenDataIngress
4 |
5 | spark-process1 = pipelines.example.IdentitySparkProcessor1
6 | spark-process2 = pipelines.example.IdentitySparkProcessor2
7 | spark-process-a100 = pipelines.example.IdentitySparkProcessor0
8 | spark-process-b100 = pipelines.example.IdentitySparkProcessor0
9 |
10 | akka-process-a100 = pipelines.example.IdentityAkkaStreamsProcessor0
11 | akka-process-b100 = pipelines.example.IdentityAkkaStreamsProcessor0
12 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1
13 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2
14 |
15 | egress = pipelines.example.SparkConsoleEgress
16 | }
17 | connections {
18 | ingress.out = [spark-process1.in, akka-process1.in]
19 | spark-process1.out = [spark-process-a100.in]
20 | akka-process1.out = [akka-process-a100.in]
21 |
22 | spark-process-a100.out = [spark-process-b100.in]
23 | akka-process-a100.out = [akka-process-b100.in]
24 |
25 | spark-process-b100.out = [spark-process2.in]
26 | akka-process-b100.out = [akka-process2.in]
27 |
28 | spark-process2.out = [egress.in1]
29 | akka-process2.out = [egress.in2]
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/warez/datamodel/src/main/avro/Product.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "warez",
3 |
4 | "type": "record",
5 | "name": "Product",
6 |
7 | "fields": [
8 | {
9 | "name": "id",
10 | "type": "string"
11 | },
12 | {
13 | "name": "name",
14 | "type": "string"
15 | },
16 | {
17 | "name": "description",
18 | "type": "string"
19 | },
20 | {
21 | "name": "keywords",
22 | "type": {
23 | "type": "array",
24 | "items": "string"
25 | }
26 | },
27 | {
28 | "name": "skus",
29 | "type": {
30 | "type": "array",
31 | "items": {
32 | "type": "record",
33 | "name": "Sku",
34 |
35 | "fields": [
36 | {
37 | "name": "id",
38 | "type": "string"
39 | },
40 | {
41 | "name": "name",
42 | "type": "string"
43 | },
44 | {
45 | "name": "stock",
46 | "type": ["null", "int"],
47 | "default": null
48 | },
49 | {
50 | "name": "price",
51 | "type": ["null", "int"],
52 | "default": null
53 | }
54 | ]
55 | }
56 | }
57 | }
58 | ]
59 | }
60 |
61 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/test/scala/pipelines/example/SparkSequenceGeneratorIngressTest.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import scala.collection.immutable.Seq
4 | import scala.concurrent.duration._
5 |
6 | import pipelines.spark.testkit._
7 | import pipelines.spark.sql.SQLImplicits._
8 |
9 | class SparkSequenceGeneratorIngressTest extends SparkScalaTestSupport {
10 |
11 | val streamlet = new SparkSequenceGeneratorIngress()
12 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues(ConfigParameterValue(streamlet.RecordsPerSecond, "50"))
13 |
14 | "SparkSequenceGeneratorIngress" should {
15 | "produce data " in {
16 |
17 |       // setup outlet tap on the outlet port
18 | val out: SparkOutletTap[Data] = testKit.outletAsTap[Data](streamlet.out)
19 |
20 |       // Run the streamlet using the testkit and the outlet tap
21 | testKit.run(streamlet, Seq.empty, Seq(out), 10.seconds)
22 |
23 | // get data from outlet tap
24 | val results = out.asCollection(session)
25 | val ordered = results.map(data ⇒ data.value).sorted
26 | ordered.size mustBe >(SequenceSettings.RecordsPerSecond) // at least one second of data
27 | assert((ordered zip ordered.tail).forall { case (i, j) ⇒ j == (i + 1) }, "produced list missed elements")
28 |
29 | }
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/dsl/HttpServer.scala:
--------------------------------------------------------------------------------
1 | package warez
2 | package dsl
3 |
4 | import scala.util._
5 |
6 | import akka.actor._
7 | import akka.http.scaladsl._
8 | import akka.http.scaladsl.model._
9 | import akka.stream._
10 | import akka.stream.scaladsl._
11 |
12 | import pipelines.streamlets.Dun
13 | import pipelines.akkastream._
14 |
15 | trait HttpServer {
16 | def startServer(
17 | context: AkkaStreamletContext,
18 | handler: Flow[HttpRequest, HttpResponse, _],
19 | port: Int
20 | )(implicit system: ActorSystem, mat: Materializer): Unit = {
21 | import system.dispatcher
22 | Http()
23 | .bindAndHandle(handler, "0.0.0.0", port)
24 | .map { binding ⇒
25 | context.signalReady()
26 | system.log.info(s"Bound to ${binding.localAddress.getHostName}:${binding.localAddress.getPort}")
27 | // this only completes when StreamletRef executes cleanup.
28 | context.onStop { () ⇒
29 | system.log.info(s"Unbinding from ${binding.localAddress.getHostName}:${binding.localAddress.getPort}")
30 | binding.unbind().map(_ ⇒ Dun)
31 | }
32 | binding
33 | }
34 | .andThen {
35 | case Failure(cause) ⇒
36 | system.log.error(cause, s"Failed to bind to $port.")
37 | context.stop()
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-java-aggregation-output/src/main/java/pipelines/examples/carly/output/AggregateRecordEgress.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.output;
2 |
3 | import akka.NotUsed;
4 | import akka.kafka.ConsumerMessage.CommittableOffset;
5 | import akka.stream.javadsl.*;
6 | import pipelines.streamlets.*;
7 | import pipelines.streamlets.avro.*;
8 | import pipelines.akkastream.*;
9 | import pipelines.akkastream.javadsl.*;
10 |
11 | import pipelines.examples.carly.data.*;
12 |
13 |
14 | public class AggregateRecordEgress extends AkkaStreamlet {
15 |   public AvroInlet<AggregatedCallStats> in = AvroInlet.create("in", AggregatedCallStats.class);
16 |
17 | @Override public StreamletShape shape() {
18 | return StreamletShape.createWithInlets(in);
19 | }
20 |
21 | @Override
22 | public StreamletLogic createLogic() {
23 | return new RunnableGraphStreamletLogic(getStreamletContext()) {
24 | @Override
25 |       public RunnableGraph<?> createRunnableGraph() {
26 | return getSourceWithOffsetContext(in)
27 | .via(
28 |             FlowWithOffsetContext.<AggregatedCallStats>create()
29 | .map(metric -> {
30 | System.out.println(metric);
31 | return metric;
32 | })
33 | )
34 | .to(getSinkWithOffsetContext());
35 | }
36 | };
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/scala/pipelines/example/SuicidalMonkeyProcessor.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import org.apache.spark.sql.Dataset
4 | import org.apache.spark.sql.streaming.OutputMode
5 |
6 | import pipelines.streamlets.StreamletShape
7 | import pipelines.streamlets.avro._
8 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
9 | import pipelines.spark.sql.SQLImplicits._
10 |
11 | class SuicidalMonkeyProcessor extends SparkStreamlet {
12 | val in = AvroInlet[Data]("in")
13 | val out = AvroOutlet[Data]("out", _.key.toString)
14 | val shape = StreamletShape(in, out)
15 |
16 | val rng = scala.util.Random
17 | override def createLogic() = new SparkStreamletLogic {
18 | override def buildStreamingQueries = {
19 | val outStream = process(readStream(in))
20 | writeStream(outStream, out, OutputMode.Append).toQueryExecution
21 | }
22 |
23 | private def process(inDataset: Dataset[Data]): Dataset[Data] = {
24 | inDataset.mapPartitions { iter ⇒
25 | // monkey business
26 | // The logic in this processor causes the current executor to crash with a certain probability.
27 | // comment out to see the process working
28 | if (rng.nextDouble() < SequenceSettings.FailureProbability) {
29 | sys.exit(-1)
30 | }
31 | iter
32 | }
33 |
34 | }
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/scala/pipelines/example/MovingAverageSparklet.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.streamlets.StreamletShape
4 |
5 | import pipelines.streamlets.avro._
6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
7 |
8 | import org.apache.spark.sql.Dataset
9 | import org.apache.spark.sql.functions._
10 | import org.apache.spark.sql.types.TimestampType
11 | import pipelines.spark.sql.SQLImplicits._
12 | import org.apache.spark.sql.streaming.OutputMode
13 |
14 | class MovingAverageSparklet extends SparkStreamlet {
15 |
16 | val in = AvroInlet[Data]("in")
17 | val out = AvroOutlet[Agg]("out", _.src)
18 | val shape = StreamletShape(in, out)
19 |
20 | override def createLogic() = new SparkStreamletLogic {
21 | override def buildStreamingQueries = {
22 | val dataset = readStream(in)
23 | val outStream = process(dataset)
24 | writeStream(outStream, out, OutputMode.Append).toQueryExecution
25 | }
26 |
27 | private def process(inDataset: Dataset[Data]): Dataset[Agg] = {
28 | val query = inDataset
29 | .withColumn("ts", $"timestamp".cast(TimestampType))
30 | .withWatermark("ts", "1 minutes")
31 | .groupBy(window($"ts", "1 minute", "30 seconds"), $"src", $"gauge").agg(avg($"value") as "avg")
32 | query.select($"src", $"gauge", $"avg" as "value").as[Agg]
33 | }
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/warez/data/README.md:
--------------------------------------------------------------------------------
1 | # Test data generators
2 |
3 | ## Generated data
4 | Generates formatted JSON documents to use as test data for the warez test application.
5 |
6 | The generated data is self-consistent:
7 | * The UUIDs for the products are chosen from `data/uuids.txt`.
8 | * The UUIDs for the SKUs are generated by replacing the last two characters of the associated product UUID with `aa` or `bb` (see the sketch below).
9 | * The base list of UUIDs has been checked to ensure that all SKU UUIDs are unique.
10 | * The keywords for the products are chosen from `data/keywords.txt`.
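
For illustration, the SKU UUID derivation can be reproduced in a couple of lines of shell. This is a minimal sketch of the rule described above, not an excerpt from the generator scripts, and the product UUID shown is a made-up example rather than a value from `uuids.txt`:

```bash
# Replace the last two characters of a product UUID with "aa" or "bb"
# to obtain SKU UUIDs associated with that product.
productUuid="3e8f2d7c-1b4a-4c2e-9f6d-2a8c5e7b9d13"   # illustrative value only
skuUuidA="${productUuid%??}aa"
skuUuidB="${productUuid%??}bb"
echo "$skuUuidA $skuUuidB"
```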
11 |
12 | Additional information:
13 | * Prices in the generated PriceUpdates are between 1 and 1999.
14 | * Stock diffs in the generated StockUpdates are between -10 and 10.
15 | * Product names and descriptions are generated by combining five-letter words chosen from `data/5-letters-words.txt`.
16 |
17 | ## Usage
18 |
19 | The scripts can be combined with `curl` to send data to the application ingresses.
20 |
21 | ### Products
22 |
23 | ```bash
24 | curl --data "$(./generate-product.sh)" http://appname.apps.clustername.lightbend.com/products
25 | ```
26 |
27 | ### Price updates
28 |
29 | ```bash
30 | curl --data "$(./generate-price-update.sh)" http://appname.apps.clustername.lightbend.com/price-updates
31 | ```
32 |
33 | ### Stock updates
34 |
35 | ```bash
36 | curl --data "$(./generate-stock-update.sh)" http://appname.apps.clustername.lightbend.com/stock-updates
37 | ```
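
To generate a continuous stream of test traffic, the same commands can be wrapped in a small shell loop. This is only an illustrative sketch; adjust the URL to match your deployed application, as in the examples above:

```bash
# Send one randomly generated stock update per second until interrupted.
while true; do
  curl --data "$(./generate-stock-update.sh)" http://appname.apps.clustername.lightbend.com/stock-updates
  sleep 1
done
```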
--------------------------------------------------------------------------------
/spark-resilience-test/src/main/scala/pipelines/example/SparkSequenceGeneratorIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import org.apache.spark.sql.Dataset
4 | import org.apache.spark.sql.types.LongType
5 | import org.apache.spark.sql.streaming.OutputMode
6 |
7 | import pipelines.streamlets._
8 | import pipelines.streamlets.StreamletShape
9 | import pipelines.streamlets.avro._
10 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
11 | import pipelines.spark.sql.SQLImplicits._
12 |
13 | class SparkSequenceGeneratorIngress extends SparkStreamlet {
14 | val out = AvroOutlet[Data]("out", d ⇒ d.key.toString)
15 | val shape = StreamletShape(out)
16 |
17 | val RecordsPerSecond = IntegerConfigParameter(
18 | "records-per-second",
19 | "Records per second to process.",
20 | Some(50))
21 |
22 | override def configParameters = Vector(RecordsPerSecond)
23 |
24 | override def createLogic() = new SparkStreamletLogic {
25 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
26 |
27 | override def buildStreamingQueries = {
28 | writeStream(process, out, OutputMode.Append).toQueryExecution
29 | }
30 |
31 | private def process: Dataset[Data] = {
32 | session.readStream
33 | .format("rate")
34 | .option("rowsPerSecond", recordsPerSecond)
35 | .load()
36 | .withColumn("key", ($"value" / SequenceSettings.GroupSize).cast(LongType))
37 | .as[Data]
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-java-aggregation-output/src/main/java/pipelines/examples/carly/output/InvalidRecordEgress.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.output;
2 |
3 | import akka.NotUsed;
4 | import akka.kafka.ConsumerMessage.CommittableOffset;
5 | import akka.stream.javadsl.*;
6 |
7 | import pipelines.streamlets.*;
8 | import pipelines.streamlets.avro.*;
9 | import pipelines.akkastream.*;
10 | import pipelines.akkastream.javadsl.*;
11 | import pipelines.examples.carly.data.*;
12 |
13 | public class InvalidRecordEgress extends AkkaStreamlet {
14 |   public AvroInlet<InvalidRecord> in = AvroInlet.create("in", InvalidRecord.class);
15 |
16 | private Object doPrint(final InvalidRecord record) {
17 | System.out.println(record);
18 | return record;
19 | }
20 |
21 | @Override public StreamletShape shape() {
22 | return StreamletShape.createWithInlets(in);
23 | }
24 |
25 | @Override
26 | public StreamletLogic createLogic() {
27 | return new RunnableGraphStreamletLogic(getStreamletContext()) {
28 | @Override
29 |       public RunnableGraph<?> createRunnableGraph() {
30 | return getSourceWithOffsetContext(in)
31 | .via(flowWithContext())
32 | .to(getSinkWithOffsetContext());
33 | }
34 | };
35 | }
36 |
37 |   private FlowWithContext<InvalidRecord, CommittableOffset, Object, CommittableOffset, NotUsed> flowWithContext() {
38 |     return FlowWithOffsetContext.<InvalidRecord>create().map(metric -> doPrint(metric));
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/JsonFormats.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import java.time.Instant
4 | import java.util.UUID
5 |
6 | import scala.util.Try
7 |
8 | import spray.json._
9 |
10 | trait UUIDJsonSupport extends DefaultJsonProtocol {
11 | implicit object UUIDFormat extends JsonFormat[UUID] {
12 | def write(uuid: UUID) = JsString(uuid.toString)
13 |
14 | def read(json: JsValue): UUID = json match {
15 | case JsString(uuid) ⇒ Try(UUID.fromString(uuid)).getOrElse(deserializationError(s"Expected valid UUID but got '$uuid'."))
16 | case other ⇒ deserializationError(s"Expected UUID as JsString, but got: $other")
17 | }
18 | }
19 | }
20 |
21 | trait InstantJsonSupport extends DefaultJsonProtocol {
22 | implicit object InstantFormat extends JsonFormat[Instant] {
23 | def write(instant: Instant) = JsNumber(instant.toEpochMilli)
24 |
25 | def read(json: JsValue): Instant = json match {
26 | case JsNumber(value) ⇒ Instant.ofEpochMilli(value.toLong)
27 | case other ⇒ deserializationError(s"Expected Instant as JsNumber, but got: $other")
28 | }
29 | }
30 | }
31 |
32 | object MeasurementsJsonSupport extends DefaultJsonProtocol {
33 | implicit val measurementFormat = jsonFormat3(Measurements.apply)
34 | }
35 |
36 | object SensorDataJsonSupport extends DefaultJsonProtocol with UUIDJsonSupport with InstantJsonSupport {
37 | import MeasurementsJsonSupport._
38 | implicit val sensorDataFormat = jsonFormat3(SensorData.apply)
39 | }
40 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/ValidMetricLogger.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import pipelines.akkastream._
4 | import pipelines.akkastream.scaladsl._
5 | import pipelines.streamlets._
6 | import pipelines.streamlets.avro._
7 |
8 | class ValidMetricLogger extends AkkaStreamlet {
9 | val inlet = AvroInlet[Metric]("in")
10 | val shape = StreamletShape.withInlets(inlet)
11 |
12 | val LogLevel = RegExpConfigParameter(
13 | "log-level",
14 |     "Provide one of the following log levels: debug, info, warning or error",
15 |     "^(debug|info|warning|error)$",
16 | Some("debug")
17 | )
18 |
19 | val MsgPrefix = StringConfigParameter(
20 | "msg-prefix",
21 | "Provide a prefix for the log lines",
22 | Some("valid-logger"))
23 |
24 | override def configParameters = Vector(LogLevel, MsgPrefix)
25 |
26 | override def createLogic = new RunnableGraphStreamletLogic() {
27 | val logF: String ⇒ Unit = streamletConfig.getString(LogLevel.key).toLowerCase match {
28 | case "debug" ⇒ system.log.debug _
29 | case "info" ⇒ system.log.info _
30 | case "warning" ⇒ system.log.warning _
31 | case "error" ⇒ system.log.error _
32 | }
33 |
34 | val msgPrefix = streamletConfig.getString(MsgPrefix.key)
35 |
36 | def log(metric: Metric) = {
37 | logF(s"$msgPrefix $metric")
38 | }
39 |
40 | def flow = {
41 | FlowWithOffsetContext[Metric]
42 | .map { validMetric ⇒
43 | log(validMetric)
44 | validMetric
45 | }
46 | }
47 |
48 | def runnableGraph = {
49 | sourceWithOffsetContext(inlet).via(flow).to(sinkWithOffsetContext)
50 | }
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/spark-sensors/src/main/scala/pipelines/example/SparkRandomGenDataIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import java.sql.Timestamp
4 |
5 | import scala.util.Random
6 |
7 | import pipelines.streamlets.{ IntegerConfigParameter, StreamletShape }
8 | import pipelines.streamlets.avro._
9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
10 | import org.apache.spark.sql.Dataset
11 | import org.apache.spark.sql.streaming.OutputMode
12 |
13 | import pipelines.spark.sql.SQLImplicits._
14 |
15 | case class Rate(timestamp: Timestamp, value: Long)
16 |
17 | class SparkRandomGenDataIngress extends SparkStreamlet {
18 | val out = AvroOutlet[Data]("out", d ⇒ d.src)
19 | val shape = StreamletShape(out)
20 |
21 | val RecordsPerSecond = IntegerConfigParameter(
22 | "records-per-second",
23 | "Records per second to produce.",
24 | Some(50))
25 |
26 | override def configParameters = Vector(RecordsPerSecond)
27 |
28 | override def createLogic() = new SparkStreamletLogic {
29 |
30 | override def buildStreamingQueries = {
31 | writeStream(process, out, OutputMode.Append).toQueryExecution
32 | }
33 |
34 | private def process: Dataset[Data] = {
35 |
36 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
37 |
38 | val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"
39 |
40 | val rateStream = session.readStream
41 | .format("rate")
42 | .option("rowsPerSecond", recordsPerSecond)
43 | .load()
44 | .as[Rate]
45 |
46 | rateStream.map {
47 | case Rate(timestamp, value) ⇒ Data(s"src-${value % 100}", timestamp.getTime, gaugeGen(), Random.nextDouble() * value)
48 | }
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/spark-sensors/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 |
4 | import scalariform.formatter.preferences._
5 |
6 | lazy val sparkSensors = (project in file("."))
7 | .enablePlugins(PipelinesSparkApplicationPlugin)
8 | .settings(
9 | libraryDependencies ++= Seq(
10 | "ch.qos.logback" % "logback-classic" % "1.2.3",
11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
12 | ),
13 |
14 | name := "spark-sensors",
15 | organization := "com.lightbend",
16 |
17 | scalaVersion := "2.12.10",
18 | crossScalaVersions := Vector(scalaVersion.value),
19 | scalacOptions ++= Seq(
20 | "-encoding", "UTF-8",
21 | "-target:jvm-1.8",
22 | "-Xlog-reflective-calls",
23 | "-Xlint",
24 | "-Ywarn-unused",
25 | "-Ywarn-unused-import",
26 | "-deprecation",
27 | "-feature",
28 | "-language:_",
29 | "-unchecked"
30 | ),
31 |
32 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
33 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
34 |
35 | scalariformPreferences := scalariformPreferences.value
36 | .setPreference(AlignParameters, false)
37 | .setPreference(AlignSingleLineCaseStatements, true)
38 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
39 | .setPreference(DoubleIndentConstructorArguments, true)
40 | .setPreference(DoubleIndentMethodDeclaration, true)
41 | .setPreference(RewriteArrowSymbols, true)
42 | .setPreference(DanglingCloseParenthesis, Preserve)
43 | .setPreference(NewlineAtEndOfFile, true)
44 | .setPreference(AllowParamGroupsOnNewlines, true)
45 | )
46 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/java/pipelines/examples/sensordata/MetricsValidation.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | import akka.stream.javadsl.*;
4 |
5 | import akka.NotUsed;
6 | import akka.actor.*;
7 | import akka.kafka.ConsumerMessage.CommittableOffset;
8 | import akka.stream.*;
9 |
10 | import com.typesafe.config.Config;
11 |
12 | import pipelines.streamlets.*;
13 | import pipelines.streamlets.avro.*;
14 | import pipelines.akkastream.*;
15 | import pipelines.akkastream.javadsl.util.Either;
16 | import pipelines.akkastream.util.javadsl.*;
17 |
18 | public class MetricsValidation extends AkkaStreamlet {
19 |   AvroInlet<Metric> inlet = AvroInlet.create("in", Metric.class);
20 |   AvroOutlet<InvalidMetric> invalidOutlet = AvroOutlet.create("invalid", m -> m.metric.toString(), InvalidMetric.class);
21 |   AvroOutlet<Metric> validOutlet = AvroOutlet.create("valid", m -> m.getDeviceId().toString() + m.getTimestamp().toString(), Metric.class);
22 |
23 | public StreamletShape shape() {
24 | return StreamletShape.createWithInlets(inlet).withOutlets(invalidOutlet, validOutlet);
25 | }
26 |
27 | public SplitterLogic createLogic() {
28 |     return new SplitterLogic<Metric, InvalidMetric, Metric>(inlet, invalidOutlet, validOutlet, getStreamletContext()) {
29 |       public FlowWithContext<Metric, CommittableOffset, Either<InvalidMetric, Metric>, CommittableOffset, NotUsed> createFlow() {
30 | return createFlowWithOffsetContext()
31 | .map(metric -> {
32 | if (!SensorDataUtils.isValidMetric(metric)) return Either.left(new InvalidMetric(metric, "All measurements must be positive numbers!"));
33 | else return Either.right(metric);
34 | });
35 | }
36 | };
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/spark-resilience-test/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 |
4 | import scalariform.formatter.preferences._
5 |
6 | lazy val sparkResilienceTest = (project in file("."))
7 | .enablePlugins(PipelinesSparkApplicationPlugin)
8 | .settings(
9 | libraryDependencies ++= Seq(
10 | "ch.qos.logback" % "logback-classic" % "1.2.3",
11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
12 | ),
13 |
14 | name := "spark-resilience-test",
15 | organization := "com.lightbend",
16 |
17 | scalaVersion := "2.12.10",
18 | crossScalaVersions := Vector(scalaVersion.value),
19 | scalacOptions ++= Seq(
20 | "-encoding", "UTF-8",
21 | "-target:jvm-1.8",
22 | "-Xlog-reflective-calls",
23 | "-Xlint",
24 | "-Ywarn-unused",
25 | "-Ywarn-unused-import",
26 | "-deprecation",
27 | "-feature",
28 | "-language:_",
29 | "-unchecked"
30 | ),
31 |
32 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
33 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
34 |
35 | scalariformPreferences := scalariformPreferences.value
36 | .setPreference(AlignParameters, false)
37 | .setPreference(AlignSingleLineCaseStatements, true)
38 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
39 | .setPreference(DoubleIndentConstructorArguments, true)
40 | .setPreference(DoubleIndentMethodDeclaration, true)
41 | .setPreference(RewriteArrowSymbols, true)
42 | .setPreference(DanglingCloseParenthesis, Preserve)
43 | .setPreference(NewlineAtEndOfFile, true)
44 | .setPreference(AllowParamGroupsOnNewlines, true)
45 | )
46 |
--------------------------------------------------------------------------------
/flink-taxi-ride/logger/src/main/scala/pipelines/examples/logger/FarePerRideLogger.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 | package logger
3 |
4 | import pipelines.akkastream._
5 | import pipelines.akkastream.scaladsl._
6 | import pipelines.streamlets._
7 | import pipelines.streamlets.avro._
8 | import pipelines.flink.avro._
9 |
10 | class FarePerRideLogger extends AkkaStreamlet {
11 | val inlet = AvroInlet[TaxiRideFare]("in")
12 | val shape = StreamletShape.withInlets(inlet)
13 |
14 | val LogLevel = RegExpConfigParameter(
15 | "log-level",
16 |     "Provide one of the following log levels: debug, info, warning or error",
17 |     "^(debug|info|warning|error)$",
18 | Some("info")
19 | )
20 |
21 | val MsgPrefix = StringConfigParameter(
22 | "msg-prefix",
23 | "Provide a prefix for the log lines",
24 | Some("valid-logger"))
25 |
26 | override def configParameters = Vector(LogLevel, MsgPrefix)
27 |
28 | override def createLogic = new RunnableGraphStreamletLogic() {
29 | val logF: String ⇒ Unit = streamletConfig.getString(LogLevel.key).toLowerCase match {
30 | case "debug" ⇒ system.log.debug _
31 | case "info" ⇒ system.log.info _
32 | case "warning" ⇒ system.log.warning _
33 | case "error" ⇒ system.log.error _
34 | }
35 |
36 | val msgPrefix = streamletConfig.getString(MsgPrefix.key)
37 |
38 | def log(rideFare: TaxiRideFare) = {
39 | logF(s"$msgPrefix $rideFare")
40 | }
41 |
42 | def flow = {
43 | FlowWithOffsetContext[TaxiRideFare]
44 | .map { taxiRideFare ⇒
45 | log(taxiRideFare)
46 | taxiRideFare
47 | }
48 | }
49 |
50 | def runnableGraph =
51 | sourceWithOffsetContext(inlet)
52 | .via(flow)
53 | .to(sinkWithOffsetContext)
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataToMetrics.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | import java.util.Arrays;
4 |
5 | import akka.stream.javadsl.*;
6 | import akka.kafka.ConsumerMessage.CommittableOffset;
7 |
8 | import akka.NotUsed;
9 |
10 | import pipelines.streamlets.*;
11 | import pipelines.streamlets.avro.*;
12 | import pipelines.akkastream.*;
13 | import pipelines.akkastream.javadsl.*;
14 |
15 | public class SensorDataToMetrics extends AkkaStreamlet {
16 |   AvroInlet<SensorData> in = AvroInlet.create("in", SensorData.class);
17 |   AvroOutlet<Metric> out = AvroOutlet.create("out", Metric.class)
18 | .withPartitioner(RoundRobinPartitioner.getInstance());
19 |
20 | public StreamletShape shape() {
21 | return StreamletShape.createWithInlets(in).withOutlets(out);
22 | }
23 |
24 |   private FlowWithContext<SensorData, CommittableOffset, Metric, CommittableOffset, NotUsed> flowWithContext() {
25 |     return FlowWithOffsetContext.<SensorData>create()
26 | .mapConcat(data ->
27 | Arrays.asList(
28 | new Metric(data.getDeviceId(), data.getTimestamp(), "power", data.getMeasurements().getPower()),
29 | new Metric(data.getDeviceId(), data.getTimestamp(), "rotorSpeed", data.getMeasurements().getRotorSpeed()),
30 | new Metric(data.getDeviceId(), data.getTimestamp(), "windSpeed", data.getMeasurements().getWindSpeed())
31 | )
32 | );
33 | }
34 |
35 | public StreamletLogic createLogic() {
36 | return new RunnableGraphStreamletLogic(getStreamletContext()) {
37 |       public RunnableGraph<?> createRunnableGraph() {
38 | return getSourceWithOffsetContext(in).via(flowWithContext()).to(getSinkWithOffsetContext(out));
39 | }
40 | };
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/test/scala/pipelines/examples/carly/aggregator/CallStatsAggregatorSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.aggregator
2 |
3 | import java.time.Instant
4 | import java.time.temporal.ChronoUnit
5 |
6 | import scala.concurrent.duration._
7 |
8 | import scala.util.Random
9 |
10 | import pipelines.examples.carly.data._
11 |
12 | import pipelines.spark.testkit._
13 | import pipelines.spark.sql.SQLImplicits._
14 |
15 | class CallStatsAggregatorSpec extends SparkScalaTestSupport {
16 |
17 | val streamlet = new CallStatsAggregator()
18 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues(
19 | ConfigParameterValue(streamlet.GroupByWindow, "1 minute"),
20 | ConfigParameterValue(streamlet.Watermark, "1 minute"))
21 |
22 | "CallStatsAggregator" should {
23 | "produce elements to its outlet" in {
24 |
25 | // setup inlet tap on inlet port
26 | val in = testKit.inletAsTap[CallRecord](streamlet.in)
27 |
28 | // setup outlet tap on outlet port
29 | val out = testKit.outletAsTap[AggregatedCallStats](streamlet.out)
30 |
31 | val maxUsers = 10
32 | val crs = (1 to 30).toList.map { i ⇒
33 | CallRecord(
34 | s"user-${Random.nextInt(maxUsers)}",
35 | s"user-${Random.nextInt(maxUsers)}",
36 | (if (i % 2 == 0) "incoming" else "outgoing"),
37 | Random.nextInt(50),
38 | Instant.now.minus(Random.nextInt(40), ChronoUnit.MINUTES).toEpochMilli / 1000
39 | )
40 | }
41 |
42 | in.addData(crs)
43 |
44 | testKit.run(streamlet, Seq(in), Seq(out), 30.seconds)
45 |
46 | // get data from outlet tap
47 | val results = out.asCollection(session)
48 |
49 | // assert
50 | results.size must be > 0
51 | }
52 | }
53 | }
54 |
55 |
--------------------------------------------------------------------------------
/mixed-sensors/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 |
4 | import scalariform.formatter.preferences._
5 |
6 | lazy val mixedSensors = (project in file("."))
7 | .enablePlugins(PipelinesSparkApplicationPlugin,
8 | PipelinesAkkaStreamsApplicationPlugin)
9 | .settings(
10 | libraryDependencies ++= Seq(
11 | "ch.qos.logback" % "logback-classic" % "1.2.3",
12 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
13 | ),
14 |
15 | name := "mixed-sensors",
16 | organization := "com.lightbend",
17 |
18 | scalaVersion := "2.12.10",
19 | crossScalaVersions := Vector(scalaVersion.value),
20 | scalacOptions ++= Seq(
21 | "-encoding", "UTF-8",
22 | "-target:jvm-1.8",
23 | "-Xlog-reflective-calls",
24 | "-Xlint",
25 | "-Ywarn-unused",
26 | "-Ywarn-unused-import",
27 | "-deprecation",
28 | "-feature",
29 | "-language:_",
30 | "-unchecked"
31 | ),
32 |
33 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
34 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
35 |
36 | scalariformPreferences := scalariformPreferences.value
37 | .setPreference(AlignParameters, false)
38 | .setPreference(AlignSingleLineCaseStatements, true)
39 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
40 | .setPreference(DoubleIndentConstructorArguments, true)
41 | .setPreference(DoubleIndentMethodDeclaration, true)
42 | .setPreference(RewriteArrowSymbols, true)
43 | .setPreference(DanglingCloseParenthesis, Preserve)
44 | .setPreference(NewlineAtEndOfFile, true)
45 | .setPreference(AllowParamGroupsOnNewlines, true)
46 | )
47 |
--------------------------------------------------------------------------------
/flink-taxi-ride/test-data/nycTaxiRides-small.json:
--------------------------------------------------------------------------------
1 | {"driverId":2013000006,"endLat":40.76491165161133,"endLon":-73.96133422851562,"endTime":0,"isStart":true,"passengerCnt":6,"rideId":6,"startLat":40.77109146118164,"startLon":-73.86613464355469,"startTime":1356998400000,"taxiId":2013000006}
2 | {"driverId":2013000011,"endLat":40.771759033203125,"endLon":-73.7923583984375,"endTime":0,"isStart":true,"passengerCnt":1,"rideId":11,"startLat":40.77376937866211,"startLon":-73.87083435058594,"startTime":1356998400000,"taxiId":2013000011}
3 | {"driverId":2013000055,"endLat":40.681209564208984,"endLon":-73.8050537109375,"endTime":0,"isStart":true,"passengerCnt":1,"rideId":55,"startLat":40.7739143371582,"startLon":-73.87117004394531,"startTime":1356998400000,"taxiId":2013000055}
4 | {"driverId":2013000031,"endLat":40.74075698852539,"endLon":-73.97993469238281,"endTime":0,"isStart":true,"passengerCnt":2,"rideId":31,"startLat":40.8077278137207,"startLon":-73.9293441772461,"startTime":1356998400000,"taxiId":2013000031}
5 | {"driverId":2013000006,"endLat":40.76491165161133,"endLon":-73.96133422851562,"endTime":1356999420000,"isStart":false,"passengerCnt":6,"rideId":6,"startLat":40.77109146118164,"startLon":-73.86613464355469,"startTime":1356998400000,"taxiId":2013000006}
6 | {"driverId":2013000011,"endLat":40.771759033203125,"endLon":-73.7923583984375,"endTime":1356999300000,"isStart":false,"passengerCnt":1,"rideId":11,"startLat":40.77376937866211,"startLon":-73.87083435058594,"startTime":1356998400000,"taxiId":2013000011}
7 | {"driverId":2013000055,"endLat":40.681209564208984,"endLon":-73.8050537109375,"endTime":1356999060000,"isStart":false,"passengerCnt":1,"rideId":55,"startLat":40.7739143371582,"startLon":-73.87117004394531,"startTime":1356998400000,"taxiId":2013000055}
8 | {"driverId":2013000031,"endLat":40.74075698852539,"endLon":-73.97993469238281,"endTime":1356999120000,"isStart":false,"passengerCnt":2,"rideId":31,"startLat":40.8077278137207,"startLon":-73.9293441772461,"startTime":1356998400000,"taxiId":2013000031}
9 |
--------------------------------------------------------------------------------
/warez/akka-streamlets/src/main/scala/warez/ElasticSearchClient.scala:
--------------------------------------------------------------------------------
1 | package warez
2 |
3 | import akka.NotUsed
4 | import akka.kafka.ConsumerMessage.CommittableOffset
5 | import akka.stream.alpakka.elasticsearch.{ ReadResult, WriteMessage, WriteResult }
6 | import akka.stream.alpakka.elasticsearch.scaladsl.{ ElasticsearchFlow, ElasticsearchSource }
7 | import akka.stream.scaladsl.Source
8 |
9 | import org.apache.http.HttpHost
10 | import org.elasticsearch.client.RestClient
11 | import spray.json.{ JsObject, JsonFormat }
12 |
13 | import pipelines.akkastream.scaladsl.FlowWithOffsetContext
14 |
15 | /**
16 | * Alpakka Kafka graph stages used to index and search for Warez domain entities.
17 | */
18 | object ElasticSearchClient {
19 | case class Config(hostname: String, port: Int, indexName: String, typeName: String = "_doc")
20 |
21 | /**
22 | * Factory method for `ElasticSearchClient`. Uses Context Bound on `JsonFormat` to make the type of our domain
23 | * entity visible (i.e. `Product`) as well as an implicit json format (i.e. `JsonFormat[Product]`).
24 | */
25 | def apply[T: JsonFormat](config: Config): ElasticSearchClient[T] =
26 | new ElasticSearchClient(config)
27 | }
28 |
29 | class ElasticSearchClient[T: JsonFormat](config: ElasticSearchClient.Config) {
30 | import config._
31 |
32 | /**
33 | * An ElasticSearch REST client used by Alpakka ElasticSearch to connect to the ES API.
34 | */
35 | implicit val esClient: RestClient = RestClient.builder(new HttpHost(hostname, port)).build()
36 |
37 | def indexFlow(): FlowWithOffsetContext[WriteMessage[T, NotUsed], WriteResult[T, CommittableOffset]] =
38 | ElasticsearchFlow.createWithContext[T, CommittableOffset](indexName, typeName)
39 |
40 | def querySource(searchCriteria: String): Source[ReadResult[JsObject], NotUsed] =
41 | ElasticsearchSource
42 | .create(indexName, typeName, query = s"""{
43 | "bool": {
44 | "must": {
45 | "query_string": {
46 | "query": "$searchCriteria"
47 | }
48 | }
49 | }
50 | }""")
51 | }
52 |
--------------------------------------------------------------------------------
/sensor-data-java/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 |
4 | import scalariform.formatter.preferences._
5 |
6 | lazy val sensorDataJava = (project in file("."))
7 | .enablePlugins(PipelinesAkkaStreamsApplicationPlugin)
8 | .settings(
9 | libraryDependencies ++= Seq(
10 | "com.lightbend.akka" %% "akka-stream-alpakka-file" % "1.1.2",
11 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
12 | "ch.qos.logback" % "logback-classic" % "1.2.3",
13 | "org.scalatest" %% "scalatest" % "3.0.8" % "test",
14 | "junit" % "junit" % "4.12" % "test"
15 | ),
16 |
17 | name := "sensor-data-java",
18 | organization := "com.lightbend",
19 |
20 | schemaCodeGenerator := SchemaCodeGenerator.Java,
21 |
22 | scalaVersion := "2.12.10",
23 | crossScalaVersions := Vector(scalaVersion.value),
24 | scalacOptions ++= Seq(
25 | "-encoding", "UTF-8",
26 | "-target:jvm-1.8",
27 | "-Xlog-reflective-calls",
28 | "-Xlint",
29 | "-Ywarn-unused",
30 | "-Ywarn-unused-import",
31 | "-deprecation",
32 | "-feature",
33 | "-language:_",
34 | "-unchecked"
35 | ),
36 |
37 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
38 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
39 |
40 | scalariformPreferences := scalariformPreferences.value
41 | .setPreference(AlignParameters, false)
42 | .setPreference(AlignSingleLineCaseStatements, true)
43 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
44 | .setPreference(DoubleIndentConstructorArguments, true)
45 | .setPreference(DoubleIndentMethodDeclaration, true)
46 | .setPreference(RewriteArrowSymbols, true)
47 | .setPreference(DanglingCloseParenthesis, Preserve)
48 | .setPreference(NewlineAtEndOfFile, true)
49 | .setPreference(AllowParamGroupsOnNewlines, true)
50 | )
51 |
--------------------------------------------------------------------------------
/sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataFileIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata
2 |
3 | import java.nio.file
4 | import java.nio.file._
5 |
6 | import akka.NotUsed
7 | import akka.stream.IOResult
8 | import akka.stream.alpakka.file.scaladsl.Directory
9 | import akka.stream.scaladsl._
10 | import akka.util.ByteString
11 | import pipelines.akkastream._
12 | import pipelines.akkastream.scaladsl._
13 | import pipelines.streamlets._
14 | import pipelines.streamlets.avro._
15 | import spray.json.JsonParser
16 |
17 | import scala.concurrent.Future
18 | import scala.concurrent.duration._
19 |
20 | class SensorDataFileIngress extends AkkaStreamlet {
21 |
22 | import SensorDataJsonSupport._
23 |
24 | val out = AvroOutlet[SensorData]("out").withPartitioner(RoundRobinPartitioner)
25 | def shape = StreamletShape.withOutlets(out)
26 |
27 | private val sourceData = VolumeMount("source-data-mount", "/mnt/data", ReadWriteMany)
28 |
29 | override def volumeMounts = Vector(sourceData)
30 |
31 | // Streamlet processing steps
32 | // 1. Every X seconds
33 | // 2. Enumerate all files in the mounted path
34 | // 3. Read each file *)
35 | // 4. Deserialize file content to a SensorData value *)
36 |
37 |   // *) Note that reading and deserializing the file content is done in separate steps for readability only; in production they should be merged into one step for performance reasons.
38 |
39 | override def createLogic = new RunnableGraphStreamletLogic() {
40 | val listFiles: NotUsed ⇒ Source[file.Path, NotUsed] = { _ ⇒ Directory.ls(getMountedPath(sourceData)) }
41 | val readFile: Path ⇒ Source[ByteString, Future[IOResult]] = { path: Path ⇒ FileIO.fromPath(path).via(JsonFraming.objectScanner(Int.MaxValue)) }
42 | val parseFile: ByteString ⇒ SensorData = { jsonByteString ⇒ JsonParser(jsonByteString.utf8String).convertTo[SensorData] }
43 |
44 | val emitFromFilesContinuously = Source.tick(1.second, 5.second, NotUsed)
45 | .flatMapConcat(listFiles)
46 | .flatMapConcat(readFile)
47 | .map(parseFile)
48 | def runnableGraph = emitFromFilesContinuously.to(plainSink(out))
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/SparkRandomGenDataIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import java.sql.Timestamp
4 |
5 | import scala.util.Random
6 |
7 | import pipelines.streamlets.{ DurationConfigParameter, IntegerConfigParameter, StreamletShape }
8 | import pipelines.streamlets.avro._
9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
10 | import org.apache.spark.sql.Dataset
11 | import org.apache.spark.sql.streaming.{ OutputMode, Trigger }
12 |
13 | import pipelines.spark.sql.SQLImplicits._
14 |
15 | case class Rate(timestamp: Timestamp, value: Long)
16 |
17 | class SparkRandomGenDataIngress extends SparkStreamlet {
18 | val out = AvroOutlet[Data]("out", d ⇒ d.src)
19 | val shape = StreamletShape(out)
20 |
21 | val RecordsPerSecond = IntegerConfigParameter(
22 | "records-per-second",
23 | "Records per second to produce.",
24 | Some(50))
25 |
26 | val RampUpTime = DurationConfigParameter(
27 | "ramp-up-time",
28 | "Time to reach max records per second.",
29 | Some("0 seconds"))
30 |
31 | override def configParameters = Vector(RecordsPerSecond, RampUpTime)
32 |
33 | override def createLogic() = new SparkStreamletLogic {
34 |
35 | override def buildStreamingQueries = {
36 | writeStream(process, out, OutputMode.Append).toQueryExecution
37 | }
38 |
39 | private def process: Dataset[Data] = {
40 |
41 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
42 | val rampUpTime = context.streamletConfig.getDuration(RampUpTime.key, java.util.concurrent.TimeUnit.SECONDS)
43 | println(s"Using rampup time of $rampUpTime seconds")
44 |
45 | val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"
46 |
47 | val rateStream = session.readStream
48 | .format("rate")
49 | .option("rowsPerSecond", recordsPerSecond)
50 | .option("rampUpTime", s"${rampUpTime}s")
51 | .load()
52 | .as[Rate]
53 |
54 | rateStream.map {
55 | case Rate(timestamp, value) ⇒ Data(s"src-${value % 1000}", timestamp.getTime, None, None, gaugeGen(), value)
56 | }
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/call-record-aggregator/datamodel/data/data-sample-20.json:
--------------------------------------------------------------------------------
1 | {"user":"07610039694","other":"07434677419","direction":"Incoming","duration":211,"timestamp":1284574664}
2 | {"user":"07641036117","other":"01666472054","direction":"Outgoing","duration":31,"timestamp":319101503}
3 | {"user":"07641036117","other":"07371326239","direction":"Incoming","duration":45,"timestamp":319103142}
4 | {"user":"07641036117","other":"07681546436","direction":"Outgoing","duration":10,"timestamp":319104282}
5 | {"user":"07641036117","other":"07681546436","direction":"Outgoing","duration":0,"timestamp":319104331}
6 | {"user":"07641036117","other":"07681546436","direction":"Incoming","duration":0,"timestamp":319104378}
7 | {"user":"07641036117","other":"07981267897","direction":"Outgoing","duration":0,"timestamp":319104391}
8 | {"user":"07641036117","other":"07588304495","direction":"Incoming","duration":124,"timestamp":1284057337}
9 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":474,"timestamp":1284054224}
10 | {"user":"07981267897","other":"07743039441","direction":"Missed","duration":0,"timestamp":1284058290}
11 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":0,"timestamp":1284062275}
12 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":605,"timestamp":1284146220}
13 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":1,"timestamp":1284219896}
14 | {"user":"07981267897","other":"07743039441","direction":"Outgoing","duration":59,"timestamp":1284220859}
15 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":1201,"timestamp":1284386006}
16 | {"user":"07981267897","other":"07641036117","direction":"Outgoing","duration":2,"timestamp":1284445157}
17 | {"user":"07163185791","other":"01850897526","direction":"Outgoing","duration":0,"timestamp":1284062712}
18 | {"user":"07163185791","other":"07066875066","direction":"Outgoing","duration":0,"timestamp":1284138862}
19 | {"user":"07163185791","other":"07066875066","direction":"Outgoing","duration":0,"timestamp":1284138886}
20 | {"user":"07163185791","other":"07691640598","direction":"Outgoing","duration":0,"timestamp":1284138976}
21 |
--------------------------------------------------------------------------------
/warez/spark-streamlets/src/test/scala/pipelines/example/warez/SparkProductJoinerKitSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example.warez
2 |
3 | import scala.collection.immutable.Seq
4 | import scala.concurrent.duration._
5 |
6 | import pipelines.spark.testkit._
7 | import pipelines.spark.sql.SQLImplicits._
8 | import TestUtils._
9 | import warez._
10 |
11 | class SparkProductJoinerKitSpec extends SparkScalaTestSupport {
12 |
13 | val testKit = SparkStreamletTestkit(session)
14 |
15 | "SparkJoin3" should {
16 | "process streaming data" in {
17 | // create spark streamlet
18 | val join3 = new SparkProductJoiner()
19 |
20 | // setup inlet tap on inlet port
21 | val in0: SparkInletTap[Product] = testKit.inletAsTap[Product](join3.in0)
22 | val in1: SparkInletTap[StockUpdate] = testKit.inletAsTap[StockUpdate](join3.in1)
23 | val in2: SparkInletTap[PriceUpdate] = testKit.inletAsTap[PriceUpdate](join3.in2)
24 |
25 | // setup outlet tap on outlet port
26 | val out: SparkOutletTap[Product] = testKit.outletAsTap[Product](join3.out)
27 |
28 | val socksId = uuid
29 | val pantsId = uuid
30 | val socksSkus = genSkus()
31 | val pantsSkus = genSkus()
32 | val socks = Product(socksId, "Socks", "Warm in winter", Seq("clothing", "sock", "socks"), socksSkus)
33 | val pants = Product(pantsId, "Pants", "Denim for the masses", Seq("clothing", "pants"), pantsSkus)
34 |
35 | val stockUpdate = StockUpdate(socksId, socksSkus.head.id, 1)
36 | val priceUpdate = PriceUpdate(pantsId, pantsSkus.head.id, 100)
37 |
38 | // build data and send to inlet tap
39 | val data0 = List(socks, pants)
40 | in0.addData(data0)
41 | // try multiple updates
42 | val data1 = (1 to 100).map(_ ⇒ stockUpdate)
43 | in1.addData(data1)
44 | val data2 = List(priceUpdate)
45 | in2.addData(data2)
46 |
47 | testKit.run(join3, Seq(in0, in1, in2), Seq(out), 60.seconds)
48 |
49 | // get data from outlet tap
50 | val results = out.asCollection(session)
51 |
52 | results.foreach(println)
53 |
54 | // assert
55 | results must have length 2
56 |       results.exists { p ⇒ p.name == "Socks" && p.skus.head.stock.contains(100) } must be(true)
57 | }
58 | }
59 | }
60 |
61 |
--------------------------------------------------------------------------------
/warez/spark-streamlets/src/test/scala/pipelines/example/warez/SparkProductOperationsSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example.warez
2 |
3 | import org.scalatest.{ Matchers, WordSpec }
4 | import org.scalatest.OptionValues._
5 |
6 | import scala.collection.immutable.Seq
7 | import warez.{ PriceUpdate, Product, Sku, StockUpdate }
8 |
9 | class SparkProductOperationsSpec extends WordSpec with Matchers {
10 |
11 | "A Product" should {
12 | "be updated correctly" in {
13 | val skus = Array(
14 | Sku("1", "Small Hole", Some(10), Some(5)),
15 | Sku("2", "Medium Hole", Some(10), Some(10)),
16 | Sku("3", "Large Hole", Some(15), Some(20))
17 | )
18 | val description = "A cartoon hole that can be applied to any surface."
19 | val keywords = Array("black", "hole", "gag", "plot device", "roger rabbit")
20 |
21 | val p = new Product(
22 | "123456789",
23 | "Acme Portable Hole",
24 | description,
25 | keywords,
26 | skus
27 | )
28 |
29 | val priceUpdate = PriceUpdate(
30 | "123456789",
31 | "1",
32 | 10
33 | )
34 | val stockUpdate = StockUpdate(
35 | "123456789",
36 | "1",
37 | 10
38 | )
39 | val zero = SparkProductJoiner.emptyProduct
40 | val p1 = SparkProductJoiner.updateProduct(zero, Seq(p).toIterator)
41 | p1 == p should equal(true)
42 | val prodPrice = SparkProductJoiner.priceUpdate2Products(priceUpdate)
43 | val p2 = SparkProductJoiner.updateProduct(p1, Seq(prodPrice).toIterator)
44 | p2.skus.find(_.id == "1").value.price should equal(Some(10))
45 | val prodStock = SparkProductJoiner.stockUpdate2Product(stockUpdate)
46 | val p3 = SparkProductJoiner.updateProduct(p2, Seq(prodStock).toIterator)
47 | p3.skus.find(_.id == "1").value.stock should equal(Some(20))
48 | // the same price update should cause no change here
49 | val p4 = SparkProductJoiner.updateProduct(p3, Seq(prodPrice).toIterator)
50 | p4.skus.find(_.id == "1").value.price should equal(Some(10))
51 | p4.skus.find(_.id == "1").value.stock should equal(Some(20))
52 | p4.description should be(description)
53 | p4.keywords should be(keywords)
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/sensor-data-scala/build.sbt:
--------------------------------------------------------------------------------
1 | //tag::docs-projectSetup-example[]
2 | import sbt._
3 | import sbt.Keys._
4 |
5 | import scalariform.formatter.preferences._
6 |
7 | lazy val sensorData = (project in file("."))
8 | .enablePlugins(PipelinesAkkaStreamsApplicationPlugin)
9 | .settings(
10 | //end::docs-projectSetup-example[]
11 | libraryDependencies ++= Seq(
12 | "com.lightbend.akka" %% "akka-stream-alpakka-file" % "1.1.2",
13 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
14 | "ch.qos.logback" % "logback-classic" % "1.2.3",
15 | "com.typesafe.akka" %% "akka-http-testkit" % "10.1.10" % "test",
16 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
17 |
18 | //tag::docs-projectName-example[]
19 | ),
20 | name := "sensor-data-scala",
21 | //end::docs-projectName-example[]
22 | organization := "com.lightbend",
23 |
24 | scalaVersion := "2.12.10",
25 | crossScalaVersions := Vector(scalaVersion.value),
26 | scalacOptions ++= Seq(
27 | "-encoding", "UTF-8",
28 | "-target:jvm-1.8",
29 | "-Xlog-reflective-calls",
30 | "-Xlint",
31 | "-Ywarn-unused",
32 | "-Ywarn-unused-import",
33 | "-deprecation",
34 | "-feature",
35 | "-language:_",
36 | "-unchecked"
37 | ),
38 | runLocalConfigFile := Some("resources/local.conf"),
39 |
40 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
41 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
42 |
43 | scalariformPreferences := scalariformPreferences.value
44 | .setPreference(AlignParameters, false)
45 | .setPreference(AlignSingleLineCaseStatements, true)
46 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
47 | .setPreference(DoubleIndentConstructorArguments, true)
48 | .setPreference(DoubleIndentMethodDeclaration, true)
49 | .setPreference(RewriteArrowSymbols, true)
50 | .setPreference(DanglingCloseParenthesis, Preserve)
51 | .setPreference(NewlineAtEndOfFile, true)
52 | .setPreference(AllowParamGroupsOnNewlines, true)
53 | )
54 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/test/scala/pipelines/examples/carly/ingestor/CallRecordValidationSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import java.time.Instant
4 | import java.time.temporal.ChronoUnit
5 |
6 | import akka.actor._
7 | import akka.stream._
8 | import akka.stream.scaladsl._
9 | import akka.testkit._
10 | import org.scalatest._
11 | import org.scalatest.concurrent._
12 |
13 | import pipelines.akkastream.testkit.scaladsl._
14 |
15 | import pipelines.examples.carly.data._
16 |
17 | class CallRecordValidationSpec extends WordSpec with MustMatchers with ScalaFutures with BeforeAndAfterAll {
18 | private implicit val system = ActorSystem("CallRecordValidationSpec")
19 | private implicit val mat = ActorMaterializer()
20 |
21 | override def afterAll: Unit = {
22 | TestKit.shutdownActorSystem(system)
23 | }
24 |
25 | "A CallRecordValidation" should {
26 | "split incoming data into valid call records and those outside the time range" in {
27 | val testkit = AkkaStreamletTestKit(system, mat)
28 | val streamlet = new CallRecordValidation()
29 |
30 | val instant = Instant.now.toEpochMilli / 1000
31 | val past = Instant.now.minus(5000, ChronoUnit.DAYS).toEpochMilli / 1000
32 |
33 | val cr1 = CallRecord("user-1", "user-2", "f", 10L, instant)
34 | val cr2 = CallRecord("user-1", "user-2", "f", 15L, instant)
35 | val cr3 = CallRecord("user-1", "user-2", "f", 18L, instant)
36 | val cr4 = CallRecord("user-1", "user-2", "f", 40L, past)
37 | val cr5 = CallRecord("user-1", "user-2", "f", 70L, past)
38 |
39 | val source = Source(Vector(cr1, cr2, cr3, cr4, cr5))
40 |
41 | val in = testkit.inletFromSource(streamlet.in, source)
42 | val left = testkit.outletAsTap(streamlet.left)
43 | val right = testkit.outletAsTap(streamlet.right)
44 |
45 | testkit.run(streamlet, in, List(left, right), () ⇒ {
46 | right.probe.expectMsg(("user-1", cr1))
47 | right.probe.expectMsg(("user-1", cr2))
48 | right.probe.expectMsg(("user-1", cr3))
49 | left.probe.expectMsg((cr4.toString, InvalidRecord(cr4.toString, "Timestamp outside range!")))
50 | left.probe.expectMsg((cr5.toString, InvalidRecord(cr5.toString, "Timestamp outside range!")))
51 | })
52 |
53 | left.probe.expectMsg(Completed)
54 | right.probe.expectMsg(Completed)
55 | }
56 | }
57 | }
58 |
59 |
--------------------------------------------------------------------------------
/flink-taxi-ride/README.md:
--------------------------------------------------------------------------------
1 | ## Flink based Pipelines Application
2 |
3 | ### Problem Definition
4 |
5 | We work with two data streams, one with `TaxiRide` events generated by an Akka Streams streamlet (ingress) and the other with `TaxiFare` events generated by another Akka Streams streamlet (ingress). The two streams are then connected through a Flink-based processor streamlet that performs stateful enrichment, building up an aggregate of `TaxiRide`-to-`TaxiFare` mappings.
6 |
7 | Each mapping is then passed on to an Akka Streams streamlet (egress) as a tuple.
8 |
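Conceptually, the enrichment keeps whichever half of a (`TaxiRide`, `TaxiFare`) pair arrives first in per-`rideId` state and emits a joined record as soon as the counterpart shows up. The following framework-free Scala sketch illustrates just that pairing idea; the case classes and object names are illustrative stand-ins, while the real implementation is the Flink `RichCoFlatMapFunction` in the `processor` sub-project.

```
// Illustrative sketch of the pairing logic only; not the actual Flink code.
final case class Ride(rideId: Long)
final case class Fare(rideId: Long, totalFare: Float)
final case class RideFare(rideId: Long, totalFare: Float)

object EnrichmentSketch {
  // Keyed state: whichever half of the pair arrived first, per rideId.
  private var pendingRides = Map.empty[Long, Ride]
  private var pendingFares = Map.empty[Long, Fare]

  def onRide(ride: Ride): Option[RideFare] =
    pendingFares.get(ride.rideId) match {
      case Some(fare) ⇒
        pendingFares -= ride.rideId         // counterpart already seen: emit and clear
        Some(RideFare(ride.rideId, fare.totalFare))
      case None ⇒
        pendingRides += ride.rideId -> ride // remember the ride and wait for its fare
        None
    }

  def onFare(fare: Fare): Option[RideFare] =
    pendingRides.get(fare.rideId) match {
      case Some(_) ⇒
        pendingRides -= fare.rideId
        Some(RideFare(fare.rideId, fare.totalFare))
      case None ⇒
        pendingFares += fare.rideId -> fare
        None
    }
}
```
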
9 | ### Sub projects
10 |
11 | The following sub-projects constitute the whole application:
12 |
13 | * `datamodel` - contains the Avro schemas for `TaxiRide`, `TaxiFare` and `TaxiRideFare`
14 | * `ingestor` - contains the Akka Streams ingresses that accept the incoming data streams over HTTP
15 | * `processor` - the Flink streamlet that connects the input streams and does stateful processing to generate the output stream
16 | * `logger` - contains the Akka Streams egress that logs the enriched output. The logger streamlet has the following configurable parameters:
17 | * `valid-logger.log-level` - Log level for `*-logger` streamlets to log to. e.g. `info`
18 | * `valid-logger.msg-prefix` - Log line prefix for `*-logger` streamlets to include. e.g. `VALID`
19 | * `taxi-ride-pipeline` - the entry point containing the blueprint definition
20 |
21 | ### Build the application
22 |
23 | Here's the sequence of steps that you need to follow:
24 |
25 | ```
26 | $ pwd
27 | .../flink-taxi-ride
28 | $ sbt
29 | > clean
30 | > buildAndPublish
31 | ```
32 |
33 | The above will build the application and publish the application Docker images to the Docker registry configured in `target-env.sbt`.
34 |
35 | > **Note:** You need to copy `target-env.sbt.example` to `target-env.sbt` with appropriate settings for the Docker registry in order for the build and publish to go through.
36 |
37 | The `buildAndPublish` command, if successful, will print the exact command to use for deploying the application to the cluster.
38 |
39 | ### Feeding data into the application
40 |
41 | The project comes with scripts that can be used to feed data into the ingresses over HTTP.
42 |
43 | The `test-data` folder contains two bash scripts, `send-data-rides.sh` and `send-data-fares.sh`, that can be used to feed data over HTTP to the two ingresses. You need to change the cluster names in the scripts to match your own environment.
44 |
45 |
--------------------------------------------------------------------------------
/call-record-aggregator/akka-cdr-ingestor/src/test/scala/pipelines/examples/carly/ingestor/CallRecordMergeSpec.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.ingestor
2 |
3 | import java.time.Instant
4 | import java.time.temporal.ChronoUnit
5 |
6 | import akka.actor._
7 | import akka.stream._
8 | import akka.stream.scaladsl._
9 | import akka.testkit._
10 | import org.scalatest._
11 | import org.scalatest.concurrent._
12 |
13 | import pipelines.akkastream.testkit.scaladsl._
14 | import pipelines.examples.carly.data._
15 |
16 | class CallRecordMergeSpec extends WordSpec with MustMatchers with ScalaFutures with BeforeAndAfterAll {
17 |
18 | private implicit val system = ActorSystem("CallRecordMergeSpec")
19 | private implicit val mat = ActorMaterializer()
20 |
21 | override def afterAll: Unit = {
22 | TestKit.shutdownActorSystem(system)
23 | }
24 |
25 | "A CallRecordMerge" should {
26 | "merge incoming data" in {
27 | val testkit = AkkaStreamletTestKit(system, mat)
28 | val streamlet = new CallRecordMerge
29 |
30 | val instant = Instant.now.toEpochMilli / 1000
31 | val past = Instant.now.minus(5000, ChronoUnit.DAYS).toEpochMilli / 1000
32 |
33 | val cr1 = CallRecord("user-1", "user-2", "f", 10L, instant)
34 | val cr2 = CallRecord("user-1", "user-2", "f", 15L, instant)
35 | val cr3 = CallRecord("user-1", "user-2", "f", 18L, instant)
36 | val cr4 = CallRecord("user-1", "user-2", "f", 40L, past)
37 | val cr5 = CallRecord("user-1", "user-2", "f", 70L, past)
38 | val cr6 = CallRecord("user-3", "user-1", "f", 80L, past)
39 |
40 | val source0 = Source(Vector(cr1, cr2, cr3))
41 | val source1 = Source(Vector(cr4, cr5))
42 | val source2 = Source(Vector(cr6))
43 |
44 | val in0 = testkit.inletFromSource(streamlet.in0, source0)
45 | val in1 = testkit.inletFromSource(streamlet.in1, source1)
46 | val in2 = testkit.inletFromSource(streamlet.in2, source2)
47 | val out = testkit.outletAsTap(streamlet.out)
48 |
49 | testkit.run(streamlet, List(in0, in1, in2), out, () ⇒ {
50 | out.probe.expectMsg(("user-1", cr1))
51 | out.probe.expectMsg(("user-1", cr4))
52 | out.probe.expectMsg(("user-3", cr6))
53 | out.probe.expectMsg(("user-1", cr2))
54 | out.probe.expectMsg(("user-1", cr5))
55 | out.probe.expectMsg(("user-1", cr3))
56 | })
57 |
58 | out.probe.expectMsg(Completed)
59 | }
60 | }
61 | }
62 |
63 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pipelines Examples Applications
2 |
3 | ## `sensor-data-scala`
4 |
5 | A simple pipeline that processes events from a wind turbine farm.
6 |
7 | ## `sensor-data-java`
8 |
9 | The same as `sensor-data-scala`, but implemented using the Java DSL.
10 |
11 | ## `call-record-aggregator`
12 |
13 | An aggregation of user call record data (metadata of phone calls).
14 |
15 | ## `spark-sensors`
16 |
17 | A simple pipeline that generates events from energy devices.
18 |
19 | ## `spark-resilience-test`
20 |
21 | A simple pipeline that generates events from energy devices. This pipeline
22 | will fail based on a pre-defined probability percentage. Its purpose is to
23 | demonstrate the failure recovery features of Pipelines and Spark.
24 |
25 | ## `warez`
26 |
27 | An event-based e-commerce streaming platform. Events are generated based on
28 | user events such as purchases and merchant actions such as the addition of
29 | products and their stock numbers.
30 |
31 | # Pipelines Feature Grid
32 |
33 | | Application | Akka Streams (Scala) | Akka Streams (Java) | Spark | Testkit | Ingress | Egress | Auto Data Generation |
34 | |---------------------|----------------------|---------------------|-------|---------|---------|-------------------------|-------------------------|
35 | | `sensor-data-scala` | Yes | No | No | No | HTTP | stdout (logs) | Yes (Client Lua Script) |
36 | | `sensor-data-java` | No | Yes | No | No | HTTP | stdout (logs) | Yes (Client Lua Script) |
37 | | `call-record-aggregator` | Yes | Yes | Yes | Yes | HTTP | stdout (logs) | Yes |
38 | | `spark-sensors` | No | No | Yes | No | HTTP | stdout (logs) | Yes |
39 | | `spark-resilience-test` | Yes | No | No | Yes | HTTP | stdout (logs) | Yes |
40 | | `warez` | Yes | No | Yes | Yes | HTTP | ElasticSearch, HTTP API | No |
41 |
42 | # Running Examples
43 |
44 | Consult the [Pipelines Documentation](https://developer.lightbend.com/docs/pipelines/current/)
45 | for instructions on building, deploying and running Pipelines applications.
46 |
47 | ---
48 |
49 | **NOTE**
50 |
51 | Before building any examples remember to update the Docker registry in the `target-env.sbt` file.
52 |
53 | ---
54 |
--------------------------------------------------------------------------------
/sensor-data-java/src/test/java/pipelines/examples/sensordata/MetricsValidationTest.java:
--------------------------------------------------------------------------------
1 | package pipelines.examples.sensordata;
2 |
3 | import java.util.*;
4 |
5 | import scala.concurrent.duration.Duration;
6 |
7 | import akka.NotUsed;
8 | import akka.actor.ActorSystem;
9 | import akka.japi.Pair;
10 | import akka.kafka.ConsumerMessage.CommittableOffset;
11 | import akka.stream.ActorMaterializer;
12 | import akka.stream.javadsl.*;
13 | import akka.stream.javadsl.Flow;
14 | import akka.testkit.TestKit;
15 | import pipelines.akkastream.*;
16 | import pipelines.akkastream.javadsl.util.*;
17 | import pipelines.akkastream.testkit.OutletTap;
18 | import pipelines.akkastream.testkit.javadsl.*;
19 |
20 | import pipelines.streamlets.*;
21 | import pipelines.streamlets.avro.*;
22 | import pipelines.streamlets.descriptors.*;
23 |
24 | import org.apache.avro.Schema;
25 | import org.scalatest.junit.JUnitSuite;
26 | import org.junit.*;
27 | import static org.junit.Assert.*;
28 |
29 | public class MetricsValidationTest extends JUnitSuite {
30 | static ActorMaterializer mat;
31 | static ActorSystem system;
32 |
33 | @BeforeClass
34 | public static void setUp() throws Exception {
35 | system = ActorSystem.create();
36 | mat = ActorMaterializer.create(system);
37 | }
38 |
39 | @AfterClass
40 | public static void tearDown() throws Exception {
41 | TestKit.shutdownActorSystem(system, Duration.create(10, "seconds"), false);
42 | system = null;
43 | }
44 |
45 | @Test
46 | public void shouldProcessInvalidMetric() {
47 | MetricsValidation streamlet = new MetricsValidation();
48 | AkkaStreamletTestKit testkit = AkkaStreamletTestKit.create(system, mat);
49 |
50 | QueueInletTap<Metric> in = testkit.makeInletAsTap(streamlet.inlet);
51 | ProbeOutletTap<Metric> valid = testkit.makeOutletAsTap(streamlet.validOutlet);
52 | ProbeOutletTap<InvalidMetric> invalid = testkit.makeOutletAsTap(streamlet.invalidOutlet);
53 | long timestamp = System.currentTimeMillis();
54 | Metric metric = new Metric("dev1", timestamp, "metric-name", -1.0d);
55 | in.queue().offer(metric);
56 | InvalidMetric expectedInvalidMetric = new InvalidMetric(metric, "All measurements must be positive numbers!");
57 |
58 | String expectedKey = streamlet.invalidOutlet.partitioner().apply(expectedInvalidMetric);
59 | List<OutletTap> outlets = Arrays.asList(new OutletTap[] {valid, invalid});
60 |
61 | testkit.run(streamlet, in, outlets, () -> {
62 | return invalid.probe().expectMsg(new Pair(expectedKey, expectedInvalidMetric));
63 | });
64 |
65 | invalid.probe().expectMsg(Completed.completed());
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/warez/ml-training/README.md:
--------------------------------------------------------------------------------
1 | # Recommender model generation
2 |
3 | The IPython notebook does the following:
4 |
5 | 1. Reads data from the `data/` folder. The `data` folder contains two variants of the data files: one containing a large dataset and the other a smaller one.
6 | 2. Builds a neural network model for learning
7 | 3. Runs the training
8 | 4. Exports the model in TensorFlow format
9 | 5. Generates an Avro binary file containing all information needed to be transferred to the model serving streamlet
10 |
11 | **Note:** In the current implementation of the notebook, the `model_path` is hardcoded. This is the folder where all models, graphs, and Avro files are generated. It needs to be changed appropriately when running the notebook.
12 |
13 |
14 | ## Mapping Ids
15 |
16 | In `warez` the product (sku) ids and customer ids are modeled as strings (UUIDs), while the neural network classifier needs integers. Hence we map each customer and product UUID to a unique integer value.
17 | 
18 | This mapping information also needs to be exported along with the model itself; the notebook does this as well.
19 |
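A minimal sketch of the mapping idea (illustrative ids only; the notebook does the equivalent in Python over the full customer and product id sets):

```
// Illustrative only: give each UUID a stable integer index. The resulting
// maps are what end up in the productMap / customerMap fields of the
// RecommenderModel Avro record.
val productIds: Seq[String] = Seq(
  "5728b0c7-e561-4faf-a958-af69e415b91e",
  "bdadecc1-7cf5-431c-bba4-f5cb06903222"
)

val productMap: Map[String, Int] = productIds.zipWithIndex.toMap
// Map("5728b0c7-..." -> 0, "bdadecc1-..." -> 1)
```
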
20 | ## Model Id
21 |
22 | In the current implementation, the model id for the generated model is specified as "recommender-model-[current timestamp]". This id will be present in the final Avro file that the notebook generates.
23 |
24 | ## Model Avro Schema
25 |
26 | The Avro file that the notebook generates is based on the schema present in the `avro/` folder, named `RecommenderModel.avsc`. This schema has to match the one present on the Scala side where the streamlets are defined. The schema is:
27 |
28 | ```
29 | {
30 | "namespace": "warez",
31 |
32 | "type": "record",
33 | "name": "RecommenderModel",
34 |
35 | "fields": [
36 | {
37 | "name": "modelId",
38 | "type": "string"
39 | },
40 | {
41 | "name": "tensorFlowModel",
42 | "type": "bytes"
43 | },
44 | {
45 | "name": "productMap",
46 | "type": {
47 | "type": "map",
48 | "values": "int"
49 | }
50 | },
51 | {
52 | "name": "customerMap",
53 | "type": {
54 | "type": "map",
55 | "values": "int"
56 | }
57 | }
58 | ]
59 | }
60 | ```
61 |
62 | ## Generated Avro
63 |
64 | The notebook generates two Avro files:
65 | 
66 | * With the schema embedded within the binary file, saved as `recommender.avro` under the `model_path` folder. This can be imported into the streamlet for model serving (see the reading sketch below).
67 | * Without the schema embedded within the binary file, saved as `recommender-no-schema.avro` under the `model_path` folder. This can also be imported into the streamlet for model serving.
68 |
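As a quick sanity check, the schema-embedded variant can be read back with the plain Avro Java API. A minimal sketch, assuming `recommender.avro` sits in the current working directory (the object name and the printed field are just for illustration):

```
import java.io.File

import org.apache.avro.file.DataFileReader
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }

object ReadRecommenderAvro extends App {
  // The writer schema travels inside the file, so a GenericDatumReader
  // needs no schema up front.
  val reader = new DataFileReader[GenericRecord](
    new File("recommender.avro"), new GenericDatumReader[GenericRecord]())

  while (reader.hasNext) {
    val record = reader.next()
    // Field names follow RecommenderModel.avsc.
    println(s"modelId = ${record.get("modelId")}")
  }
  reader.close()
}
```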
--------------------------------------------------------------------------------
/warez/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 | import scalariform.formatter.preferences._
4 |
5 | lazy val root = blueprint
6 |
7 | lazy val datamodel = (project in file("./datamodel"))
8 | .enablePlugins(PipelinesLibraryPlugin)
9 |
10 | lazy val blueprint = (project in file("./blueprint"))
11 | .enablePlugins(PipelinesApplicationPlugin)
12 | .settings(
13 | /**
14 | * NOTE: Can we namespace or sandbox developer instances of this deployment?
15 | */
16 | name := "warez"
17 | )
18 | .dependsOn(akkaStreamlets, sparkStreamlets)
19 |
20 | lazy val akkaStreamlets = (project in file("./akka-streamlets"))
21 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin)
22 | .settings(
23 | commonSettings,
24 | libraryDependencies ++= Seq(
25 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
26 | "com.lightbend.akka" %% "akka-stream-alpakka-elasticsearch" % "1.1.2",
27 | "ch.qos.logback" % "logback-classic" % "1.2.3",
28 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
29 | )
30 | )
31 | .dependsOn(datamodel)
32 |
33 | lazy val sparkStreamlets = (project in file("./spark-streamlets"))
34 | .enablePlugins(PipelinesSparkLibraryPlugin)
35 | .settings(
36 | commonSettings,
37 | libraryDependencies ++= Seq(
38 | "ch.qos.logback" % "logback-classic" % "1.2.3",
39 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
40 | )
41 | )
42 | .dependsOn(datamodel)
43 |
44 |
45 | lazy val commonSettings = Seq(
46 | scalaVersion := "2.12.10",
47 | scalacOptions ++= Seq(
48 | "-encoding", "UTF-8",
49 | "-target:jvm-1.8",
50 | "-Xlog-reflective-calls",
51 | "-Xlint",
52 | "-Ywarn-unused",
53 | "-Ywarn-unused-import",
54 | "-deprecation",
55 | "-feature",
56 | "-language:_",
57 | "-unchecked"
58 | ),
59 |
60 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
61 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
62 |
63 | scalariformPreferences := scalariformPreferences.value
64 | .setPreference(AlignParameters, false)
65 | .setPreference(AlignSingleLineCaseStatements, true)
66 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
67 | .setPreference(DoubleIndentConstructorArguments, true)
68 | .setPreference(DoubleIndentMethodDeclaration, true)
69 | .setPreference(RewriteArrowSymbols, true)
70 | .setPreference(DanglingCloseParenthesis, Preserve)
71 | .setPreference(NewlineAtEndOfFile, true)
72 | .setPreference(AllowParamGroupsOnNewlines, true)
73 | )
74 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallStatsAggregator.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.aggregator
2 |
3 | import org.apache.spark.sql.Dataset
4 | import org.apache.spark.sql.functions._
5 | import org.apache.spark.sql.types._
6 |
7 | import pipelines.streamlets._
8 | import pipelines.streamlets.avro._
9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
10 | import org.apache.spark.sql.streaming.OutputMode
11 | import pipelines.spark.sql.SQLImplicits._
12 | import org.apache.log4j.{ Level, Logger }
13 |
14 | import pipelines.examples.carly.data._
15 | class CallStatsAggregator extends SparkStreamlet {
16 |
17 | val rootLogger = Logger.getRootLogger()
18 | rootLogger.setLevel(Level.ERROR)
19 |
20 | //tag::docs-schemaAware-example[]
21 | val in = AvroInlet[CallRecord]("in")
22 | val out = AvroOutlet[AggregatedCallStats]("out", _.startTime.toString)
23 | val shape = StreamletShape(in, out)
24 | //end::docs-schemaAware-example[]
25 |
26 | val GroupByWindow = DurationConfigParameter(
27 | "group-by-window",
28 | "Window duration for the moving average computation",
29 | Some("1 minute"))
30 |
31 | val Watermark = DurationConfigParameter(
32 | "watermark",
33 | "Late events watermark duration: how long to wait for late events",
34 | Some("1 minute"))
35 |
36 | override def configParameters = Vector(GroupByWindow, Watermark)
37 | override def createLogic = new SparkStreamletLogic {
38 | val watermark = context.streamletConfig.getDuration(Watermark.key)
39 | val groupByWindow = context.streamletConfig.getDuration(GroupByWindow.key)
40 |
41 | //tag::docs-aggregationQuery-example[]
42 | override def buildStreamingQueries = {
43 | val dataset = readStream(in)
44 | val outStream = process(dataset)
45 | writeStream(outStream, out, OutputMode.Update).toQueryExecution
46 | }
47 |
48 | private def process(inDataset: Dataset[CallRecord]): Dataset[AggregatedCallStats] = {
49 | val query =
50 | inDataset
51 | .withColumn("ts", $"timestamp".cast(TimestampType))
52 | .withWatermark("ts", s"${watermark.toMillis()} milliseconds")
53 | .groupBy(window($"ts", s"${groupByWindow.toMillis()} milliseconds"))
54 | .agg(avg($"duration") as "avgCallDuration", sum($"duration") as "totalCallDuration")
55 | .withColumn("windowDuration", $"window.end".cast(LongType) - $"window.start".cast(LongType))
56 |
57 | query
58 | .select($"window.start".cast(LongType) as "startTime", $"windowDuration", $"avgCallDuration", $"totalCallDuration")
59 | .as[AggregatedCallStats]
60 | }
61 | //end::docs-aggregationQuery-example[]
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/sensor-data-java/README.md:
--------------------------------------------------------------------------------
1 | # `sensor-data-java`
2 |
3 | A simple Java-based pipeline that ingests, converts, and filters data.
4 |
5 | # Required configuration
6 |
7 | The application requires a persistent volume claim (PVC) to be created before deployment. This PVC is mounted by the `FilterStreamlet` pod, which checks the mounted directory for a configuration file containing device ids that should be filtered out from the data stream.
8 |
9 | Example PVC:
10 |
11 | ```
12 | apiVersion: v1
13 | kind: PersistentVolumeClaim
14 | metadata:
15 | name: source-data-claim
16 | namespace: sensor-data-java
17 | spec:
18 | accessModes:
19 | - ReadWriteMany
20 | resources:
21 | requests:
22 | storage: 10Mi
23 | ```
24 |
25 | # Upload device id filter list
26 |
27 | The filter streamlet will read a configuration file from the mounted volume. The file should contain the device ids that should be filtered out, with one device id per line. If the file is empty or does not exist, all device ids are accepted.
28 |
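In other words, the rule boils down to a set lookup against the mounted file. A minimal Scala sketch of that rule (illustrative only; the actual logic lives in the Java `FilterStreamlet` and may differ in detail):

```
import java.nio.file.{ Files, Paths }

import scala.collection.JavaConverters._

// Illustrative sketch of the filtering rule described above.
object DeviceIdFilterSketch {
  def blockedDeviceIds(path: String): Set[String] = {
    val p = Paths.get(path)
    if (Files.exists(p))
      Files.readAllLines(p).asScala.map(_.trim).filter(_.nonEmpty).toSet
    else
      Set.empty // missing (or empty) file: nothing is filtered out
  }

  // A device id passes unless it is listed in the mounted file.
  def accept(deviceId: String, blocked: Set[String]): Boolean =
    !blocked.contains(deviceId)
}
```
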
29 | To upload a prepared file that will filter out one device id (c75cb448-df0e-4692-8e06-0321b7703992), run the following script.
30 |
31 | ./load-data-into-pvc.sh
32 |
33 | The file uploaded is named `test-data/device-ids.txt`.
34 |
35 | # Generate data
36 |
37 | To send data to the HTTP ingress, do the following:
38 |
39 | - Get `sensor-data` ingress HTTP endpoint with `kubectl pipelines status sensor-data-java`
40 |
41 | In the example output below the HTTP endpoint would be `docker-registry-default.my.kubernetes.cluster/sensor-data`:
42 |
43 | ```
44 | kubectl pipelines status sensor-data-java
45 | Name: sensor-data-java
46 | Namespace: sensor-data-java
47 | Version: 445-fcd70ca
48 | Created: 2019-08-20 11:24:54 +0200 CEST
49 | Status: Running
50 |
51 | STREAMLET ENDPOINT
52 | sensor-data docker-registry-default.my.kubernetes.cluster/sensor-data
53 |
54 | STREAMLET POD STATUS RESTARTS READY
55 | metrics sensor-data-java-metrics-67bc5c45f7-7v5p9 Running 0 True
56 | sensor-data sensor-data-java-sensor-data-f8fb77d85-bgtb9 Running 0 True
57 | filter sensor-data-java-filter-667d85d44b-8ltmg Running 0 True
58 | validation sensor-data-java-validation-7754885f99-h4l67 Running 0 True
59 | ```
60 |
61 | - Pick a test data file from `./test-data`, for example `test-data/04-moderate-breeze.json`
62 | - Send the file to the HTTP endpoint of the ingress using the following `curl` command
63 |
64 |
65 | curl -i -X POST sensor-data-java.robert-test.ingestion.io/sensor-data -H "Content-Type: application/json" --data '@test-data/04-moderate-breeze.json'
66 |
--------------------------------------------------------------------------------
/mixed-sensors/src/main/scala/pipelines/example/SparkConsoleEgress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example
2 |
3 | import pipelines.streamlets.StreamletShape
4 | import pipelines.streamlets.avro._
5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic, StreamletQueryExecution }
6 | import pipelines.spark.sql.SQLImplicits._
7 | import org.apache.spark.sql.streaming.Trigger
8 | import org.apache.spark.sql.functions._
9 | import org.apache.spark.sql.DataFrame
10 |
11 | class SparkConsoleEgress extends SparkStreamlet {
12 | val in1 = AvroInlet[Data]("in1")
13 | val in2 = AvroInlet[Data]("in2")
14 | val shape = StreamletShape.withInlets(in1, in2)
15 |
16 | def asTimestamp = udf((t: Long) ⇒ new java.sql.Timestamp(t))
17 | def elapsedTime = udf((t1: Long, t0: Long) ⇒ t1 - t0)
18 |
19 | override def createLogic() = new SparkStreamletLogic {
20 | override def buildStreamingQueries = {
21 | val stream1 = readStream(in1).withColumn("source", lit("spark")).withColumn("elapsed", elapsedTime($"t2", $"t1"))
22 | val stream2 = readStream(in2).withColumn("source", lit("akka")).withColumn("elapsed", elapsedTime($"t2", $"t1"))
23 |
24 | // commented-out process: simple stats to compute min/max/mean on a window
25 | // val dataCount = stream1.union(stream2).withColumn("ts", asTimestamp($"timestamp"))
26 | // val stats = dataCount
27 | // .withWatermark("ts", "1 second")
28 | // .groupBy(window($"ts", "5 minutes", "1 minute"), $"source")
29 | // //.agg(max($"elapsed"), min($"elapsed"), avg($"elapsed"), count($"source"))
30 |
31 | val quantiles: (String ⇒ Long ⇒ (DataFrame, Long) ⇒ Unit) = { name ⇒ period ⇒ (df, time) ⇒
32 | df.cache()
33 | val count = df.count()
34 | val cps = count.toDouble / period
35 | val quans = df.stat.approxQuantile("elapsed", Array(0.1, 0.5, 0.9, 0.99), 0.01)
36 | println(s"$time, $name, $count, $cps, " + quans.mkString(", "))
37 | }
38 |
39 | val period = 60 * 5 // seconds
40 |
41 | val q1 = stream1.writeStream.foreachBatch(quantiles("spark")(period))
42 | .trigger(Trigger.ProcessingTime(s"$period seconds"))
43 | .option("checkpointLocation", context.checkpointDir("console-egress-q1"))
44 | .start()
45 | val q2 = stream2.writeStream.foreachBatch(quantiles("akka")(period))
46 | .trigger(Trigger.ProcessingTime(s"$period seconds"))
47 | .option("checkpointLocation", context.checkpointDir("console-egress-q2"))
48 | .start()
49 |
50 | new Thread() {
51 | override def run(): Unit = {
52 | while (true) {
53 | val progress = q1.lastProgress
54 | if (progress != null) {
55 | println("***************** [PROGRESS] *********************")
56 | println(progress.toString())
57 | println("**************************************************")
58 | }
59 | Thread.sleep(60 * 1000)
60 | }
61 | }
62 | } //.start // uncomment to enable the query progress
63 |
64 | StreamletQueryExecution(q1, q2)
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/flink-taxi-ride/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 | import pipelines.sbt.CommonSettingsAndTasksPlugin._
4 |
5 | import scalariform.formatter.preferences._
6 |
7 | lazy val taxiRidePipeline = (project in file("./taxi-ride-pipeline"))
8 | .enablePlugins(PipelinesApplicationPlugin)
9 | .settings(commonSettings)
10 | .settings(
11 | name := "taxi-ride-fare"
12 | )
13 | .dependsOn(ingestor, processor, ridelogger)
14 |
15 | lazy val datamodel = (project in file("./datamodel"))
16 | .enablePlugins(PipelinesLibraryPlugin)
17 | .settings(
18 | commonSettings,
19 | (sourceGenerators in Compile) += (avroScalaGenerateSpecific in Test).taskValue
20 | )
21 |
22 | lazy val ingestor = (project in file("./ingestor"))
23 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin)
24 | .settings(
25 | commonSettings,
26 | libraryDependencies ++= Seq(
27 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
28 | "ch.qos.logback" % "logback-classic" % "1.2.3",
29 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
30 | )
31 | )
32 | .dependsOn(datamodel)
33 |
34 |
35 | lazy val processor = (project in file("./processor"))
36 | .enablePlugins(PipelinesFlinkLibraryPlugin)
37 | .settings(
38 | commonSettings,
39 | libraryDependencies ++= Seq(
40 | "ch.qos.logback" % "logback-classic" % "1.2.3",
41 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
42 | )
43 | )
44 | .settings(
45 | parallelExecution in Test := false
46 | )
47 | .dependsOn(datamodel)
48 |
49 | lazy val ridelogger = (project in file("./logger"))
50 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin)
51 | .settings(
52 | commonSettings,
53 | libraryDependencies ++= Seq(
54 | "ch.qos.logback" % "logback-classic" % "1.2.3",
55 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
56 | )
57 | )
58 | .dependsOn(datamodel)
59 |
60 |
61 | lazy val commonSettings = Seq(
62 | scalaVersion := "2.12.8",
63 | scalacOptions ++= Seq(
64 | "-encoding", "UTF-8",
65 | "-target:jvm-1.8",
66 | "-Xlog-reflective-calls",
67 | "-Xlint",
68 | "-Ywarn-unused",
69 | "-Ywarn-unused-import",
70 | "-deprecation",
71 | "-feature",
72 | "-language:_",
73 | "-unchecked"
74 | ),
75 |
76 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
77 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
78 |
79 | scalariformPreferences := scalariformPreferences.value
80 | .setPreference(AlignParameters, false)
81 | .setPreference(AlignSingleLineCaseStatements, true)
82 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
83 | .setPreference(DoubleIndentConstructorArguments, true)
84 | .setPreference(DoubleIndentMethodDeclaration, true)
85 | .setPreference(RewriteArrowSymbols, true)
86 | .setPreference(DanglingCloseParenthesis, Preserve)
87 | .setPreference(NewlineAtEndOfFile, true)
88 | .setPreference(AllowParamGroupsOnNewlines, true)
89 | )
90 |
--------------------------------------------------------------------------------
/call-record-aggregator/build.sbt:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbt.Keys._
3 | import scalariform.formatter.preferences._
4 |
5 | //tag::docs-PipelinesApplicationPlugin-example[]
6 | lazy val callRecordPipeline = (project in file("./call-record-pipeline"))
7 | .enablePlugins(PipelinesApplicationPlugin)
8 | .settings(commonSettings)
9 | .settings(
10 | name := "call-record-aggregator"
11 | )
12 | .dependsOn(akkaCdrIngestor, akkaJavaAggregationOutput, sparkAggregation)
13 | //end::docs-PipelinesApplicationPlugin-example[]
14 |
15 | lazy val datamodel = (project in file("./datamodel"))
16 | .enablePlugins(PipelinesLibraryPlugin)
17 |
18 | lazy val akkaCdrIngestor = (project in file("./akka-cdr-ingestor"))
19 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin)
20 | .settings(
21 | commonSettings,
22 | libraryDependencies ++= Seq(
23 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
24 | "ch.qos.logback" % "logback-classic" % "1.2.3",
25 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
26 | )
27 | )
28 | .dependsOn(datamodel)
29 |
30 | lazy val akkaJavaAggregationOutput = (project in file("./akka-java-aggregation-output"))
31 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin)
32 | .settings(
33 | commonSettings,
34 | libraryDependencies ++= Seq(
35 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10",
36 | "ch.qos.logback" % "logback-classic" % "1.2.3",
37 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
38 | )
39 | )
40 | .dependsOn(datamodel)
41 |
42 | lazy val sparkAggregation = (project in file("./spark-aggregation"))
43 | .enablePlugins(PipelinesSparkLibraryPlugin)
44 | .settings(
45 | commonSettings,
46 | Test / parallelExecution := false,
47 | Test / fork := true,
48 | libraryDependencies ++= Seq(
49 | "ch.qos.logback" % "logback-classic" % "1.2.3",
50 | "org.scalatest" %% "scalatest" % "3.0.8" % "test"
51 | )
52 | )
53 | .dependsOn(datamodel)
54 |
55 |
56 | lazy val commonSettings = Seq(
57 | scalaVersion := "2.12.10",
58 | scalacOptions ++= Seq(
59 | "-encoding", "UTF-8",
60 | "-target:jvm-1.8",
61 | "-Xlog-reflective-calls",
62 | "-Xlint",
63 | "-Ywarn-unused",
64 | "-Ywarn-unused-import",
65 | "-deprecation",
66 | "-feature",
67 | "-language:_",
68 | "-unchecked"
69 | ),
70 |
71 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"),
72 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value,
73 |
74 | scalariformPreferences := scalariformPreferences.value
75 | .setPreference(AlignParameters, false)
76 | .setPreference(AlignSingleLineCaseStatements, true)
77 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90)
78 | .setPreference(DoubleIndentConstructorArguments, true)
79 | .setPreference(DoubleIndentMethodDeclaration, true)
80 | .setPreference(RewriteArrowSymbols, true)
81 | .setPreference(DanglingCloseParenthesis, Preserve)
82 | .setPreference(NewlineAtEndOfFile, true)
83 | .setPreference(AllowParamGroupsOnNewlines, true)
84 | )
85 |
--------------------------------------------------------------------------------
/flink-taxi-ride/processor/src/main/scala/pipelines/examples/processor/TaxiRideProcessor.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 | package processor
3 |
4 | import org.apache.flink.streaming.api.scala._
5 | import org.apache.flink.streaming.api.functions.co._
6 | import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
7 | import org.apache.flink.util.Collector
8 |
9 | import pipelines.streamlets.StreamletShape
10 | import pipelines.streamlets.avro._
11 | import pipelines.flink.avro._
12 | import pipelines.flink._
13 |
14 | class TaxiRideProcessor extends FlinkStreamlet {
15 |
16 | // Step 1: Define inlets and outlets. Note for the outlet you need to specify
17 | // the partitioner function explicitly : here we are using the
18 | // rideId as the partitioner
19 | @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
20 | @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
21 | @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)
22 |
23 | // Step 2: Define the shape of the streamlet. In this example the streamlet
24 | // has 2 inlets and 1 outlet
25 | @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)
26 |
27 | // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
28 | // the behavior of the streamlet
29 | override def createLogic() = new FlinkStreamletLogic {
30 | override def buildExecutionGraph = {
31 | val rides: DataStream[TaxiRide] =
32 | readStream(inTaxiRide)
33 | .filter { ride ⇒ ride.isStart.booleanValue }
34 | .keyBy("rideId")
35 |
36 | val fares: DataStream[TaxiFare] =
37 | readStream(inTaxiFare)
38 | .keyBy("rideId")
39 |
40 | val processed: DataStream[TaxiRideFare] =
41 | rides
42 | .connect(fares)
43 | .flatMap(new EnrichmentFunction)
44 |
45 | writeStream(out, processed)
46 | }
47 | }
48 |
49 | import org.apache.flink.configuration.Configuration
50 | class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {
51 |
52 | @transient var rideState: ValueState[TaxiRide] = null
53 | @transient var fareState: ValueState[TaxiFare] = null
54 |
55 | override def open(params: Configuration): Unit = {
56 | super.open(params)
57 | rideState = getRuntimeContext.getState(
58 | new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
59 | fareState = getRuntimeContext.getState(
60 | new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))
61 | }
62 |
63 | override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
64 | val fare = fareState.value
65 | if (fare != null) {
66 | fareState.clear()
67 | out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
68 | } else {
69 | rideState.update(ride)
70 | }
71 | }
72 |
73 | override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
74 | val ride = rideState.value
75 | if (ride != null) {
76 | rideState.clear()
77 | out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
78 | } else {
79 | fareState.update(fare)
80 | }
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallRecordGeneratorIngress.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples.carly.aggregator
2 |
3 | import java.sql.Timestamp
4 |
5 | import scala.util.Random
6 | import scala.concurrent.duration._
7 |
8 | import org.apache.spark.sql.{ Dataset, SparkSession }
9 | import org.apache.spark.sql.streaming.OutputMode
10 |
11 | import org.apache.spark.sql.functions._
12 | import org.apache.spark.sql.types.LongType
13 |
14 | import pipelines.streamlets._
15 | import pipelines.streamlets.avro._
16 | import pipelines.spark.sql.SQLImplicits._
17 | import pipelines.examples.carly.data.CallRecord
18 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
19 | import org.apache.log4j.{ Level, Logger }
20 |
21 | case class Rate(timestamp: Timestamp, value: Long)
22 |
23 | class CallRecordGeneratorIngress extends SparkStreamlet {
24 |
25 | val rootLogger = Logger.getRootLogger()
26 | rootLogger.setLevel(Level.ERROR)
27 |
28 | val RecordsPerSecond = IntegerConfigParameter(
29 | "records-per-second",
30 | "Records per second to process.",
31 | Some(50))
32 |
33 | override def configParameters = Vector(RecordsPerSecond)
34 |
35 | val out = AvroOutlet[CallRecord]("out", _.user)
36 | val shape = StreamletShape(out)
37 |
38 | override def createLogic() = new SparkStreamletLogic {
39 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
40 | override def buildStreamingQueries = {
41 | val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
42 | writeStream(outStream, out, OutputMode.Append).toQueryExecution
43 | }
44 | }
45 | }
46 |
47 | object DataGenerator {
48 | def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
49 | // do we need to expose this through configuration?
50 |
51 | val MaxTime = 2.hours.toMillis
52 | val MaxUsers = 100000
53 | val TS0 = new java.sql.Timestamp(0)
54 | val ZeroTimestampProb = 0.05 // error rate
55 |
56 | // Random Data Generator
57 | val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
58 | val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")
59 |
60 | // Time-biased randomized filter - 1/2 hour cycles
61 | val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
62 | val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
63 | val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
64 | val zeroTimestampUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ {
65 | if (rng < ZeroTimestampProb) {
66 | TS0
67 | } else {
68 | ts
69 | }
70 | })
71 |
72 | val rateStream = session.readStream
73 | .format("rate")
74 | .option("rowsPerSecond", recordsPerSecond)
75 | .load()
76 | .as[Rate]
77 |
78 | val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
79 | val sampledData = randomDataset.where(timeFilterUdf($"timestamp", $"rng"))
80 | .withColumn("user", usersUdf())
81 | .withColumn("other", usersUdf())
82 | .withColumn("direction", directionUdf())
83 | .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
84 | .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
85 | .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp" as "timestamp")
86 | .as[CallRecord]
87 | sampledData
88 | }
89 | }
90 |
91 |
--------------------------------------------------------------------------------
/sensor-data-scala/README.md:
--------------------------------------------------------------------------------
1 | # `sensor-data-scala`
2 |
3 | A simple pipeline that processes events from a wind turbine farm.
4 |
5 | # Required configuration
6 |
7 | `valid-logger.log-level` - Log level for `*-logger` streamlets to log at. Ex) `info`
8 | 
9 | `valid-logger.msg-prefix` - Log line prefix for `*-logger` streamlets to include. Ex) `VALID`
10 | 
11 | For example:
12 |
13 | kubectl-pipelines deploy docker-registry-default.purplehat.lightbend.com/lightbend/sensor-data-scala:382-55e76fe-dirty valid-logger.log-level=info valid-logger.msg-prefix=VALID
14 |
15 | # Generating data
16 |
17 | This example has two ingresses that are combined using a merge operation. Data can be sent to either of the ingresses or to both.
18 |
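For reference, the merge step itself is just a fan-in of the two ingress streams. A minimal, framework-free sketch with plain Akka Streams (object name and sample data are illustrative, not the actual merge streamlet):

```
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{ Merge, Sink, Source }

object MergeSketch extends App {
  implicit val system: ActorSystem = ActorSystem("merge-sketch")
  implicit val mat: ActorMaterializer = ActorMaterializer()

  // Two sources stand in for the HTTP ingress and the file ingress.
  val httpData = Source(List("reading-from-http-1", "reading-from-http-2"))
  val fileData = Source(List("reading-from-file-1"))

  // Source.combine fans the two upstreams into one downstream, which is what
  // the merge streamlet does before handing data to the validation streamlet.
  Source.combine(httpData, fileData)(Merge(_))
    .runWith(Sink.foreach(println))
    .onComplete(_ ⇒ system.terminate())(system.dispatcher)
}
```
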
19 | First deploy the app with `kubectl pipelines deploy [image]`
20 |
21 | To send data to the HTTP ingress, do the following:
22 |
23 | - Get `sensor-data` ingress HTTP endpoint with `kubectl pipelines status sensor-data-scala`
24 |
25 | In the example output below the HTTP endpoint would be `docker-registry-default.my.kubernetes.cluster/sensor-data-http-ingress`:
26 |
27 | ```
28 | kubectl pipelines status sensor-data-scala
29 | Name: sensor-data-scala
30 | Namespace: sensor-data-scala
31 | Version: 445-fcd70ca
32 | Created: 2019-08-20 13:55:35 +0200 CEST
33 | Status: Running
34 |
35 | STREAMLET ENDPOINT
36 | http-ingress docker-registry-default.my.kubernetes.cluster/sensor-data-http-ingress
37 |
38 | STREAMLET POD STATUS RESTARTS READY
39 | invalid-logger sensor-data-scala-invalid-logger-854dd5b47b-rhg7p Running 0 True
40 | http-ingress sensor-data-scala-http-ingress-6b7c586d6-jtd9x Running 0 True
41 | rotor-avg-logger sensor-data-scala-rotor-avg-logger-86c44d896-4f4gb Running 0 True
42 | metrics sensor-data-scala-metrics-f6f749d48-n7qss Running 0 True
43 | file-ingress sensor-data-scala-file-ingress-7f5b966755-jtbnv Running 0 True
44 | validation sensor-data-scala-validation-6f4b59b678-dd4gg Running 0 True
45 | rotorizer sensor-data-scala-rotorizer-55956cb47b-l7kng Running 0 True
46 | merge sensor-data-scala-merge-548994576-k8k8h Running 0 True
47 | valid-logger sensor-data-scala-valid-logger-86449cb958-wztsq Running 0 True
48 | ```
49 |
50 | - Pick a test data file from `./test-data`, for example `test-data/04-moderate-breeze.json`
51 | - Send the file to the HTTP ingress with the following `curl` command
52 |
53 |
54 | curl -i -X POST sensor-data-scala.apps.purplehat.lightbend.com/sensor-data -H "Content-Type: application/json" --data '@test-data/04-moderate-breeze.json'
55 |
56 | To send data to the file ingress, use the following shell script found in the project root directory:
57 |
58 | ./load-data-into-pvc.sh
59 |
60 | The shell script will load a number of files from the `test-data` directory and the ingress will continuously read those files and emit their content to the merge streamlet.
61 |
62 | ## Using [`wrk`](https://github.com/wg/wrk) benchmarking tool
63 |
64 | Use `wrk` to send a continuous stream of data to the HTTP ingress.
65 |
66 | ### Install
67 |
68 | * Ubuntu: `apt-get install wrk`
69 | * MacOS: `brew install wrk`
70 |
71 | ### Run
72 |
73 | Ex)
74 |
75 | ```
76 | wrk -c 400 -t 400 -d 500 -s wrk-04-moderate-breeze.lua http://sensor-data-scala.apps.purplehat.lightbend.com/sensor-data
77 | ```
78 |
--------------------------------------------------------------------------------
/flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/JsonFormats.scala:
--------------------------------------------------------------------------------
1 | package pipelines.examples
2 | package ingestor
3 |
4 | import spray.json._
5 | import pipelines.flink.avro._
6 |
7 | object TaxiRideJsonProtocol extends DefaultJsonProtocol {
8 | implicit object TaxiRideJsonFormat extends RootJsonFormat[TaxiRide] {
9 | def write(t: TaxiRide) = JsObject(
10 | "rideId" -> JsNumber(t.rideId),
11 | "isStart" -> JsBoolean(t.isStart),
12 | "taxiId" -> JsNumber(t.taxiId),
13 | "passengerCnt" -> JsNumber(t.passengerCnt),
14 | "driverId" -> JsNumber(t.driverId),
15 | "startLon" -> JsNumber(t.startLon.doubleValue()),
16 | "startLat" -> JsNumber(t.startLat.doubleValue()),
17 | "endLon" -> JsNumber(t.endLon.doubleValue()),
18 | "endLat" -> JsNumber(t.endLat.doubleValue()),
19 | "startTime" -> JsNumber(t.startTime),
20 | "endTime" -> JsNumber(t.endTime)
21 | )
22 | def read(value: JsValue) = {
23 | value.asJsObject.getFields(
24 | "rideId",
25 | "isStart",
26 | "taxiId",
27 | "passengerCnt",
28 | "driverId",
29 | "startLon",
30 | "startLat",
31 | "endLon",
32 | "endLat",
33 | "startTime",
34 | "endTime") match {
35 | case Seq(JsNumber(rideId),
36 | JsBoolean(isStart),
37 | JsNumber(taxiId),
38 | JsNumber(passengerCnt),
39 | JsNumber(driverId),
40 | JsNumber(startLon),
41 | JsNumber(startLat),
42 | JsNumber(endLon),
43 | JsNumber(endLat),
44 | JsNumber(startTime),
45 | JsNumber(endTime)) ⇒
46 | new TaxiRide(
47 | rideId.longValue(),
48 | isStart,
49 | taxiId.longValue(),
50 | passengerCnt.intValue(),
51 | driverId.longValue(),
52 | startLon.floatValue(),
53 | startLat.floatValue(),
54 | endLon.floatValue(),
55 | endLat.floatValue(),
56 | startTime.longValue(),
57 | endTime.longValue())
58 | case _ ⇒ throw new DeserializationException("TaxiRide expected")
59 | }
60 | }
61 | }
62 | }
63 |
64 | object TaxiFareJsonProtocol extends DefaultJsonProtocol {
65 | implicit object TaxiFareJsonFormat extends RootJsonFormat[TaxiFare] {
66 | def write(t: TaxiFare) = JsObject(
67 | "rideId" -> JsNumber(t.rideId),
68 | "taxiId" -> JsNumber(t.taxiId),
69 | "paymentType" -> JsString(t.paymentType),
70 | "driverId" -> JsNumber(t.driverId),
71 | "startTime" -> JsNumber(t.startTime),
72 | "tip" -> JsNumber(t.tip.floatValue()),
73 | "tolls" -> JsNumber(t.tolls.floatValue()),
74 | "totalFare" -> JsNumber(t.totalFare.floatValue())
75 | )
76 | def read(value: JsValue) = {
77 | value.asJsObject.getFields(
78 | "rideId",
79 | "taxiId",
80 | "paymentType",
81 | "driverId",
82 | "startTime",
83 | "tip",
84 | "tolls",
85 | "totalFare") match {
86 | case Seq(JsNumber(rideId),
87 | JsNumber(taxiId),
88 | JsString(paymentType),
89 | JsNumber(driverId),
90 | JsNumber(startTime),
91 | JsNumber(tip),
92 | JsNumber(tolls),
93 | JsNumber(totalFare)) ⇒
94 | new TaxiFare(
95 | rideId.longValue(),
96 | taxiId.longValue(),
97 | paymentType,
98 | driverId.longValue(),
99 | startTime.longValue(),
100 | tip.floatValue(),
101 | tolls.floatValue(),
102 | totalFare.floatValue())
103 | case _ ⇒ throw new DeserializationException("TaxiFare expected")
104 | }
105 | }
106 | }
107 | }
108 |
109 |
--------------------------------------------------------------------------------
/warez/data/values/uuids.txt:
--------------------------------------------------------------------------------
1 | 5728b0c7-e561-4faf-a958-af69e415b91e
2 | bdadecc1-7cf5-431c-bba4-f5cb06903222
3 | 0349fbe5-3570-4868-906f-acd0243a36d9
4 | 928c7e09-6e2f-43e3-8537-ba92eebf6651
5 | 1367950c-1904-4bfa-ae61-5a5d79a8003f
6 | bf1b0266-6088-4369-9699-0e55e778c585
7 | 0da5586e-298c-4d03-9a64-1b87654ced88
8 | f692b634-e332-46fc-92f4-368e1c473923
9 | 914d0e4c-4897-4579-b67b-bc6d3fd91a15
10 | b681f516-70b6-4f3e-a71d-dd24426a9f42
11 | bcf44d72-8a4c-4f43-8e06-2fe607a0b0d2
12 | 6da26f79-4857-4727-8836-f359c8872ecd
13 | 244185ac-6515-48e8-89ad-ee4d3af2212b
14 | d39f2962-e913-4670-9ea9-0681e9b4eaca
15 | 280cfa0f-7540-4937-bb59-38dc24fef161
16 | 3ae8c7a0-003c-4fd8-95da-03767827b135
17 | 074c1b79-6574-4db2-95af-d1043650886e
18 | bd4db512-cf72-4cb9-84dd-69d5f19fc003
19 | 11efa973-9720-4f3c-a7f4-f29deb62da50
20 | 4cef7e0c-72ae-4afd-87d5-22a8db3e15e9
21 | d36ef2e5-c205-4b2e-b631-15d59cf081e0
22 | 507f2f01-dc2a-410a-bdc5-4ca532e8c202
23 | e9b32847-9adf-43e2-874b-127eaa90a8c6
24 | 838d0d0f-5213-4896-9e94-6733d99ffd94
25 | a4a8d1df-26af-4440-abb3-3b363716a619
26 | 6132864f-8cb1-4de2-a286-7670c9ffc72d
27 | bd3afa11-d499-40bd-9e8f-45676f5ce458
28 | 07136375-a251-4d2a-9364-887c01cc63cc
29 | bb61adc9-cdfe-4e6f-bfd0-7bb07fd3e4e7
30 | 5865f31c-7677-4ab1-9ab3-4872abeaeade
31 | f3b5e71f-64fc-4c1d-8f13-1ec0f2d2ccc0
32 | 613507a9-8ffd-4ed0-a0ba-3b7b9956ae7a
33 | 612a1236-abba-49a5-a638-5e2e064c84f1
34 | b8f0f760-5ee8-434c-8828-59eb61094d27
35 | c715864c-0557-4e23-bf59-0a266621896c
36 | 067078f8-3dea-467d-b9fa-2a8d0157b80f
37 | 3d543837-81fc-44c6-8c0d-ccae056392c4
38 | 7db392a7-d2bd-487b-aab8-9e27fc1d9597
39 | 74f3bf36-8aea-4e12-9299-a8f9e482df81
40 | 35eae45b-eb50-4ace-9a09-49cc55012bd2
41 | 3f875710-5512-44ad-9241-0133083b1e88
42 | 1d2bd727-4780-4499-8865-7a282da46d0d
43 | 7bf45b41-4b2c-428b-b07e-9d3579c1789b
44 | d331bd99-897b-480b-8fd3-9dd36a83ab1c
45 | 9816c9d8-bf07-4bae-bbd2-e685f5f96511
46 | 01b54c3b-913b-42cc-bde5-2e3efba4e083
47 | c2d5cbe0-f42e-425c-87cf-c902734912e0
48 | 95c9a275-627b-4e22-8717-848b5b58bc6f
49 | f34dec58-57c8-4251-8e77-c7ae1f79f457
50 | 8a9ad446-d62f-495b-a636-c0ad9fa9880b
51 | 7b2c233b-2ea7-4f34-a33f-3c25272c85c5
52 | 44af5782-f9e8-4c81-b69a-36f29a213b11
53 | 2a1389ad-0261-4d55-9384-9b5d36493474
54 | 6df1a590-e4db-4297-9d99-d602e3e87645
55 | 44822f06-b1ce-4d63-9daf-588f03622bc4
56 | dee251f4-1347-4dca-aac0-0c855d2d534c
57 | 13fec854-b3ef-4e9d-ae5a-e26b0c093b0f
58 | 413ba5e4-e826-44e6-a4b7-55ec35befe09
59 | 735ded00-7acc-442b-a4ee-b393aef4d007
60 | 99ed94f1-92cd-4f54-b2d0-4639509938f6
61 | 51caaa68-d224-404e-8909-a6082bcdb2e5
62 | 84006f1d-3011-40ab-b62a-f2cecaf4a9f5
63 | e2d37e6f-ae5d-41b2-abdf-15e00f081f11
64 | e21a2d68-d6ee-46fb-8d36-1da6b0c89ffb
65 | d9889c07-97cc-4488-920d-79efdd576385
66 | 8f0aa2e3-2014-49bc-8d14-b2ed27ef3fa4
67 | b78232dc-222b-4a21-ab49-c3ce69639f53
68 | ea80afa3-63e6-481b-9c63-9681945ec33b
69 | 72d59883-2b47-483a-a682-1901c7040012
70 | 608b1cec-f017-49a9-a4dc-5be44f03f398
71 | b56f3954-a80a-4056-a689-b25274cb0365
72 | 29e7fdb7-cdbb-45d9-b6f3-d3a6ffeee056
73 | 13efa3db-bdab-437b-8f3f-9e653ce06a77
74 | d3d2e381-0aa4-4848-b911-85b6d9a9505c
75 | 36f31a4b-c74d-4ea3-b0a0-f4454b2352d7
76 | 6e181d59-0804-4a6b-a3a7-14169ae66764
77 | 9de6e256-cf4c-490c-bd8a-1c91d38e7cb0
78 | e46864bb-5f44-4df2-b2a8-9afe57adee2e
79 | 93fcdb08-f6fc-46ae-bd20-effe8107f0d6
80 | 6802c9e8-d156-410b-8500-369396bbf5ef
81 | dc700a91-0b0b-45b4-a0a9-52c1cebe79e3
82 | 2f6fcb74-27c8-43ef-b4b8-d9428958f46b
83 | caf10de4-b9d6-4156-8d7d-e9c31981cf94
84 | 59b0c7aa-ef9d-4b36-85ff-0cfb4eaedaaa
85 | 98b00e25-d6d1-4fb1-b714-26486cebee40
86 | 57456ad3-a02a-4ccc-b1ed-35d3460e6e9f
87 | dd2c3500-b06a-4eae-9a1a-dcb12bc872e3
88 | 5fc0e05f-4cbb-43f8-8596-219308858598
89 | c0f8fbdf-48be-4b70-af72-76969fba0bbd
90 | 7b692c68-ffae-48d1-981a-12871db3474f
91 | f77cc77b-7595-4f88-b6a3-b56e135e0c06
92 | c24a9f79-d12e-44f3-a665-4d36bfaef08f
93 | 27bf19e7-8686-46bf-81aa-b3860196cb5c
94 | b8ad4125-7364-4f08-a61e-e863094caef4
95 | 4679ef35-f82d-4853-bcc1-a6da5e618b62
96 | 6db42963-5909-4e4a-a92a-3d009078e293
97 | 73608eb1-aa43-4cca-8cc5-9bba3278f4ff
98 | cada8794-b9e2-4b1d-b565-de7772d96b94
99 | 12aeb1b1-38bd-4a78-bc47-764d30a132cd
100 | e0e1443c-b276-4a13-aa4a-32998db55db0
101 |
--------------------------------------------------------------------------------
/warez/spark-streamlets/src/main/scala/pipelines/example/warez/SparkProductJoiner.scala:
--------------------------------------------------------------------------------
1 | package pipelines.example.warez
2 |
3 | import scala.collection.immutable.Seq
4 | import org.apache.spark.sql.Dataset
5 | import org.apache.spark.sql.streaming.{ GroupState, GroupStateTimeout, OutputMode }
6 | import pipelines.streamlets.StreamletShape
7 | import pipelines.streamlets.avro._
8 | import pipelines.spark.sql.SQLImplicits._
9 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet }
10 | import warez.{ PriceUpdate, Sku, StockUpdate }
11 | import SparkProductJoiner._
12 |
13 | class SparkProductJoiner extends SparkStreamlet {
14 |
15 | val in0 = AvroInlet[warez.Product]("in-0")
16 | val in1 = AvroInlet[warez.StockUpdate]("in-1")
17 | val in2 = AvroInlet[warez.PriceUpdate]("in-2")
18 | val out = AvroOutlet[warez.Product]("out", _.id.toString)
19 |
20 | val shape = StreamletShape(out).withInlets(in0, in1, in2)
21 |
22 | override def createLogic = new SparkStreamletLogic {
23 |
24 | override def buildStreamingQueries = {
25 | val products = readStream(in0)
26 | val stocks = readStream(in1)
27 | val prices = readStream(in2)
28 | val outStream = process(products, stocks, prices)
29 | val query = writeStream(outStream, out, OutputMode.Append)
30 | query.toQueryExecution
31 | }
32 | private def process(products: Dataset[warez.Product], stocks: Dataset[warez.StockUpdate], prices: Dataset[warez.PriceUpdate]): Dataset[warez.Product] = {
33 | val stocksAsProducts = stocks.map(stockUpdate2Product)
34 | val pricesAsProducts = prices.map(priceUpdate2Products)
35 | val withStocks = products
36 | .union(stocksAsProducts)
37 | .union(pricesAsProducts)
38 | .groupByKey(p ⇒ p.id)
39 | .flatMapGroupsWithState(OutputMode.Append(), GroupStateTimeout.NoTimeout)(stateFunc)
40 | withStocks
41 | }
42 | }
43 | }
44 |
45 | object SparkProductJoiner {
46 | private[warez] def stockUpdate2Product(s: StockUpdate): warez.Product = {
47 | warez.Product(s.productId, "", "", Seq.empty[String], Seq(Sku(s.skuId, "", stock = Option(s.diff), price = None)))
48 | }
49 |
50 | private[warez] def priceUpdate2Products(p: PriceUpdate): warez.Product = {
51 | warez.Product(p.productId, "", "", Seq.empty[String], Seq(Sku(p.skuId, "", stock = None, price = Option(p.price))))
52 | }
53 |
54 | type ProductId = String
55 |
56 | private[warez] def emptyProduct: warez.Product = new warez.Product
57 |
58 | private[warez] def calcStockDiff(a: Option[Int], b: Option[Int]): Option[Int] = (a, b) match { // apply the incoming stock diff to the current stock level
59 | case (Some(i), Some(j)) ⇒ Some(i + j)
60 | case (Some(i), None) ⇒ Some(i)
61 | case (None, Some(j)) ⇒ Some(j)
62 | case _ ⇒ None
63 | }
64 |
65 | private[warez] def mergeSkus(a: Sku, b: Sku): Sku = { // keep the longer name, accumulate stock, take the incoming price
66 | val name = if (a.name.length > b.name.length) a.name else b.name
67 | val stock = calcStockDiff(a.stock, b.stock)
68 | Sku(a.id, name, stock, b.price)
69 | }
70 |
71 | private[warez] def mergeProducts(acc: warez.Product, skuId: String, newSku: Sku) = { // add the sku if it is new, otherwise merge it with the existing one
72 | val skuIndex = acc.skus.indexWhere(_.id == skuId)
73 | if (skuIndex < 0) {
74 | acc.copy(skus = acc.skus :+ newSku)
75 | } else {
76 | val sku = acc.skus(skuIndex)
77 | acc.copy(skus = acc.skus.updated(skuIndex, mergeSkus(sku, newSku)))
78 | }
79 | }
80 |
81 | private[warez] def updateProduct(currentProduct: warez.Product, prods: Iterator[warez.Product]): warez.Product = {
82 | val empty = emptyProduct
83 | prods.foldLeft(currentProduct) { (acc, p) ⇒
84 | p match {
85 | // Is StockUpdate
86 | case warez.Product(_, "", "", _, Seq(Sku(skuId, "", Some(diff), None))) ⇒ {
87 | acc match {
88 | case warez.Product("", "", "", Seq(), Seq()) ⇒ empty
89 | case _ ⇒
90 | val newSku = Sku(skuId, "", Some(diff), None)
91 | mergeProducts(acc, skuId, newSku)
92 | }
93 | }
94 | // Is PriceUpdate
95 | case warez.Product(_, "", "", _, Seq(Sku(skuId, "", None, Some(price)))) ⇒ {
96 | acc match {
97 | case warez.Product("", "", "", Seq(), Seq()) ⇒ empty
98 | case _ ⇒
99 | val newSku = Sku(skuId, "", None, Some(price))
100 | mergeProducts(acc, skuId, newSku)
101 | }
102 | }
103 | // Is Product
104 | case newProd ⇒ acc.copy(id = newProd.id, name = newProd.name, description = newProd.description, keywords = newProd.keywords, skus = newProd.skus)
105 | }
106 | }
107 | }
108 |
109 | private[warez] def invalid(p: warez.Product): Boolean = p.description.isEmpty && p.name.isEmpty && p.keywords.isEmpty
110 |
111 | val stateFunc: (ProductId, Iterator[warez.Product], GroupState[warez.Product]) ⇒ Iterator[warez.Product] = (_, prods, state) ⇒ {
112 | val out = updateProduct(state.getOption.getOrElse(emptyProduct), prods)
113 | (if (invalid(out)) {
114 | // return nothing
115 | None
116 | } else {
117 | // update state only when output is valid
118 | state.update(out)
119 | Some(out)
120 | }).toIterator
121 | }
122 | }
123 |
--------------------------------------------------------------------------------