├── warez ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── test-data │ ├── product-black-hole-price-update.json │ ├── product-black-hole-stock-update.json │ ├── product-black-hole.json │ └── product-singing-sword.json ├── data │ ├── values │ │ ├── keywords.txt │ │ └── uuids.txt │ ├── generate-stock-update.sh │ ├── generate-price-update.sh │ ├── generate-product.sh │ └── README.md ├── akka-streamlets │ └── src │ │ └── main │ │ └── scala │ │ ├── warez │ │ ├── ProductIngress.scala │ │ ├── PriceUpdateIngress.scala │ │ ├── StockUpdateIngress.scala │ │ ├── JsonFormats.scala │ │ ├── RecommenderModelIngress.scala │ │ └── ElasticSearchClient.scala │ │ ├── wip │ │ ├── ProductLogger.scala │ │ ├── PriceUpdateLogger.scala │ │ └── StockUpdateLogger.scala │ │ └── dsl │ │ ├── HttpIngress.scala │ │ ├── FlowEgress.scala │ │ └── HttpServer.scala ├── spark-streamlets │ └── src │ │ ├── test │ │ └── scala │ │ │ └── pipelines │ │ │ └── example │ │ │ └── warez │ │ │ ├── TestUtils.scala │ │ │ ├── SparkProductJoinerKitSpec.scala │ │ │ └── SparkProductOperationsSpec.scala │ │ └── main │ │ └── scala │ │ └── pipelines │ │ └── example │ │ └── warez │ │ └── SparkProductJoiner.scala ├── .gitignore ├── datamodel │ └── src │ │ └── main │ │ └── avro │ │ ├── PriceUpdate.avsc │ │ ├── StockUpdate.avsc │ │ ├── RecommenderModel.avsc │ │ └── Product.avsc ├── blueprint │ └── src │ │ └── main │ │ ├── blueprint │ │ └── blueprint.conf │ │ └── resources │ │ └── logback.xml ├── ml-training │ ├── avro │ │ └── RecommenderModel.avsc │ └── README.md ├── target-env.sbt.example └── build.sbt ├── mixed-sensors ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── src │ └── main │ │ ├── scala │ │ ├── README.md │ │ └── pipelines │ │ │ └── example │ │ │ ├── TimeOps.scala │ │ │ ├── IdentityAkkaStreamsProcessor0.scala │ │ │ ├── IdentityAkkaStreamsProcessor1.scala │ │ │ ├── IdentityAkkaStreamsProcessor2.scala │ │ │ ├── IdentitySparkProcessor0.scala │ │ │ ├── IdentitySparkProcessor2.scala │ │ │ ├── IdentitySparkProcessor1.scala │ │ │ ├── SparkRandomGenDataIngress.scala │ │ │ └── SparkConsoleEgress.scala │ │ ├── blueprint │ │ ├── akka-spark-single-processor.conf │ │ ├── t0-t1-blueprint.conf │ │ ├── t0-process-t1-blueprint.conf │ │ ├── blueprint.conf │ │ └── parallel-100ms-delay.conf │ │ └── avro │ │ └── data.avsc ├── target-env.sbt.example └── build.sbt ├── spark-sensors ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── src │ └── main │ │ ├── scala │ │ ├── README.md │ │ └── pipelines │ │ │ └── example │ │ │ ├── SparkConsoleEgress.scala │ │ │ ├── MovingAverageSparklet.scala │ │ │ └── SparkRandomGenDataIngress.scala │ │ ├── blueprint │ │ └── blueprint.conf │ │ └── avro │ │ ├── agg.avsc │ │ └── data.avsc ├── .gitignore ├── target-env.sbt.example └── build.sbt ├── flink-taxi-ride ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── test-data │ ├── send-data-rides.sh │ ├── send-data-fares.sh │ ├── send-data-small.sh │ ├── nycTaxiFares-small.json │ └── nycTaxiRides-small.json ├── datamodel │ └── src │ │ └── main │ │ └── avro │ │ ├── taxiridefare.avsc │ │ ├── taxifare.avsc │ │ └── taxiride.avsc ├── taxi-ride-pipeline │ └── src │ │ └── main │ │ └── blueprint │ │ └── blueprint.conf ├── ingestor │ └── src │ │ └── main │ │ ├── scala │ │ └── pipelines │ │ │ └── examples │ │ │ └── ingestor │ │ │ ├── TaxiFareIngress.scala │ │ │ ├── TaxiRideIngress.scala │ │ │ └── JsonFormats.scala │ │ └── resources │ │ ├── log4j.properties │ │ └── 
logback.xml ├── logger │ └── src │ │ └── main │ │ ├── resources │ │ ├── log4j.properties │ │ └── logback.xml │ │ └── scala │ │ └── pipelines │ │ └── examples │ │ └── logger │ │ └── FarePerRideLogger.scala ├── target-env.sbt.example ├── README.md ├── build.sbt └── processor │ └── src │ └── main │ └── scala │ └── pipelines │ └── examples │ └── processor │ └── TaxiRideProcessor.scala ├── sensor-data-java ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── test-data │ ├── device-ids.txt │ ├── 10-storm.json │ ├── 10-storm-1.json │ ├── 12-hurricane.json │ ├── invalid-metric.json │ ├── 04-moderate-breeze.json │ ├── 11-violent-storm.json │ ├── wrk-04-moderate-breeze.lua │ └── future-data.json ├── src │ ├── main │ │ ├── java │ │ │ └── pipelines │ │ │ │ └── examples │ │ │ │ └── sensordata │ │ │ │ ├── SensorDataUtils.java │ │ │ │ ├── SensorDataIngress.java │ │ │ │ ├── SensorDataStreamingIngress.java │ │ │ │ ├── MetricsValidation.java │ │ │ │ └── SensorDataToMetrics.java │ │ ├── avro │ │ │ ├── InvalidMetric.avsc │ │ │ ├── Measurements.avsc │ │ │ ├── SensorData.avsc │ │ │ └── Metric.avsc │ │ ├── blueprint │ │ │ └── blueprint.conf │ │ └── resources │ │ │ └── logback.xml │ └── test │ │ └── java │ │ └── pipelines │ │ └── examples │ │ └── sensordata │ │ └── MetricsValidationTest.java ├── .gitignore ├── load-data-into-pvc.sh ├── target-env.sbt.example ├── build.sbt └── README.md ├── sensor-data-scala ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── src │ └── main │ │ ├── resources │ │ ├── local.conf │ │ └── logback.xml │ │ ├── scala │ │ └── pipelines │ │ │ └── examples │ │ │ ├── sensordata │ │ │ ├── SensorDataUtils.scala │ │ │ ├── SensorDataHttpIngress.scala │ │ │ ├── SensorDataMerge.scala │ │ │ ├── RotorSpeedFilter.scala │ │ │ ├── InvalidMetricLogger.scala │ │ │ ├── SensorDataStreamingIngress.scala │ │ │ ├── MetricsValidation.scala │ │ │ ├── RotorspeedWindowLogger.scala │ │ │ ├── SensorDataToMetrics.scala │ │ │ ├── JsonFormats.scala │ │ │ ├── ValidMetricLogger.scala │ │ │ └── SensorDataFileIngress.scala │ │ │ └── package.scala │ │ ├── avro │ │ ├── InvalidMetric.avsc │ │ ├── Measurements.avsc │ │ ├── SensorData.avsc │ │ └── Metric.avsc │ │ └── blueprint │ │ └── blueprint.conf ├── test-data │ ├── 10-storm.json │ ├── 12-hurricane.json │ ├── 04-moderate-breeze.json │ ├── 11-violent-storm.json │ ├── invalid-metric.json │ ├── wrk-04-moderate-breeze.lua │ └── future-data.json ├── .gitignore ├── target-env.sbt.example ├── load-data-into-pvc.sh ├── build.sbt └── README.md ├── call-record-aggregator ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── spark-aggregation │ └── src │ │ ├── main │ │ └── scala │ │ │ ├── README.md │ │ │ └── pipelines │ │ │ └── examples │ │ │ └── carly │ │ │ └── aggregator │ │ │ ├── CallAggregatorConsoleEgress.scala │ │ │ ├── CallStatsAggregator.scala │ │ │ └── CallRecordGeneratorIngress.scala │ │ └── test │ │ └── scala │ │ └── pipelines │ │ └── examples │ │ └── carly │ │ └── aggregator │ │ ├── CallRecordGeneratorIngressSpec.scala │ │ └── CallStatsAggregatorSpec.scala ├── akka-cdr-ingestor │ └── src │ │ ├── main │ │ └── scala │ │ │ └── pipelines │ │ │ └── examples │ │ │ └── carly │ │ │ └── ingestor │ │ │ ├── JsonFormats.scala │ │ │ ├── CallRecordMerge.scala │ │ │ ├── CallRecordIngress.scala │ │ │ ├── CallRecordStreamingIngress.scala │ │ │ └── CallRecordValidation.scala │ │ └── test │ │ └── scala │ │ └── pipelines │ │ └── examples │ │ └── carly │ │ └── ingestor │ │ ├── 
CallRecordValidationSpec.scala │ │ └── CallRecordMergeSpec.scala ├── datamodel │ ├── src │ │ └── main │ │ │ └── avro │ │ │ ├── InvalidRecord.avsc │ │ │ ├── AggregatedCallStats.avsc │ │ │ └── CallRecord.avsc │ └── data │ │ └── data-sample-20.json ├── .gitignore ├── send_data.sh ├── target-env.sbt.example ├── call-record-pipeline │ └── src │ │ └── main │ │ ├── blueprint │ │ └── blueprint.conf │ │ └── resources │ │ └── logback.xml ├── akka-java-aggregation-output │ └── src │ │ └── main │ │ └── java │ │ └── pipelines │ │ └── examples │ │ └── carly │ │ └── output │ │ ├── AggregateRecordEgress.java │ │ └── InvalidRecordEgress.java └── build.sbt ├── spark-resilience-test ├── project │ ├── build.properties │ ├── plugins.sbt │ └── pipelines-plugins.sbt ├── src │ ├── main │ │ ├── scala │ │ │ ├── pipelines │ │ │ │ └── example │ │ │ │ │ ├── SequenceSettings.scala │ │ │ │ │ ├── SuicidalMonkeyProcessor.scala │ │ │ │ │ └── SparkSequenceGeneratorIngress.scala │ │ │ └── README.md │ │ ├── blueprint │ │ │ └── blueprint.conf │ │ └── avro │ │ │ └── data.avsc │ └── test │ │ └── scala │ │ └── pipelines │ │ └── example │ │ ├── SparkSequenceValidatorEgressTest.scala │ │ ├── DataGroupTest.scala │ │ └── SparkSequenceGeneratorIngressTest.scala ├── .gitignore ├── target-env.sbt.example └── build.sbt ├── .gitignore └── README.md /warez/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /mixed-sensors/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /spark-sensors/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /flink-taxi-ride/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /sensor-data-java/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /sensor-data-scala/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /call-record-aggregator/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /spark-resilience-test/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.2.8 2 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/device-ids.txt: -------------------------------------------------------------------------------- 1 | c75cb448-df0e-4692-8e06-0321b7703992 2 | -------------------------------------------------------------------------------- /warez/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | 
addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") -------------------------------------------------------------------------------- /mixed-sensors/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") 2 | -------------------------------------------------------------------------------- /sensor-data-scala/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") -------------------------------------------------------------------------------- /spark-sensors/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") 2 | -------------------------------------------------------------------------------- /call-record-aggregator/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") -------------------------------------------------------------------------------- /sensor-data-java/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") 2 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/resources/local.conf: -------------------------------------------------------------------------------- 1 | file-ingress { 2 | source-data-mount="/tmp/pipelines" 3 | } 4 | -------------------------------------------------------------------------------- /spark-sensors/src/main/scala/README.md: -------------------------------------------------------------------------------- 1 | # This is an example project that illustrates the use of the Spark Support in Pipelines 2 | -------------------------------------------------------------------------------- /warez/test-data/product-black-hole-price-update.json: -------------------------------------------------------------------------------- 1 | { 2 | "productId":"123456789", 3 | "skuId":"1", 4 | "price":10 5 | } 6 | -------------------------------------------------------------------------------- /warez/test-data/product-black-hole-stock-update.json: -------------------------------------------------------------------------------- 1 | { 2 | "productId":"123456789", 3 | "skuId":"1", 4 | "diff":10 5 | } 6 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/main/scala/README.md: -------------------------------------------------------------------------------- 1 | # This is an example project that illustrates the use of the Spark Support in Pipelines 2 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/README.md: -------------------------------------------------------------------------------- 1 | # This is an example project used to compare the execution of Spark and AkkaStreams components in one Pipeline. 
2 | -------------------------------------------------------------------------------- /flink-taxi-ride/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") 2 | addSbtPlugin("com.cavorite" % "sbt-avro-1-8" % "1.1.6") 3 | -------------------------------------------------------------------------------- /spark-resilience-test/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2") 2 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") 3 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/TimeOps.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | object TimeOps { 4 | 5 | def nowAsOption: Option[Long] = Some(System.currentTimeMillis()) 6 | 7 | } 8 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataUtils.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | object SensorDataUtils { 4 | def isValidMetric(m: Metric) = m.value >= 0.0 5 | } 6 | -------------------------------------------------------------------------------- /flink-taxi-ride/test-data/send-data-rides.sh: -------------------------------------------------------------------------------- 1 | for str in $(cat nycTaxiRides.json) 2 | do 3 | echo "Using $str" 4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-ride -H "Content-Type: application/json" --data "$str" 5 | done 6 | -------------------------------------------------------------------------------- /flink-taxi-ride/test-data/send-data-fares.sh: -------------------------------------------------------------------------------- 1 | for str in $(cat nycTaxiFares.json) 2 | do 3 | echo "Using $str" 4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-fare -H "Content-Type: application/json" --data "$str" 5 | done 6 | 7 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/package.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | 3 | import java.time.Instant 4 | 5 | package object sensordata { 6 | implicit def toInstant(millis: Long): Instant = Instant.ofEpochMilli(millis) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataUtils.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | public final class SensorDataUtils { 4 | public static boolean isValidMetric(Metric m) { 5 | return m.getValue() >= 0.0; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/10-storm.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 100.1 8 | } 9 | } 10 | 
-------------------------------------------------------------------------------- /sensor-data-java/test-data/10-storm-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 100.1 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/12-hurricane.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495546546279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 129.4 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/invalid-metric.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": -1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 25.3 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/10-storm.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 23.4, 7 | "windSpeed": 100.1 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/12-hurricane.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495546546279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 78.3, 7 | "windSpeed": 129.4 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/04-moderate-breeze.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "d75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 25.3 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/11-violent-storm.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545646279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 105.9 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/04-moderate-breeze.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 25.3 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/11-violent-storm.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | 
"timestamp": 1495545646279, 4 | "measurements": { 5 | "power": 1.7, 6 | "rotorSpeed": 45.7, 7 | "windSpeed": 105.9 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/invalid-metric.json: -------------------------------------------------------------------------------- 1 | { 2 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 3 | "timestamp": 1495545346279, 4 | "measurements": { 5 | "power": -1.7, 6 | "rotorSpeed": 3.9, 7 | "windSpeed": 25.3 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /warez/data/values/keywords.txt: -------------------------------------------------------------------------------- 1 | crude 2 | ivray 3 | hajes 4 | commy 5 | jerky 6 | lanum 7 | miaul 8 | skied 9 | pidan 10 | paled 11 | board 12 | blots 13 | molar 14 | pareu 15 | stong 16 | cadua 17 | dhoti 18 | urutu 19 | claws 20 | tardy 21 | ramed 22 | shuln 23 | boult 24 | brian 25 | ketal 26 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/ProductIngress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import pipelines.streamlets.avro._ 5 | import JsonFormats._ 6 | import warez.dsl._ 7 | 8 | class ProductIngress extends HttpIngress[Product](AvroOutlet[Product]("out", _.id.toString)) 9 | -------------------------------------------------------------------------------- /flink-taxi-ride/datamodel/src/main/avro/taxiridefare.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.flink.avro", 3 | "type": "record", 4 | "name": "TaxiRideFare", 5 | "fields":[ 6 | { 7 | "name": "rideId", "type": "long" 8 | }, 9 | { 10 | "name": "totalFare", "type": "float" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /spark-resilience-test/src/main/scala/pipelines/example/SequenceSettings.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import scala.concurrent.duration._ 4 | 5 | object SequenceSettings { 6 | 7 | val GroupSize: Int = 1500 8 | val FailureProbability: Double = 0.05 9 | val TimeoutDuration: Long = 1.minute.toMillis 10 | val RecordsPerSecond: Int = 50 11 | 12 | } 13 | -------------------------------------------------------------------------------- /spark-sensors/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | process = pipelines.example.MovingAverageSparklet 5 | egress = pipelines.example.SparkConsoleEgress 6 | } 7 | connections { 8 | ingress.out = [process.in] 9 | process.out = [egress.in] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/PriceUpdateIngress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import pipelines.streamlets.avro._ 5 | import JsonFormats._ 6 | import warez.dsl._ 7 | 8 | class PriceUpdateIngress extends HttpIngress[PriceUpdate](AvroOutlet[PriceUpdate]("out", _.productId.toString)) 9 | 
10 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/StockUpdateIngress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import pipelines.streamlets.avro._ 5 | import JsonFormats._ 6 | import warez.dsl._ 7 | 8 | class StockUpdateIngress extends HttpIngress[StockUpdate](AvroOutlet[StockUpdate]("out", _.productId.toString)) 9 | 10 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/avro/InvalidMetric.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "InvalidMetric", 5 | "fields":[ 6 | { 7 | "name": "metric", "type": "pipelines.examples.sensordata.Metric" 8 | }, 9 | { 10 | "name": "error", "type": "string" 11 | } 12 | ] 13 | } 14 | 15 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/avro/InvalidMetric.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "InvalidMetric", 5 | "fields":[ 6 | { 7 | "name": "metric", "type": "pipelines.examples.sensordata.Metric" 8 | }, 9 | { 10 | "name": "error", "type": "string" 11 | } 12 | ] 13 | } 14 | 15 | -------------------------------------------------------------------------------- /warez/spark-streamlets/src/test/scala/pipelines/example/warez/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example.warez 2 | import java.util.UUID 3 | 4 | import warez.Sku 5 | 6 | import scala.collection.immutable.Seq 7 | 8 | object TestUtils { 9 | def uuid: String = UUID.randomUUID().toString 10 | def genSkus(names: Seq[String] = Seq("small", "med", "large")): Seq[Sku] = names.map(Sku(uuid, _)) 11 | } 12 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/JsonFormats.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import spray.json._ 4 | import pipelines.examples.carly.data.CallRecord 5 | 6 | case object JsonCallRecord extends DefaultJsonProtocol { 7 | implicit val crFormat = jsonFormat(CallRecord.apply, "user", "other", "direction", "duration", "timestamp") 8 | } 9 | -------------------------------------------------------------------------------- /spark-resilience-test/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | rs-ingress = pipelines.example.SparkSequenceGeneratorIngress 4 | rs-process = pipelines.example.SuicidalMonkeyProcessor 5 | rs-egress = pipelines.example.SparkSequenceValidatorEgress 6 | } 7 | connections { 8 | rs-ingress.out = [rs-process.in] 9 | rs-process.out = [rs-egress.in] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /warez/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | 
project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | target-env.sbt 31 | -------------------------------------------------------------------------------- /warez/datamodel/src/main/avro/PriceUpdate.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "warez", 3 | 4 | "type": "record", 5 | "name": "PriceUpdate", 6 | 7 | "fields": [ 8 | { 9 | "name": "productId", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "skuId", 14 | "type": "string" 15 | }, 16 | { 17 | "name": "price", 18 | "type": "int" 19 | } 20 | ] 21 | } 22 | 23 | -------------------------------------------------------------------------------- /warez/datamodel/src/main/avro/StockUpdate.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "warez", 3 | 4 | "type": "record", 5 | "name": "StockUpdate", 6 | 7 | "fields": [ 8 | { 9 | "name": "productId", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "skuId", 14 | "type": "string" 15 | }, 16 | { 17 | "name": "diff", 18 | "type": "int" 19 | } 20 | ] 21 | } 22 | 23 | -------------------------------------------------------------------------------- /call-record-aggregator/datamodel/src/main/avro/InvalidRecord.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.carly.data", 3 | "type": "record", 4 | "name": "InvalidRecord", 5 | "fields":[ 6 | { 7 | "name": "record", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "error", 12 | "type": "string" 13 | } 14 | ] 15 | } 16 | 17 | -------------------------------------------------------------------------------- /spark-sensors/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | target-env.sbt 31 | -------------------------------------------------------------------------------- /sensor-data-java/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | target-env.sbt 31 | -------------------------------------------------------------------------------- /sensor-data-scala/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | 
target-env.sbt 31 | -------------------------------------------------------------------------------- /call-record-aggregator/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | target-env.sbt 31 | -------------------------------------------------------------------------------- /spark-resilience-test/.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | 30 | target-env.sbt 31 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/avro/Measurements.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "Measurements", 5 | "fields":[ 6 | { 7 | "name": "power", "type": "double" 8 | }, 9 | { 10 | "name": "rotorSpeed", "type": "double" 11 | }, 12 | { 13 | "name": "windSpeed", "type": "double" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/avro/Measurements.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "Measurements", 5 | "fields":[ 6 | { 7 | "name": "power", "type": "double" 8 | }, 9 | { 10 | "name": "rotorSpeed", "type": "double" 11 | }, 12 | { 13 | "name": "windSpeed", "type": "double" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /spark-sensors/src/main/avro/agg.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.example", 3 | "type": "record", 4 | "name": "Agg", 5 | "fields":[ 6 | { 7 | "name": "src", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "gauge", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "value", 16 | "type": "double" 17 | } 18 | ] 19 | } 20 | 21 | -------------------------------------------------------------------------------- /flink-taxi-ride/test-data/send-data-small.sh: -------------------------------------------------------------------------------- 1 | for str in $(cat nycTaxiRides-small.json) 2 | do 3 | echo "Using $str" 4 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-ride -H "Content-Type: application/json" --data "$str" 5 | done 6 | 7 | for str in $(cat nycTaxiFares-small.json) 8 | do 9 | echo "Using $str" 10 | curl -i -X POST taxi-ride-fare.apps.purplehat.lightbend.com/taxi-fare -H "Content-Type: application/json" --data "$str" 11 | done 12 | -------------------------------------------------------------------------------- /spark-resilience-test/src/main/avro/data.avsc: 
-------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.example", 3 | "type": "record", 4 | "name": "Data", 5 | "fields":[ 6 | { 7 | "name": "timestamp", 8 | "type": "long" 9 | }, 10 | { 11 | "name": "key", 12 | "type": "long" 13 | }, 14 | { 15 | "name": "value", 16 | "type": "long" 17 | } 18 | ] 19 | } 20 | 21 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/avro/SensorData.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "SensorData", 5 | "fields":[ 6 | { 7 | "name": "deviceId", "type": "string" 8 | }, 9 | { 10 | "name": "timestamp", "type": "long" 11 | }, 12 | { 13 | "name": "measurements", "type": "pipelines.examples.sensordata.Measurements" 14 | } 15 | ] 16 | } 17 | 18 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/wip/ProductLogger.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import pipelines.streamlets.avro._ 4 | import pipelines.akkastream.scaladsl._ 5 | import akka.actor.ActorSystem 6 | import warez.dsl._ 7 | 8 | object ProductLogger extends FlowEgress[Product](AvroInlet[Product]("in")) { 9 | def flowWithContext(system: ActorSystem) = 10 | FlowWithOffsetContext[Product].map { product ⇒ 11 | system.log.warning(s"Product! $product") 12 | product 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | etc/bootstrap-local.conf 2 | 3 | .class 4 | *.log 5 | .history 6 | 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | project/activator-sbt* 13 | 14 | .env 15 | 16 | **/values.sbt 17 | 18 | # IntelliJ 19 | .idea/ 20 | *.iml 21 | *.iws 22 | 23 | # Mac 24 | .DS_Store 25 | 26 | # vim swap files 27 | *.swp 28 | .*.swp 29 | .metals/ 30 | .vscode/ 31 | *.code-workspace 32 | */.bloop/ 33 | */project/.bloop/ 34 | .gitignore 35 | target-env.sbt 36 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | sensor-data = pipelines.examples.sensordata.SensorDataIngress 4 | filter = pipelines.examples.sensordata.FilterStreamlet 5 | metrics = pipelines.examples.sensordata.SensorDataToMetrics 6 | validation = pipelines.examples.sensordata.MetricsValidation 7 | } 8 | 9 | connections { 10 | sensor-data.out = [metrics.in] 11 | metrics.out = [filter.in] 12 | filter.out = [validation.in] 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /warez/data/generate-stock-update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | ########## 4 | # Generate a Json Document representing a stock update. 
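# A minimal usage sketch (the route below is a placeholder for the HTTP endpoint of
# the warez stock-update ingress; substitute the route of your own deployment):
#
#   ./generate-stock-update.sh | \
#     curl -i -X POST <stock-update-ingress-route> \
#       -H "Content-Type: application/json" --data-binary @-
#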
5 | ########## 6 | 7 | #set -x 8 | 9 | ROOTDIR=$(cd $(dirname $0); pwd) 10 | 11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt") 12 | if [ $((RANDOM%2)) -eq 0 ] 13 | then 14 | sku_suffix="aa" 15 | else 16 | sku_suffix="bb" 17 | fi 18 | 19 | cat << EOF 20 | { 21 | "productId": "$uuid", 22 | "skuId": "${uuid%..}$sku_suffix", 23 | "diff": $(((RANDOM%21)-10)) 24 | } 25 | EOF 26 | 27 | -------------------------------------------------------------------------------- /flink-taxi-ride/taxi-ride-pipeline/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | taxi-ride = pipelines.examples.ingestor.TaxiRideIngress 4 | taxi-fare = pipelines.examples.ingestor.TaxiFareIngress 5 | processor = pipelines.examples.processor.TaxiRideProcessor 6 | logger = pipelines.examples.logger.FarePerRideLogger 7 | } 8 | connections { 9 | taxi-ride.out = [processor.in-taxiride] 10 | taxi-fare.out = [processor.in-taxifare] 11 | processor.out = [logger.in] 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/avro/Metric.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "Metric", 5 | "fields":[ 6 | { 7 | "name": "deviceId", "type": "string" 8 | }, 9 | { 10 | "name": "timestamp", "type": "long" 11 | }, 12 | { 13 | "name": "name", "type": "string" 14 | }, 15 | { 16 | "name": "value", "type": "double" 17 | } 18 | ] 19 | } 20 | 21 | -------------------------------------------------------------------------------- /warez/data/generate-price-update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | ########## 4 | # Generate a Json Document representing a price update. 5 | ########## 6 | 7 | #set -x 8 | 9 | ROOTDIR=$(cd $(dirname $0); pwd) 10 | 11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt") 12 | if [ $((RANDOM%2)) -eq 0 ] 13 | then 14 | sku_suffix="aa" 15 | else 16 | sku_suffix="bb" 17 | fi 18 | 19 | cat << EOF 20 | { 21 | "productId": "$uuid", 22 | "skuId": "${uuid%..}$sku_suffix", 23 | "price": $(((RANDOM%1999) + 1)) 24 | } 25 | EOF 26 | 27 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/wip/PriceUpdateLogger.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import pipelines.streamlets.avro._ 4 | import pipelines.akkastream.scaladsl._ 5 | import akka.actor.ActorSystem 6 | import warez.dsl._ 7 | 8 | object PriceUpdateLogger extends FlowEgress[PriceUpdate](AvroInlet[PriceUpdate]("in")) { 9 | def flowWithContext(system: ActorSystem) = 10 | FlowWithOffsetContext[PriceUpdate].map { priceUpdate ⇒ 11 | system.log.warning(s"Price Update! 
$priceUpdate") 12 | priceUpdate 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/wip/StockUpdateLogger.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import pipelines.streamlets.avro._ 4 | import pipelines.akkastream.scaladsl._ 5 | import akka.actor.ActorSystem 6 | import warez.dsl._ 7 | 8 | object StockUpdateLogger extends FlowEgress[StockUpdate](AvroInlet[StockUpdate]("in")) { 9 | def flowWithContext(system: ActorSystem) = 10 | FlowWithOffsetContext[StockUpdate].map { stockUpdate ⇒ 11 | system.log.warning(s"Stock Update! $stockUpdate") 12 | stockUpdate 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/blueprint/akka-spark-single-processor.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | 5 | spark-process1 = pipelines.example.IdentitySparkProcessor1 6 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1 7 | 8 | egress = pipelines.example.SparkConsoleEgress 9 | } 10 | connections { 11 | ingress.out = [spark-process1.in, akka-process1.in] 12 | 13 | spark-process1.out = [egress.in1] 14 | akka-process1.out = [egress.in2] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/JsonFormats.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import spray.json._ 4 | 5 | object JsonFormats extends DefaultJsonProtocol { 6 | 7 | implicit val skuFormat: JsonFormat[Sku] = jsonFormat4(Sku.apply) 8 | 9 | implicit val priceUpdateFormat = jsonFormat3(PriceUpdate.apply) 10 | 11 | implicit val stockUpdateFormat = jsonFormat3(StockUpdate.apply) 12 | 13 | implicit val productFormat = jsonFormat5(Product.apply) 14 | 15 | implicit val recommenderModelFormat = jsonFormat4(RecommenderModel.apply) 16 | } 17 | 18 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/dsl/HttpIngress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | package dsl 3 | 4 | import akka.http.scaladsl.unmarshalling._ 5 | 6 | import pipelines.streamlets._ 7 | import pipelines.akkastream._ 8 | import pipelines.akkastream.util.scaladsl.HttpServerLogic 9 | 10 | abstract class HttpIngress[Out: FromByteStringUnmarshaller](val out: CodecOutlet[Out]) 11 | extends AkkaServerStreamlet { 12 | 13 | final override val shape = StreamletShape.withOutlets(out) 14 | 15 | override final def createLogic = HttpServerLogic.default(this, out) 16 | } 17 | -------------------------------------------------------------------------------- /call-record-aggregator/send_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DEFAULT_DATASET="./datamodel/data/data-sample-20.json" 4 | if [ "$1" == "" ] 5 | then 6 | RESOURCE=$DEFAULT_DATASET 7 | else 8 | RESOURCE="$1" 9 | fi 10 | 11 | echo "Using $RESOURCE" 12 | 13 | ROUTE_HOST=$(kubectl pipelines status call-record-pipeline | grep /cdr-ingress | awk '{print $2}') 14 | 15 | for str in $( cat $RESOURCE ); do 16 | echo Sending $str 17 | curl -i \ 18 | -X POST $ROUTE_HOST \ 19 | -u assassin:4554551n \ 20 | -H "Content-Type: 
application/json" \ 21 | --data "$str" 22 | done -------------------------------------------------------------------------------- /spark-sensors/src/main/avro/data.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.example", 3 | "type": "record", 4 | "name": "Data", 5 | "fields":[ 6 | { 7 | "name": "src", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "timestamp", 12 | "type": "long" 13 | }, 14 | { 15 | "name": "gauge", 16 | "type": "string" 17 | }, 18 | { 19 | "name": "value", 20 | "type": "double" 21 | } 22 | ] 23 | } 24 | 25 | -------------------------------------------------------------------------------- /warez/blueprint/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | products = warez.ProductIngress 4 | stock-updates = warez.StockUpdateIngress 5 | price-updates = warez.PriceUpdateIngress 6 | products-search = warez.ProductSearchApiEgress 7 | product-joiner = pipelines.example.warez.SparkProductJoiner 8 | } 9 | connections { 10 | products.out = [product-joiner.in-0] 11 | stock-updates.out = [product-joiner.in-1] 12 | price-updates.out = [product-joiner.in-2] 13 | product-joiner.out = [products-search.in] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/wrk-04-moderate-breeze.lua: -------------------------------------------------------------------------------- 1 | function read_txt_file(path) 2 | local file, errorMessage = io.open(path, "r") 3 | if not file then 4 | error("Could not read the file:" .. errorMessage .. "\n") 5 | end 6 | 7 | local content = file:read "*all" 8 | file:close() 9 | return content 10 | end 11 | 12 | init = function(args) 13 | local FileBody = read_txt_file("04-moderate-breeze.json") 14 | 15 | wrk.method = "POST" 16 | wrk.headers["Content-Type"] = "application/json" 17 | wrk.headers["Connection"] = "Keep-Alive" 18 | wrk.body = FileBody 19 | 20 | end 21 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/wrk-04-moderate-breeze.lua: -------------------------------------------------------------------------------- 1 | function read_txt_file(path) 2 | local file, errorMessage = io.open(path, "r") 3 | if not file then 4 | error("Could not read the file:" .. errorMessage .. "\n") 5 | end 6 | 7 | local content = file:read "*all" 8 | file:close() 9 | return content 10 | end 11 | 12 | init = function(args) 13 | local FileBody = read_txt_file("04-moderate-breeze.json") 14 | 15 | wrk.method = "POST" 16 | wrk.headers["Content-Type"] = "application/json" 17 | wrk.headers["Connection"] = "Keep-Alive" 18 | wrk.body = FileBody 19 | 20 | end 21 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/RecommenderModelIngress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import pipelines.streamlets.avro._ 5 | import JsonFormats._ 6 | import warez.dsl._ 7 | 8 | /** 9 | * Ingress that reads the recommender model in base64 string format. We assume that the model 10 | * file is transferred after converting to base64. This should be the start of the model serving 11 | * pipeline. 
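 *
 * A minimal sketch of a POST against this ingress (the route is a placeholder and the
 * payload shape is an assumption based on RecommenderModel.avsc, with the TensorFlow
 * model bytes encoded as a base64 string):
 * {{{
 * curl -i -X POST <recommender-model-ingress-route> \
 *   -H "Content-Type: application/json" \
 *   --data '{"modelId": "m-1", "tensorFlowModel": "<base64-encoded model>", "productMap": {"sword": 0}, "customerMap": {"alice": 0}}'
 * }}}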
12 | */ 13 | class RecommenderModelIngress extends HttpIngress[RecommenderModel](AvroOutlet[RecommenderModel]("out", _.modelId.toString)) 14 | 15 | -------------------------------------------------------------------------------- /warez/datamodel/src/main/avro/RecommenderModel.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "warez", 3 | 4 | "type": "record", 5 | "name": "RecommenderModel", 6 | 7 | "fields": [ 8 | { 9 | "name": "modelId", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "tensorFlowModel", 14 | "type": "bytes" 15 | }, 16 | { 17 | "name": "productMap", 18 | "type": { 19 | "type": "map", 20 | "values": "int" 21 | } 22 | }, 23 | { 24 | "name": "customerMap", 25 | "type": { 26 | "type": "map", 27 | "values": "int" 28 | } 29 | } 30 | ] 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /warez/ml-training/avro/RecommenderModel.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "warez", 3 | 4 | "type": "record", 5 | "name": "RecommenderModel", 6 | 7 | "fields": [ 8 | { 9 | "name": "modelId", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "tensorFlowModel", 14 | "type": "bytes" 15 | }, 16 | { 17 | "name": "productMap", 18 | "type": { 19 | "type": "map", 20 | "values": "int" 21 | } 22 | }, 23 | { 24 | "name": "customerMap", 25 | "type": { 26 | "type": "map", 27 | "values": "int" 28 | } 29 | } 30 | ] 31 | } 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /flink-taxi-ride/test-data/nycTaxiFares-small.json: -------------------------------------------------------------------------------- 1 | {"driverId":2013000006,"paymentType":"CSH","rideId":6,"startTime":1356998400000,"taxiId":2013000006,"tip":0.0,"tolls":4.800000190734863,"totalFare":34.29999923706055} 2 | {"driverId":2013000011,"paymentType":"CRD","rideId":11,"startTime":1356998400000,"taxiId":2013000011,"tip":4.699999809265137,"tolls":0.0,"totalFare":28.700000762939453} 3 | {"driverId":2013000031,"paymentType":"CSH","rideId":31,"startTime":1356998400000,"taxiId":2013000031,"tip":0.0,"tolls":0.0,"totalFare":20.5} 4 | {"driverId":2013000055,"paymentType":"CSH","rideId":55,"startTime":1356998400000,"taxiId":2013000055,"tip":0.0,"tolls":0.0,"totalFare":26.5} 5 | -------------------------------------------------------------------------------- /call-record-aggregator/datamodel/src/main/avro/AggregatedCallStats.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.carly.data", 3 | "type": "record", 4 | "name": "AggregatedCallStats", 5 | "fields":[ 6 | { 7 | "name": "startTime", 8 | "type": "long" 9 | }, 10 | { 11 | "name": "windowDuration", 12 | "type": "long" 13 | }, 14 | { 15 | "name": "avgCallDuration", 16 | "type": "double" 17 | }, 18 | { 19 | "name": "totalCallDuration", 20 | "type": "long" 21 | } 22 | ] 23 | } 24 | 25 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataHttpIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import pipelines.akkastream._ 5 | import pipelines.akkastream.util.scaladsl._ 6 | 7 | import pipelines.streamlets._ 8 | import 
pipelines.streamlets.avro._ 9 | import SensorDataJsonSupport._ 10 | 11 | class SensorDataHttpIngress extends AkkaServerStreamlet { 12 | val out = AvroOutlet[SensorData]("out").withPartitioner(RoundRobinPartitioner) 13 | def shape = StreamletShape.withOutlets(out) 14 | override def createLogic = HttpServerLogic.default(this, out) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataMerge.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.streamlets._ 4 | import pipelines.streamlets.avro._ 5 | import pipelines.akkastream._ 6 | import pipelines.akkastream.util.scaladsl.MergeLogic 7 | 8 | class SensorDataMerge extends AkkaStreamlet { 9 | val in0 = AvroInlet[SensorData]("in-0") 10 | val in1 = AvroInlet[SensorData]("in-1") 11 | val out = AvroOutlet[SensorData]("out", _.deviceId.toString) 12 | 13 | final override val shape = StreamletShape.withInlets(in0, in1).withOutlets(out) 14 | final override def createLogic = new MergeLogic(Vector(in0, in1), out) 15 | } 16 | -------------------------------------------------------------------------------- /warez/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /flink-taxi-ride/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /mixed-sensors/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 
8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /sensor-data-java/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /sensor-data-scala/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /spark-sensors/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /call-record-aggregator/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 
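//
// A minimal sketch of such a global credentials entry, e.g. in ~/.sbt/1.0/lightbend-commercial.sbt
// (the file name and realm below are illustrative; use the exact values from the guide referenced below):
//
//   credentials += Credentials("lightbend-commercial", "repo.lightbend.com", "<username>", "<password>")
//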
5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /spark-resilience-test/project/pipelines-plugins.sbt: -------------------------------------------------------------------------------- 1 | // Resolver for the pipelines-sbt plugin 2 | // 3 | // NOTE: Lightbend Commercial repository! 4 | // Please add your Lightbend Commercial download credentials to the global SBT config. 5 | // 6 | // Refer to https://github.com/lightbend/pipelines-docs/blob/master/user-guide/getting-started.md 7 | // for details on how to setup your Lightbend Commercial download credentials. 8 | // 9 | resolvers += Resolver.url("lightbend-commercial", url("https://repo.lightbend.com/commercial-releases"))(Resolver.ivyStylePatterns) 10 | resolvers += "Akka Snapshots" at "https://repo.akka.io/snapshots/" 11 | 12 | addSbtPlugin("com.lightbend.pipelines" % "sbt-pipelines" % "1.2.2") 13 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/dsl/FlowEgress.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | package dsl 3 | 4 | import akka.actor.ActorSystem 5 | 6 | import pipelines.streamlets._ 7 | import pipelines.akkastream._ 8 | import pipelines.akkastream.scaladsl._ 9 | 10 | abstract class FlowEgress[In](val in: CodecInlet[In]) 11 | extends AkkaStreamlet { 12 | 13 | final override val shape = StreamletShape.withInlets(in) 14 | def flowWithContext(system: ActorSystem): FlowWithOffsetContext[In, In] 15 | 16 | override def createLogic = new RunnableGraphStreamletLogic { 17 | def runnableGraph = 18 | sourceWithOffsetContext(in) 19 | .via(flowWithContext(system)) 20 | .to(sinkWithOffsetContext) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /call-record-aggregator/datamodel/src/main/avro/CallRecord.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.carly.data", 3 | "type": "record", 4 | "name": "CallRecord", 5 | "fields":[ 6 | { 7 | "name": "user", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "other", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "direction", 16 | "type": "string" 17 | }, 18 | { 19 | "name": "duration", 20 | "type": "long" 21 | }, 22 | { 23 | "name": "timestamp", 24 | "type": "long" 25 | } 26 | ] 27 | } 28 | 29 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/avro/SensorData.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "SensorData", 5 | "fields":[ 6 | { 7 | "name": "deviceId", 8 | "type": { 9 | "type": "string", 10 | "logicalType": "uuid" 11 | } 12 | }, 13 | { 14 | "name": "timestamp", 15 | "type": { 16 | "type": "long", 17 | "logicalType": "timestamp-millis" 18 | } 19 | }, 20 | { 21 | "name": "measurements", "type": 
"pipelines.examples.sensordata.Measurements" 22 | } 23 | ] 24 | } 25 | 26 | -------------------------------------------------------------------------------- /warez/test-data/product-black-hole.json: -------------------------------------------------------------------------------- 1 | { 2 | "id":"123456789", 3 | "name":"Acme Portable Hole", 4 | "description":"A cartoon hole that can be applied to any surface. https://www.youtube.com/watch?v=znzkdE-QQp0", 5 | "keywords":[ 6 | "black", 7 | "hole", 8 | "gag", 9 | "plot device", 10 | "roger rabbit" 11 | ], 12 | "skus":[ 13 | { 14 | "id":"1", 15 | "name":"Small Hole", 16 | "stock":10, 17 | "price":5 18 | }, 19 | { 20 | "id":"2", 21 | "name":"Medium Hole", 22 | "stock":10, 23 | "price":10 24 | }, 25 | { 26 | "id":"3", 27 | "name":"Large Hole", 28 | "stock":15, 29 | "price":20 30 | } 31 | ] 32 | } -------------------------------------------------------------------------------- /warez/test-data/product-singing-sword.json: -------------------------------------------------------------------------------- 1 | { 2 | "id":"912345678", 3 | "name":"Acme Singing Sword", 4 | "description":"A cartoon singing sword that belts out show tunes. https://www.youtube.com/watch?v=6u8wBfDtZkE", 5 | "keywords":[ 6 | "sword", 7 | "sings", 8 | "roger rabbit", 9 | "ineffective", 10 | "weapon" 11 | ], 12 | "skus":[ 13 | { 14 | "id":"5", 15 | "name":"Tenor", 16 | "stock":50, 17 | "price":10 18 | }, 19 | { 20 | "id":"6", 21 | "name":"Baritone", 22 | "stock":5, 23 | "price":25 24 | }, 25 | { 26 | "id":"7", 27 | "name":"Bass", 28 | "stock":12, 29 | "price":50 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/TaxiFareIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | package ingestor 3 | 4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 5 | 6 | import pipelines.streamlets.avro._ 7 | import pipelines.streamlets._ 8 | import pipelines.akkastream._ 9 | import pipelines.flink.avro._ 10 | import TaxiFareJsonProtocol._ 11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic 12 | 13 | class TaxiFareIngress extends AkkaServerStreamlet { 14 | val out = AvroOutlet[TaxiFare]("out", _.rideId.toString) 15 | 16 | final override val shape = StreamletShape.withOutlets(out) 17 | final override def createLogic = HttpServerLogic.default(this, out) 18 | } 19 | -------------------------------------------------------------------------------- /flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/TaxiRideIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | package ingestor 3 | 4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 5 | 6 | import pipelines.streamlets.avro._ 7 | import pipelines.streamlets._ 8 | import pipelines.akkastream._ 9 | import pipelines.flink.avro._ 10 | import TaxiRideJsonProtocol._ 11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic 12 | 13 | class TaxiRideIngress extends AkkaServerStreamlet { 14 | val out = AvroOutlet[TaxiRide]("out", _.rideId.toString) 15 | 16 | final override val shape = StreamletShape.withOutlets(out) 17 | final override def createLogic = HttpServerLogic.default(this, out) 18 | } 19 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/avro/Metric.avsc: 
-------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.examples.sensordata", 3 | "type": "record", 4 | "name": "Metric", 5 | "fields":[ 6 | { 7 | "name": "deviceId", 8 | "type": { 9 | "type": "string", 10 | "logicalType": "uuid" 11 | } 12 | }, 13 | { 14 | "name": "timestamp", 15 | "type": { 16 | "type": "long", 17 | "logicalType": "timestamp-millis" 18 | } 19 | }, 20 | { 21 | "name": "name", "type": "string" 22 | }, 23 | { 24 | "name": "value", "type": "double" 25 | } 26 | ] 27 | } 28 | 29 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/RotorSpeedFilter.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl._ 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class RotorSpeedFilter extends AkkaStreamlet { 9 | val in = AvroInlet[Metric]("in") 10 | val out = AvroOutlet[Metric]("out").withPartitioner(RoundRobinPartitioner) 11 | val shape = StreamletShape(in, out) 12 | 13 | override def createLogic = new RunnableGraphStreamletLogic() { 14 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out)) 15 | def flow = FlowWithOffsetContext[Metric].filter(_.name == "rotorSpeed") 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/blueprint/t0-t1-blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | 5 | spark-process1 = pipelines.example.IdentitySparkProcessor1 6 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1 7 | 8 | spark-process2 = pipelines.example.IdentitySparkProcessor2 9 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2 10 | 11 | egress = pipelines.example.SparkConsoleEgress 12 | } 13 | connections { 14 | ingress.out = [spark-process1.in, akka-process1.in] 15 | spark-process1.out = [spark-process2.in] 16 | akka-process1.out = [akka-process2.in] 17 | spark-process2.out = [egress.in1] 18 | akka-process2.out = [egress.in2] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordMerge.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import pipelines.streamlets._ 4 | import pipelines.streamlets.avro._ 5 | import pipelines.akkastream._ 6 | import pipelines.akkastream.util.scaladsl.MergeLogic 7 | 8 | import pipelines.examples.carly.data._ 9 | 10 | class CallRecordMerge extends AkkaStreamlet { 11 | val in0 = AvroInlet[CallRecord]("in-0") 12 | val in1 = AvroInlet[CallRecord]("in-1") 13 | val in2 = AvroInlet[CallRecord]("in-2") 14 | val out = AvroOutlet[CallRecord]("out", _.user) 15 | final override val shape = StreamletShape.withInlets(in0, in1, in2).withOutlets(out) 16 | final override def createLogic = new MergeLogic(Vector(in0, in1, in2), out) 17 | } 18 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor0.scala: 
-------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic } 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class IdentityAkkaStreamsProcessor0 extends AkkaStreamlet { 9 | val in = AvroInlet[Data]("in") 10 | val out = AvroOutlet[Data]("out", _.src) 11 | 12 | val shape = StreamletShape(in).withOutlets(out) 13 | 14 | override def createLogic = new RunnableGraphStreamletLogic() { 15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out)) 16 | def flow = FlowWithOffsetContext[Data].map { d ⇒ Thread.sleep(100); d } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor1.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic } 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class IdentityAkkaStreamsProcessor1 extends AkkaStreamlet { 9 | val in = AvroInlet[Data]("in") 10 | val out = AvroOutlet[Data]("out", _.src) 11 | 12 | val shape = StreamletShape(in).withOutlets(out) 13 | 14 | override def createLogic = new RunnableGraphStreamletLogic() { 15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out)) 16 | def flow = FlowWithOffsetContext[Data].map(d ⇒ d.copy(t1 = TimeOps.nowAsOption)) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentityAkkaStreamsProcessor2.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl.{ FlowWithOffsetContext, RunnableGraphStreamletLogic } 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class IdentityAkkaStreamsProcessor2 extends AkkaStreamlet { 9 | val in = AvroInlet[Data]("in") 10 | val out = AvroOutlet[Data]("out", _.src) 11 | 12 | val shape = StreamletShape(in).withOutlets(out) 13 | 14 | override def createLogic = new RunnableGraphStreamletLogic() { 15 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out)) 16 | def flow = FlowWithOffsetContext[Data].map(d ⇒ d.copy(t2 = TimeOps.nowAsOption)) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor0.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import org.apache.spark.sql.streaming.OutputMode 4 | 5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 6 | import pipelines.spark.sql.SQLImplicits._ 7 | import pipelines.streamlets.StreamletShape 8 | import pipelines.streamlets.avro._ 9 | 10 | class IdentitySparkProcessor0 extends SparkStreamlet { 11 | 12 | val in = AvroInlet[Data]("in") 13 | val out = AvroOutlet[Data]("out", _.src) 14 | val shape = StreamletShape(in, out) 15 | 16 | override def createLogic() = new SparkStreamletLogic { 17 | override def buildStreamingQueries = { 18 | 
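// The map below is an identity pass-through that sleeps ~200 ms per element
// (presumably to simulate a slow processing stage) before the stream is
// written to `out` in Append mode.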
writeStream(readStream(in).map { d ⇒ Thread.sleep(200); d }, out, OutputMode.Append).toQueryExecution 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor2.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import org.apache.spark.sql.streaming.OutputMode 4 | 5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 6 | import pipelines.spark.sql.SQLImplicits._ 7 | import pipelines.streamlets.StreamletShape 8 | import pipelines.streamlets.avro._ 9 | 10 | class IdentitySparkProcessor2 extends SparkStreamlet { 11 | 12 | val in = AvroInlet[Data]("in") 13 | val out = AvroOutlet[Data]("out", _.src) 14 | val shape = StreamletShape(in, out) 15 | 16 | override def createLogic() = new SparkStreamletLogic { 17 | override def buildStreamingQueries = { 18 | writeStream(readStream(in).map(d ⇒ d.copy(t2 = TimeOps.nowAsOption)), out, OutputMode.Append).toQueryExecution 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/avro/data.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.example", 3 | "type": "record", 4 | "name": "Data", 5 | "fields":[ 6 | { 7 | "name": "src", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "timestamp", 12 | "type": "long" 13 | }, 14 | { 15 | "name": "t1", 16 | "type":["null", "long"], 17 | "default": null 18 | }, 19 | { 20 | "name": "t2", 21 | "type":["null", "long"], 22 | "default": null 23 | }, 24 | { 25 | "name": "gauge", 26 | "type": "string" 27 | }, 28 | { 29 | "name": "value", 30 | "type": "double" 31 | } 32 | ] 33 | } 34 | 35 | -------------------------------------------------------------------------------- /flink-taxi-ride/datamodel/src/main/avro/taxifare.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.flink.avro", 3 | "type": "record", 4 | "name": "TaxiFare", 5 | "fields":[ 6 | { 7 | "name": "rideId", "type": "long" 8 | }, 9 | { 10 | "name": "taxiId", "type": "long" 11 | }, 12 | { 13 | "name": "paymentType", "type": "string" 14 | }, 15 | { 16 | "name": "driverId", "type": "long" 17 | }, 18 | { 19 | "name": "startTime", "type": "long" 20 | }, 21 | { 22 | "name": "tip", "type": "float" 23 | }, 24 | { 25 | "name": "tolls", "type": "float" 26 | }, 27 | { 28 | "name": "totalFare", "type": "float" 29 | } 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/InvalidMetricLogger.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl._ 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class InvalidMetricLogger extends AkkaStreamlet { 9 | val inlet = AvroInlet[InvalidMetric]("in") 10 | val shape = StreamletShape.withInlets(inlet) 11 | 12 | override def createLogic = new RunnableGraphStreamletLogic() { 13 | val flow = FlowWithOffsetContext[InvalidMetric] 14 | .map { invalidMetric ⇒ 15 | system.log.warning(s"Invalid metric detected! 
$invalidMetric") 16 | invalidMetric 17 | } 18 | 19 | def runnableGraph = { 20 | sourceWithOffsetContext(inlet).via(flow).to(sinkWithOffsetContext) 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | 5 | import JsonCallRecord._ 6 | import pipelines.streamlets.avro._ 7 | import pipelines.examples.carly.data._ 8 | import pipelines.streamlets._ 9 | import pipelines.akkastream._ 10 | import pipelines.akkastream.util.scaladsl.HttpServerLogic 11 | 12 | class CallRecordIngress extends AkkaServerStreamlet { 13 | 14 | //tag::docs-outlet-partitioner-example[] 15 | val out = AvroOutlet[CallRecord]("out").withPartitioner(RoundRobinPartitioner) 16 | //end::docs-outlet-partitioner-example[] 17 | 18 | final override val shape = StreamletShape.withOutlets(out) 19 | final override def createLogic = HttpServerLogic.default(this, out) 20 | } 21 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataStreamingIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import akka.http.scaladsl.common.EntityStreamingSupport 4 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 5 | 6 | import SensorDataJsonSupport._ 7 | import pipelines.akkastream.AkkaServerStreamlet 8 | import pipelines.akkastream.util.scaladsl._ 9 | import pipelines.streamlets.{ RoundRobinPartitioner, StreamletShape } 10 | import pipelines.streamlets.avro._ 11 | 12 | class SensorDataStreamingIngress extends AkkaServerStreamlet { 13 | val out = AvroOutlet[SensorData]("out", RoundRobinPartitioner) 14 | def shape = StreamletShape.withOutlets(out) 15 | 16 | implicit val entityStreamingSupport = EntityStreamingSupport.json() 17 | override def createLogic = HttpServerLogic.defaultStreaming(this, out) 18 | } 19 | -------------------------------------------------------------------------------- /sensor-data-java/load-data-into-pvc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script loads the test file containing device ids that should be filtered out by the pipeline. 4 | # For this to work the application has to be deployed and all pods need to have entered `running` state 5 | 6 | streamletName="filter" 7 | podName=$(kubectl get pods -n sensor-data-java -l com.lightbend.pipelines/streamlet-name=$streamletName --output jsonpath={.items..metadata.name}) 8 | if [ $? -ne 0 ]; then 9 | echo "Could not find the streamlet `$streamletName` which contains the mounted PVC this script will copy the filter file into." 10 | echo "Make sure that the application has been deployed and all pods are running." 
11 | exit 1 12 | fi 13 | 14 | echo "Copying files to /mnt/data in pod $podName" 15 | kubectl cp test-data/device-ids.txt -n sensor-data-java $podName:/mnt/data 16 | 17 | echo "Done" 18 | -------------------------------------------------------------------------------- /flink-taxi-ride/ingestor/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, console 2 | 3 | # Uncomment this if you want to _only_ change Flink's logging 4 | #log4j.logger.org.apache.flink=INFO 5 | 6 | # The following lines keep the log level of common libraries/connectors on 7 | # log level INFO. The root logger does not override this. You have to manually 8 | # change the log levels here. 9 | log4j.logger.akka=WARN 10 | log4j.logger.org.apache.kafka=WARN 11 | log4j.logger.org.apache.hadoop=WARN 12 | log4j.logger.org.apache.zookeeper=WARN 13 | 14 | log4j.appender.console=org.apache.log4j.ConsoleAppender 15 | log4j.appender.console.target=System.out 16 | log4j.appender.console.immediateFlush=true 17 | log4j.appender.console.encoding=UTF-8 18 | 19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.console.layout.conversionPattern=%d [%t] %-5p %c - %m%n 21 | -------------------------------------------------------------------------------- /flink-taxi-ride/logger/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, console 2 | 3 | # Uncomment this if you want to _only_ change Flink's logging 4 | #log4j.logger.org.apache.flink=INFO 5 | 6 | # The following lines keep the log level of common libraries/connectors on 7 | # log level INFO. The root logger does not override this. You have to manually 8 | # change the log levels here. 9 | log4j.logger.akka=WARN 10 | log4j.logger.org.apache.kafka=WARN 11 | log4j.logger.org.apache.hadoop=WARN 12 | log4j.logger.org.apache.zookeeper=WARN 13 | 14 | log4j.appender.console=org.apache.log4j.ConsoleAppender 15 | log4j.appender.console.target=System.out 16 | log4j.appender.console.immediateFlush=true 17 | log4j.appender.console.encoding=UTF-8 18 | 19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.console.layout.conversionPattern=%d [%t] %-5p %c - %m%n 21 | -------------------------------------------------------------------------------- /warez/data/generate-product.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | ########## 4 | # Generate a Json Document representing a product. 
5 | ########## 6 | 7 | #set -x 8 | 9 | ROOTDIR=$(cd $(dirname $0); pwd) 10 | 11 | uuid=$(shuf -n 1 "${ROOTDIR}/values/uuids.txt") 12 | mapfile -t words < <(shuf -n 12 "${ROOTDIR}/values/5-letters-words.txt") 13 | mapfile -t keywords < <(shuf -n 2 "${ROOTDIR}/values/keywords.txt") 14 | 15 | cat << EOF 16 | { 17 | "id": "$uuid", 18 | "name": "${words[0]}-${words[1]}", 19 | "description": "${words[2]} ${words[3]} ${words[4]}, ${words[5]} ${words[6]}.", 20 | "keywords": [ 21 | "${keywords[0]}", 22 | "${keywords[1]}" 23 | ], 24 | "skus": [ 25 | { 26 | "id": "${uuid%..}aa", 27 | "name": "${words[0]}-${words[1]}-${words[7]}" 28 | }, 29 | { 30 | "id": "${uuid%..}bb", 31 | "name": "${words[0]}-${words[1]}-${words[8]}" 32 | } 33 | ] 34 | } 35 | EOF 36 | 37 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/IdentitySparkProcessor1.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.streamlets.StreamletShape 4 | 5 | import pipelines.streamlets.avro._ 6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 7 | 8 | import org.apache.spark.sql.Dataset 9 | import org.apache.spark.sql.functions._ 10 | import org.apache.spark.sql.types.TimestampType 11 | import pipelines.spark.sql.SQLImplicits._ 12 | import org.apache.spark.sql.streaming.OutputMode 13 | 14 | class IdentitySparkProcessor1 extends SparkStreamlet { 15 | 16 | val in = AvroInlet[Data]("in") 17 | val out = AvroOutlet[Data]("out", _.src) 18 | val shape = StreamletShape(in, out) 19 | 20 | override def createLogic() = new SparkStreamletLogic { 21 | override def buildStreamingQueries = { 22 | writeStream(readStream(in).map(d ⇒ d.copy(t1 = TimeOps.nowAsOption)), out, OutputMode.Append).toQueryExecution 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /spark-sensors/src/main/scala/pipelines/example/SparkConsoleEgress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.streamlets.StreamletShape 4 | 5 | import pipelines.streamlets.avro._ 6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 7 | import pipelines.spark.sql.SQLImplicits._ 8 | import org.apache.spark.sql.streaming.OutputMode 9 | 10 | class SparkConsoleEgress extends SparkStreamlet { 11 | val in = AvroInlet[Agg]("in") 12 | val shape = StreamletShape(in) 13 | 14 | override def createLogic() = new SparkStreamletLogic { 15 | //tag::docs-checkpointDir-example[] 16 | override def buildStreamingQueries = { 17 | readStream(in).writeStream 18 | .format("console") 19 | .option("checkpointLocation", context.checkpointDir("console-egress")) 20 | .outputMode(OutputMode.Append()) 21 | .start() 22 | .toQueryExecution 23 | } 24 | //end::docs-checkpointDir-example[] 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /spark-resilience-test/src/main/scala/README.md: -------------------------------------------------------------------------------- 1 | # Spark Resilience Test 2 | 3 | This example attempts to validate that the spark components in a Pipeline are resilient to failure. 4 | It creates 3 components: 5 | - A data producer ingress, 6 | - A processor, and 7 | - A stateful validator egress. 
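The paragraphs below describe the three components. For intuition, here is a minimal, hypothetical sketch of the kind of gap check the stateful validator egress performs on the monotonically increasing index (plain Scala with illustrative names, not the actual streamlet code):

```scala
// Illustrative only: find the indexes that never arrived for one group.
object GapCheck {
  // `expected` is the full range of indexes the group should contain,
  // `received` is the set of indexes that actually reached the egress.
  def missing(expected: Range, received: Set[Long]): Set[Long] =
    expected.map(_.toLong).toSet -- received

  def main(args: Array[String]): Unit = {
    val received = Set(0L, 1L, 2L, 4L, 5L) // index 3 was lost on the way
    println(missing(0 until 6, received))  // prints: Set(3)
  }
}
```

The real egress keeps this kind of bookkeeping per group of records (see `DataGroupTest` further down), so a restarted processor that leaves a hole in the sequence gets reported.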
8 | 9 | The producer creates a monotonically increasing index along with a timestamp. 10 | 11 | The egress keeps track of the indexes it receives and detects whether any gaps occur in the stream. 12 | 13 | The processor is an ephemeral pass-through. Like a suicidal monkey, it kills itself randomly. 14 | If all resilience features are working properly, it must come back up and resume its work where it left off, leaving no holes in the data stream. 15 | 16 | Note: We have determined that spark-driver pods do not currently report their health correctly. 17 | Until this is resolved, the suicidal monkey will stay alive. 18 | 19 | 20 | If gaps do appear, the egress should detect and report them. 21 | 22 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordStreamingIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 4 | import akka.http.scaladsl.common.EntityStreamingSupport 5 | 6 | import pipelines.streamlets.avro._ 7 | import pipelines.examples.carly.ingestor.JsonCallRecord._ 8 | import pipelines.examples.carly.data._ 9 | import pipelines.streamlets._ 10 | import pipelines.akkastream._ 11 | import pipelines.akkastream.util.scaladsl.HttpServerLogic 12 | 13 | class CallRecordStreamingIngress extends AkkaServerStreamlet { 14 | implicit val entityStreamingSupport = EntityStreamingSupport.json() 15 | 16 | val out = AvroOutlet[CallRecord]("out").withPartitioner(RoundRobinPartitioner) 17 | 18 | final override val shape = StreamletShape.withOutlets(out) 19 | 20 | override final def createLogic = HttpServerLogic.defaultStreaming(this, out) 21 | } 22 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/MetricsValidation.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.util.scaladsl._ 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class MetricsValidation extends AkkaStreamlet { 9 | val in = AvroInlet[Metric]("in") 10 | val invalid = AvroOutlet[InvalidMetric]("invalid").withPartitioner(metric ⇒ metric.metric.deviceId.toString) 11 | val valid = AvroOutlet[Metric]("valid").withPartitioner(RoundRobinPartitioner) 12 | val shape = StreamletShape(in).withOutlets(invalid, valid) 13 | 14 | override def createLogic = new SplitterLogic(in, invalid, valid) { 15 | def flow = flowWithOffsetContext() 16 | .map { metric ⇒ 17 | if (!SensorDataUtils.isValidMetric(metric)) Left(InvalidMetric(metric, "All measurements must be positive numbers!")) 18 | else Right(metric) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /mixed-sensors/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | // This file contains settings that are specific to a deployment environment. 2 | 3 | /* 4 | * The `pipelinesDockerRegistry` setting specifies the Docker registry 5 | * that the Pipelines sbt plugin uses for pushing application Docker images. 6 | * This registry needs to have been configured as a "pullable" registry on any 7 | * Kubernetes cluster you want to deploy this application to.
8 | * 9 | * The specified Docker registry URL needs to be configured on the cluster that 10 | * the application should be deployed to. There are two common models for setting 11 | * this up: 12 | * 13 | * 1. A central docker registry, used by multiple Kubernetes clusters 14 | * 2. A Docker registry per Kubernetes cluster (common for Openshift) 15 | * 16 | * The sbt plugin expects you to have logged in to the specified registry using 17 | * the `docker login` command. 18 | */ 19 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 20 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | System.out 15 | 16 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /flink-taxi-ride/ingestor/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | System.out 15 | 16 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /flink-taxi-ride/logger/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | System.out 15 | 16 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/blueprint/t0-process-t1-blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | 5 | spark-process1 = pipelines.example.IdentitySparkProcessor1 6 | spark-process2 = pipelines.example.IdentitySparkProcessor2 7 | spark-process = pipelines.example.IdentitySparkProcessor0 8 | 9 | akka-process = pipelines.example.IdentityAkkaStreamsProcessor0 10 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1 11 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2 12 | 13 | egress = pipelines.example.SparkConsoleEgress 14 | } 15 | connections { 16 | ingress.out = [spark-process1.in, akka-process1.in] 17 | spark-process1.out = [spark-process.in] 18 | akka-process1.out = [akka-process.in] 19 | 20 | spark-process.out = [spark-process2.in] 21 | akka-process.out = [akka-process2.in] 22 | 23 | spark-process2.out = [egress.in1] 24 | akka-process2.out = [egress.in2] 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataIngress.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | import pipelines.akkastream.AkkaServerStreamlet; 4 | 5 | import pipelines.akkastream.StreamletLogic; 6 | import pipelines.akkastream.util.javadsl.HttpServerLogic; 7 | 8 | import pipelines.streamlets.RoundRobinPartitioner; 9 | import pipelines.streamlets.StreamletShape; 10 | import pipelines.streamlets.avro.AvroOutlet; 11 | 12 | import akka.http.javadsl.marshallers.jackson.Jackson; 13 | 14 | public class SensorDataIngress extends AkkaServerStreamlet { 15 
| AvroOutlet out = AvroOutlet.create("out", SensorData.class) 16 | .withPartitioner(RoundRobinPartitioner.getInstance()); 17 | 18 | public StreamletShape shape() { 19 | return StreamletShape.createWithOutlets(out); 20 | } 21 | 22 | public StreamletLogic createLogic() { 23 | return HttpServerLogic.createDefault(this, out, Jackson.byteStringUnmarshaller(SensorData.class), getStreamletContext()); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /sensor-data-java/test-data/future-data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 4 | "timestamp": 1495545346279, 5 | "measurements": { 6 | "power": 1.7, 7 | "rotorSpeed": 3.9, 8 | "windSpeed": 25.3 9 | } 10 | }, 11 | { 12 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 13 | "timestamp": 1495545346279, 14 | "measurements": { 15 | "power": -1.7, 16 | "rotorSpeed": 3.9, 17 | "windSpeed": 25.3 18 | } 19 | } 20 | , 21 | { 22 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 23 | "timestamp": 3134584800000, 24 | "measurements": { 25 | "power": 1.7, 26 | "rotorSpeed": 3.9, 27 | "windSpeed": 100.1 28 | } 29 | }, 30 | { 31 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 32 | "timestamp": 3134584800000, 33 | "measurements": { 34 | "power": 1.7, 35 | "rotorSpeed": 3.9, 36 | "windSpeed": 100.1 37 | } 38 | } 39 | ] 40 | 41 | 42 | -------------------------------------------------------------------------------- /sensor-data-scala/test-data/future-data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 4 | "timestamp": 1495545346279, 5 | "measurements": { 6 | "power": 1.7, 7 | "rotorSpeed": 3.9, 8 | "windSpeed": 25.3 9 | } 10 | }, 11 | { 12 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 13 | "timestamp": 1495545346279, 14 | "measurements": { 15 | "power": -1.7, 16 | "rotorSpeed": 3.9, 17 | "windSpeed": 25.3 18 | } 19 | } 20 | , 21 | { 22 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 23 | "timestamp": 3134584800000, 24 | "measurements": { 25 | "power": 1.7, 26 | "rotorSpeed": 3.9, 27 | "windSpeed": 100.1 28 | } 29 | }, 30 | { 31 | "deviceId": "c75cb448-df0e-4692-8e06-0321b7703992", 32 | "timestamp": 3134584800000, 33 | "measurements": { 34 | "power": 1.7, 35 | "rotorSpeed": 3.9, 36 | "windSpeed": 100.1 37 | } 38 | } 39 | ] 40 | 41 | 42 | -------------------------------------------------------------------------------- /warez/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 
22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/RotorspeedWindowLogger.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl._ 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class RotorspeedWindowLogger extends AkkaStreamlet { 9 | val in = AvroInlet[Metric]("in") 10 | val shape = StreamletShape(in) 11 | override def createLogic = new RunnableGraphStreamletLogic() { 12 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext) 13 | def flow = { 14 | FlowWithOffsetContext[Metric] 15 | .grouped(5) 16 | .map { rotorSpeedWindow ⇒ 17 | val (avg, _) = rotorSpeedWindow.map(_.value).foldLeft((0.0, 1)) { case ((avg, idx), next) ⇒ (avg + (next - avg) / idx, idx + 1) } 18 | 19 | system.log.info(s"Average rotorspeed is: $avg") 20 | 21 | avg 22 | } 23 | .mapContext(_.last) // TODO: this is a tricky one to understand... 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /flink-taxi-ride/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /sensor-data-java/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 
5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /spark-sensors/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallAggregatorConsoleEgress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.aggregator 2 | 3 | import pipelines.streamlets._ 4 | import pipelines.streamlets.avro._ 5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 6 | import pipelines.spark.sql.SQLImplicits._ 7 | import org.apache.spark.sql.streaming.OutputMode 8 | 9 | import org.apache.log4j.{ Level, Logger } 10 | 11 | import pipelines.examples.carly.data._ 12 | 13 | class CallAggregatorConsoleEgress extends SparkStreamlet { 14 | 15 | val rootLogger = Logger.getRootLogger() 16 | rootLogger.setLevel(Level.ERROR) 17 | 18 | val in = AvroInlet[AggregatedCallStats]("in") 19 | val shape = StreamletShape(in) 20 | 21 | override def createLogic = new SparkStreamletLogic { 22 | override def buildStreamingQueries = { 23 | readStream(in).writeStream 24 | .format("console") 25 | .outputMode(OutputMode.Append()) 26 | .start() 27 | .toQueryExecution 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /sensor-data-scala/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. 
hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /spark-resilience-test/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /warez/blueprint/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | System.out 18 | 19 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /call-record-aggregator/target-env.sbt.example: -------------------------------------------------------------------------------- 1 | /* 2 | * The `pipelinesDockerRegistry` and `pipelinesDockerRepository` settings specify 3 | * the Docker registry (e.g. hostname) and repository (e.g. path on that host) 4 | * that the Pipelines sbt plugin uses for pushing application Docker images. 
5 | * 6 | * Example: 7 | * 8 | * pipelinesDockerRegistry := Some("foo.com") 9 | * pipelinesDockerRepository := Some("bar/baz") 10 | * 11 | * This will cause your application Docker images to be pushed as: 12 | * 13 | * `foo.com/bar/baz/[image name]:[tag]` 14 | * 15 | * In multi-project SBT setups, please prefix both values with `ThisBuild / `, e.g.: 16 | * 17 | * ThisBuild / pipelinesDockerRegistry := Some("foo.com") 18 | * ThisBuild / pipelinesDockerRepository := Some("bar/baz") 19 | * 20 | * The sbt plugin expects you to have logged in to the specified registry using 21 | * the `docker login` command. 22 | */ 23 | ThisBuild / pipelinesDockerRegistry := Some("YOUR_DOCKER_REGISTRY") 24 | ThisBuild / pipelinesDockerRepository := Some("YOUR_DOCKER_REPOSITORY") 25 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | System.out 18 | 19 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /call-record-aggregator/call-record-pipeline/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | cdr-generator1 = pipelines.examples.carly.aggregator.CallRecordGeneratorIngress 4 | cdr-generator2 = pipelines.examples.carly.aggregator.CallRecordGeneratorIngress 5 | merge = pipelines.examples.carly.ingestor.CallRecordMerge 6 | cdr-ingress = pipelines.examples.carly.ingestor.CallRecordIngress 7 | cdr-validator = pipelines.examples.carly.ingestor.CallRecordValidation 8 | cdr-aggregator = pipelines.examples.carly.aggregator.CallStatsAggregator 9 | console-egress = pipelines.examples.carly.output.AggregateRecordEgress 10 | error-egress = pipelines.examples.carly.output.InvalidRecordEgress 11 | 12 | } 13 | connections { 14 | cdr-generator1.out = [merge.in-0] 15 | cdr-generator2.out = [merge.in-1] 16 | cdr-ingress.out = [merge.in-2] 17 | merge.out = [cdr-validator.in] 18 | cdr-validator.valid = [cdr-aggregator.in] 19 | cdr-aggregator.out = [console-egress.in] 20 | cdr-validator.invalid = [error-egress.in] 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /call-record-aggregator/call-record-pipeline/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | System.out 18 | 19 | %d{ISO8601} %-5level [%logger{0}] - %msg%n 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /flink-taxi-ride/datamodel/src/main/avro/taxiride.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "pipelines.flink.avro", 3 | "type": "record", 4 | "name": "TaxiRide", 5 | "fields":[ 6 | { 7 | "name": "rideId", "type": "long" 8 | }, 9 | { 10 | "name": "isStart", "type": "boolean" 11 | }, 12 | { 13 | "name": "taxiId", "type": "long" 14 | }, 15 | { 16 | "name": "passengerCnt", "type": "int" 17 | }, 18 | { 19 | "name": "driverId", "type": "long" 20 | }, 21 | { 22 | "name": "startLon", "type": "float" 23 | }, 24 | { 25 | "name": "startLat", "type": "float" 26 | }, 27 | { 28 | "name": "endLon", "type": "float" 29 | }, 30 | { 31 | "name": 
"endLat", "type": "float" 32 | }, 33 | { 34 | "name": "startTime", "type": "long" 35 | }, 36 | { 37 | "name": "endTime", "type": "long" 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | http-ingress = pipelines.examples.sensordata.SensorDataHttpIngress 4 | file-ingress = pipelines.examples.sensordata.SensorDataFileIngress 5 | merge = pipelines.examples.sensordata.SensorDataMerge 6 | metrics = pipelines.examples.sensordata.SensorDataToMetrics 7 | validation = pipelines.examples.sensordata.MetricsValidation 8 | valid-logger = pipelines.examples.sensordata.ValidMetricLogger 9 | invalid-logger = pipelines.examples.sensordata.InvalidMetricLogger 10 | rotorizer = pipelines.examples.sensordata.RotorSpeedFilter 11 | rotor-avg-logger = pipelines.examples.sensordata.RotorspeedWindowLogger 12 | } 13 | 14 | connections { 15 | http-ingress.out = [merge.in-0] 16 | file-ingress.out = [merge.in-1] 17 | merge.out = [metrics.in] 18 | metrics.out = [validation.in] 19 | validation.invalid = [invalid-logger.in] 20 | validation.valid = [ 21 | valid-logger.in, 22 | rotorizer.in 23 | ] 24 | rotorizer.out = [rotor-avg-logger.in] 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /spark-resilience-test/src/test/scala/pipelines/example/SparkSequenceValidatorEgressTest.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import scala.collection.immutable.Seq 4 | import scala.concurrent.duration._ 5 | 6 | import pipelines.spark.testkit._ 7 | import pipelines.spark.sql.SQLImplicits._ 8 | 9 | class SparkSequenceValidatorEgressTest extends SparkScalaTestSupport { 10 | 11 | val streamlet = new SparkSequenceValidatorEgress() 12 | val testKit = SparkStreamletTestkit(session) 13 | 14 | "SparkSequenceValidatorEgress" should { 15 | "output streaming data" in { 16 | 17 | // Setup inlet tap on inlet(s) port(s) 18 | val in: SparkInletTap[Data] = testKit.inletAsTap[Data](streamlet.in) 19 | 20 | // Build data and send to inlet tap 21 | val now = System.currentTimeMillis() 22 | val data = (0 until SequenceSettings.GroupSize).map(i ⇒ Data(now + i * 1000, 1, i.toLong)) ++ 23 | (0 until SequenceSettings.GroupSize - 1).map(i ⇒ Data(now + i * 1000, 2, i.toLong)) 24 | in.addData(data) 25 | 26 | testKit.run(streamlet, Seq(in), Seq.empty, 10.seconds) 27 | 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/test/scala/pipelines/examples/carly/aggregator/CallRecordGeneratorIngressSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.aggregator 2 | 3 | import scala.collection.immutable.Seq 4 | import scala.concurrent.duration._ 5 | 6 | import pipelines.examples.carly.data._ 7 | 8 | import pipelines.spark.testkit._ 9 | import pipelines.spark.sql.SQLImplicits._ 10 | 11 | class CallRecordGeneratorIngressSpec extends SparkScalaTestSupport { 12 | 13 | val streamlet = new CallRecordGeneratorIngress() 14 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues(ConfigParameterValue(streamlet.RecordsPerSecond, "50")) 15 | 16 | "CallRecordGeneratorIngress" should { 17 | "produce elements to its outlet" in { 18 | 19 | // 
setup outlet tap on outlet port 20 | val out = testKit.outletAsTap[CallRecord](streamlet.out) 21 | 22 | testKit.run(streamlet, Seq.empty, Seq(out), 40.seconds) 23 | 24 | // get data from outlet tap 25 | val results = out.asCollection(session) 26 | 27 | // assert 28 | results.size must be > 0 29 | 30 | } 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataToMetrics.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl._ 5 | import pipelines.streamlets.{ RoundRobinPartitioner, StreamletShape } 6 | import pipelines.streamlets.avro._ 7 | 8 | class SensorDataToMetrics extends AkkaStreamlet { 9 | val in = AvroInlet[SensorData]("in") 10 | val out = AvroOutlet[Metric]("out").withPartitioner(RoundRobinPartitioner) 11 | val shape = StreamletShape(in, out) 12 | def flow = { 13 | FlowWithOffsetContext[SensorData] 14 | .mapConcat { data ⇒ 15 | List( 16 | Metric(data.deviceId, data.timestamp, "power", data.measurements.power), 17 | Metric(data.deviceId, data.timestamp, "rotorSpeed", data.measurements.rotorSpeed), 18 | Metric(data.deviceId, data.timestamp, "windSpeed", data.measurements.windSpeed) 19 | ) 20 | } 21 | } 22 | override def createLogic = new RunnableGraphStreamletLogic() { 23 | def runnableGraph = sourceWithOffsetContext(in).via(flow).to(sinkWithOffsetContext(out)) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/main/scala/pipelines/examples/carly/ingestor/CallRecordValidation.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import pipelines.streamlets.avro._ 4 | import pipelines.streamlets.StreamletShape 5 | import pipelines.akkastream.AkkaStreamlet 6 | import pipelines.akkastream.util.scaladsl.SplitterLogic 7 | 8 | import pipelines.examples.carly.data._ 9 | 10 | class CallRecordValidation extends AkkaStreamlet { 11 | 12 | private val oldDataWatermark = java.sql.Timestamp.valueOf("2010-01-01 00:00:00.000").getTime / 1000 //seconds 13 | 14 | val in = AvroInlet[CallRecord]("in") 15 | val left = AvroOutlet[InvalidRecord]("invalid", _.record) 16 | val right = AvroOutlet[CallRecord]("valid", _.user) 17 | 18 | final override val shape = StreamletShape(in).withOutlets(left, right) 19 | final override def createLogic = new SplitterLogic(in, left, right) { 20 | def flow = 21 | flowWithOffsetContext() 22 | .map { record ⇒ 23 | if (record.timestamp < oldDataWatermark) Left(InvalidRecord(record.toString, "Timestamp outside range!")) 24 | else Right(record) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /spark-resilience-test/src/test/scala/pipelines/example/DataGroupTest.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import org.scalatest.{ Matchers, WordSpec } 4 | 5 | class DataGroupTest extends WordSpec with Matchers { 6 | 7 | "DataGroup" should { 8 | val groupSize = 20 9 | // simulate the behavior of the data producer 10 | val data = (0 to groupSize * 10) 11 | .map(i ⇒ (i.toLong / groupSize, i.toLong)) 12 | .groupBy { case (k, _) ⇒ k } 13 | .map { case (k, seqKV) ⇒ (k, seqKV.map { case (_, v) ⇒ v }) } 
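// At this point `data` maps each group id (index / groupSize) to the complete
// sequence of indexes belonging to that group, i.e. what a gap-free producer
// would have emitted.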
14 | 15 | "report completed when it has received all data" in { 16 | val dataGroup = DataGroup(3, groupSize, data(3)) 17 | assert(dataGroup.isComplete, "dataGroup should be complete with the data sample") 18 | } 19 | 20 | "report missing elements when it doesn't have all data for its group" in { 21 | val dataSubset = data(5).drop(3) 22 | val dataGroup = DataGroup(5, groupSize, dataSubset) 23 | assert(!dataGroup.isComplete, "dataGroup should be incomplete") 24 | dataGroup.missing should be(data(5).take(3).toSet) 25 | dataGroup.missingReport should be("(100,102)") 26 | } 27 | 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataStreamingIngress.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | import akka.http.javadsl.common.EntityStreamingSupport; 4 | import akka.http.javadsl.marshallers.jackson.Jackson; 5 | 6 | import pipelines.akkastream.AkkaServerStreamlet; 7 | 8 | import pipelines.akkastream.util.javadsl.HttpServerLogic; 9 | import pipelines.akkastream.StreamletLogic; 10 | import pipelines.streamlets.RoundRobinPartitioner; 11 | import pipelines.streamlets.StreamletShape; 12 | import pipelines.streamlets.avro.AvroOutlet; 13 | 14 | public class SensorDataStreamingIngress extends AkkaServerStreamlet { 15 | 16 | AvroOutlet out = AvroOutlet.create("out", SensorData.class) 17 | .withPartitioner(RoundRobinPartitioner.getInstance()); 18 | 19 | public StreamletShape shape() { 20 | return StreamletShape.createWithOutlets(out); 21 | } 22 | 23 | public StreamletLogic createLogic() { 24 | EntityStreamingSupport ess = EntityStreamingSupport.json(); 25 | return HttpServerLogic.createDefaultStreaming(this, out, Jackson.byteStringUnmarshaller(SensorData.class), ess, getStreamletContext()); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /sensor-data-scala/load-data-into-pvc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script loads the necessary files into the PVC that is mounted by the file based ingress 4 | # For this to work the application has to be deployed and all pods need to have entered `running` state 5 | 6 | streamletName="sensor-data-file-ingress" 7 | podName=$(kubectl get pods -n sensor-data-scala -l com.lightbend.pipelines/streamlet-name=$streamletName --output jsonpath={.items..metadata.name}) 8 | if [ $? -ne 0 ]; then 9 | echo "Could not find the streamlet `$streamletName` which contains the mounted PVC this script will copy test files into." 10 | echo "Make sure that the application has been deployed and all pods are running." 
11 | exit 1 12 | fi 13 | 14 | echo "Copying files to /mnt/data in pod $podName" 15 | kubectl cp test-data/04-moderate-breeze.json -n sensor-data-scala $podName:/mnt/data 16 | kubectl cp test-data/10-storm.json -n sensor-data-scala $podName:/mnt/data 17 | kubectl cp test-data/11-violent-storm.json -n sensor-data-scala $podName:/mnt/data 18 | kubectl cp test-data/12-hurricane.json -n sensor-data-scala $podName:/mnt/data 19 | kubectl cp test-data/invalid-metric.json -n sensor-data-scala $podName:/mnt/data 20 | 21 | echo "Done" 22 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/blueprint/blueprint.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | 5 | spark-process1 = pipelines.example.IdentitySparkProcessor1 6 | spark-process2 = pipelines.example.IdentitySparkProcessor2 7 | spark-process-a100 = pipelines.example.IdentitySparkProcessor0 8 | spark-process-b100 = pipelines.example.IdentitySparkProcessor0 9 | 10 | akka-process-a100 = pipelines.example.IdentityAkkaStreamsProcessor0 11 | akka-process-b100 = pipelines.example.IdentityAkkaStreamsProcessor0 12 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1 13 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2 14 | 15 | egress = pipelines.example.SparkConsoleEgress 16 | } 17 | connections { 18 | ingress.out = [spark-process1.in, akka-process1.in] 19 | spark-process1.out = [spark-process-a100.in] 20 | akka-process1.out = [akka-process-a100.in] 21 | 22 | spark-process-a100.out = [spark-process-b100.in] 23 | akka-process-a100.out = [akka-process-b100.in] 24 | 25 | spark-process-b100.out = [spark-process2.in] 26 | akka-process-b100.out = [akka-process2.in] 27 | 28 | spark-process2.out = [egress.in1] 29 | akka-process2.out = [egress.in2] 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/blueprint/parallel-100ms-delay.conf: -------------------------------------------------------------------------------- 1 | blueprint { 2 | streamlets { 3 | ingress = pipelines.example.SparkRandomGenDataIngress 4 | 5 | spark-process1 = pipelines.example.IdentitySparkProcessor1 6 | spark-process2 = pipelines.example.IdentitySparkProcessor2 7 | spark-process-a100 = pipelines.example.IdentitySparkProcessor0 8 | spark-process-b100 = pipelines.example.IdentitySparkProcessor0 9 | 10 | akka-process-a100 = pipelines.example.IdentityAkkaStreamsProcessor0 11 | akka-process-b100 = pipelines.example.IdentityAkkaStreamsProcessor0 12 | akka-process1 = pipelines.example.IdentityAkkaStreamsProcessor1 13 | akka-process2 = pipelines.example.IdentityAkkaStreamsProcessor2 14 | 15 | egress = pipelines.example.SparkConsoleEgress 16 | } 17 | connections { 18 | ingress.out = [spark-process1.in, akka-process1.in] 19 | spark-process1.out = [spark-process-a100.in] 20 | akka-process1.out = [akka-process-a100.in] 21 | 22 | spark-process-a100.out = [spark-process-b100.in] 23 | akka-process-a100.out = [akka-process-b100.in] 24 | 25 | spark-process-b100.out = [spark-process2.in] 26 | akka-process-b100.out = [akka-process2.in] 27 | 28 | spark-process2.out = [egress.in1] 29 | akka-process2.out = [egress.in2] 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /warez/datamodel/src/main/avro/Product.avsc: 
-------------------------------------------------------------------------------- 1 | { 2 | "namespace": "warez", 3 | 4 | "type": "record", 5 | "name": "Product", 6 | 7 | "fields": [ 8 | { 9 | "name": "id", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "name": "description", 18 | "type": "string" 19 | }, 20 | { 21 | "name": "keywords", 22 | "type": { 23 | "type": "array", 24 | "items": "string" 25 | } 26 | }, 27 | { 28 | "name": "skus", 29 | "type": { 30 | "type": "array", 31 | "items": { 32 | "type": "record", 33 | "name": "Sku", 34 | 35 | "fields": [ 36 | { 37 | "name": "id", 38 | "type": "string" 39 | }, 40 | { 41 | "name": "name", 42 | "type": "string" 43 | }, 44 | { 45 | "name": "stock", 46 | "type": ["null", "int"], 47 | "default": null 48 | }, 49 | { 50 | "name": "price", 51 | "type": ["null", "int"], 52 | "default": null 53 | } 54 | ] 55 | } 56 | } 57 | } 58 | ] 59 | } 60 | 61 | -------------------------------------------------------------------------------- /spark-resilience-test/src/test/scala/pipelines/example/SparkSequenceGeneratorIngressTest.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import scala.collection.immutable.Seq 4 | import scala.concurrent.duration._ 5 | 6 | import pipelines.spark.testkit._ 7 | import pipelines.spark.sql.SQLImplicits._ 8 | 9 | class SparkSequenceGeneratorIngressTest extends SparkScalaTestSupport { 10 | 11 | val streamlet = new SparkSequenceGeneratorIngress() 12 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues(ConfigParameterValue(streamlet.RecordsPerSecond, "50")) 13 | 14 | "SparkSequenceGeneratorIngress" should { 15 | "produce data " in { 16 | 17 | // setup inlet tap on inlet(s) port(s) 18 | val out: SparkOutletTap[Data] = testKit.outletAsTap[Data](streamlet.out) 19 | 20 | // Run the streamlet using the testkit and the setup inlet taps and outlet probes 21 | testKit.run(streamlet, Seq.empty, Seq(out), 10.seconds) 22 | 23 | // get data from outlet tap 24 | val results = out.asCollection(session) 25 | val ordered = results.map(data ⇒ data.value).sorted 26 | ordered.size mustBe >(SequenceSettings.RecordsPerSecond) // at least one second of data 27 | assert((ordered zip ordered.tail).forall { case (i, j) ⇒ j == (i + 1) }, "produced list missed elements") 28 | 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/dsl/HttpServer.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | package dsl 3 | 4 | import scala.util._ 5 | 6 | import akka.actor._ 7 | import akka.http.scaladsl._ 8 | import akka.http.scaladsl.model._ 9 | import akka.stream._ 10 | import akka.stream.scaladsl._ 11 | 12 | import pipelines.streamlets.Dun 13 | import pipelines.akkastream._ 14 | 15 | trait HttpServer { 16 | def startServer( 17 | context: AkkaStreamletContext, 18 | handler: Flow[HttpRequest, HttpResponse, _], 19 | port: Int 20 | )(implicit system: ActorSystem, mat: Materializer): Unit = { 21 | import system.dispatcher 22 | Http() 23 | .bindAndHandle(handler, "0.0.0.0", port) 24 | .map { binding ⇒ 25 | context.signalReady() 26 | system.log.info(s"Bound to ${binding.localAddress.getHostName}:${binding.localAddress.getPort}") 27 | // this only completes when StreamletRef executes cleanup. 
28 | context.onStop { () ⇒ 29 | system.log.info(s"Unbinding from ${binding.localAddress.getHostName}:${binding.localAddress.getPort}") 30 | binding.unbind().map(_ ⇒ Dun) 31 | } 32 | binding 33 | } 34 | .andThen { 35 | case Failure(cause) ⇒ 36 | system.log.error(cause, s"Failed to bind to $port.") 37 | context.stop() 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-java-aggregation-output/src/main/java/pipelines/examples/carly/output/AggregateRecordEgress.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.output; 2 | 3 | import akka.NotUsed; 4 | import akka.kafka.ConsumerMessage.CommittableOffset; 5 | import akka.stream.javadsl.*; 6 | import pipelines.streamlets.*; 7 | import pipelines.streamlets.avro.*; 8 | import pipelines.akkastream.*; 9 | import pipelines.akkastream.javadsl.*; 10 | 11 | import pipelines.examples.carly.data.*; 12 | 13 | 14 | public class AggregateRecordEgress extends AkkaStreamlet { 15 | public AvroInlet in = AvroInlet.create("in", AggregatedCallStats.class); 16 | 17 | @Override public StreamletShape shape() { 18 | return StreamletShape.createWithInlets(in); 19 | } 20 | 21 | @Override 22 | public StreamletLogic createLogic() { 23 | return new RunnableGraphStreamletLogic(getStreamletContext()) { 24 | @Override 25 | public RunnableGraph createRunnableGraph() { 26 | return getSourceWithOffsetContext(in) 27 | .via( 28 | FlowWithOffsetContext.create() 29 | .map(metric -> { 30 | System.out.println(metric); 31 | return metric; 32 | }) 33 | ) 34 | .to(getSinkWithOffsetContext()); 35 | } 36 | }; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spark-resilience-test/src/main/scala/pipelines/example/SuicidalMonkeyProcessor.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import org.apache.spark.sql.Dataset 4 | import org.apache.spark.sql.streaming.OutputMode 5 | 6 | import pipelines.streamlets.StreamletShape 7 | import pipelines.streamlets.avro._ 8 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 9 | import pipelines.spark.sql.SQLImplicits._ 10 | 11 | class SuicidalMonkeyProcessor extends SparkStreamlet { 12 | val in = AvroInlet[Data]("in") 13 | val out = AvroOutlet[Data]("out", _.key.toString) 14 | val shape = StreamletShape(in, out) 15 | 16 | val rng = scala.util.Random 17 | override def createLogic() = new SparkStreamletLogic { 18 | override def buildStreamingQueries = { 19 | val outStream = process(readStream(in)) 20 | writeStream(outStream, out, OutputMode.Append).toQueryExecution 21 | } 22 | 23 | private def process(inDataset: Dataset[Data]): Dataset[Data] = { 24 | inDataset.mapPartitions { iter ⇒ 25 | // monkey business 26 | // The logic in this processor causes the current executor to crash with a certain probability. 
27 | // comment out to see the process working 28 | if (rng.nextDouble() < SequenceSettings.FailureProbability) { 29 | sys.exit(-1) 30 | } 31 | iter 32 | } 33 | 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /spark-sensors/src/main/scala/pipelines/example/MovingAverageSparklet.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.streamlets.StreamletShape 4 | 5 | import pipelines.streamlets.avro._ 6 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 7 | 8 | import org.apache.spark.sql.Dataset 9 | import org.apache.spark.sql.functions._ 10 | import org.apache.spark.sql.types.TimestampType 11 | import pipelines.spark.sql.SQLImplicits._ 12 | import org.apache.spark.sql.streaming.OutputMode 13 | 14 | class MovingAverageSparklet extends SparkStreamlet { 15 | 16 | val in = AvroInlet[Data]("in") 17 | val out = AvroOutlet[Agg]("out", _.src) 18 | val shape = StreamletShape(in, out) 19 | 20 | override def createLogic() = new SparkStreamletLogic { 21 | override def buildStreamingQueries = { 22 | val dataset = readStream(in) 23 | val outStream = process(dataset) 24 | writeStream(outStream, out, OutputMode.Append).toQueryExecution 25 | } 26 | 27 | private def process(inDataset: Dataset[Data]): Dataset[Agg] = { 28 | val query = inDataset 29 | .withColumn("ts", $"timestamp".cast(TimestampType)) 30 | .withWatermark("ts", "1 minutes") 31 | .groupBy(window($"ts", "1 minute", "30 seconds"), $"src", $"gauge").agg(avg($"value") as "avg") 32 | query.select($"src", $"gauge", $"avg" as "value").as[Agg] 33 | } 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /warez/data/README.md: -------------------------------------------------------------------------------- 1 | # Test data generators 2 | 3 | ## Generated data 4 | Generates formatted JSON documents to use as test data for the warez test application. 5 | 6 | The generated data is self-consistent: 7 | * The UUIDs for the products are chosen from `data/uuids.txt`. 8 | * The UUIDs for the SKUs are generated by replacing the last 2 characters of the associated product's UUID with `aa` or `bb`. 9 | * The base list of UUIDs has been checked to ensure that all SKU UUIDs are unique. 10 | * The keywords for the products are chosen from `data/keywords.txt`. 11 | 12 | Additional information: 13 | * Prices in the generated PriceUpdates are between 1 and 1999. 14 | * Stock diffs in the generated StockUpdates are between -10 and 10. 15 | * Product names and descriptions are generated by combining 5-letter words chosen from `data/5-letters-words.txt`. 16 | 17 | ## Usage 18 | 19 | The scripts can be combined with `curl` to send data to the application ingresses.
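For bulk testing, the generator scripts can also be wrapped in a loop. The snippet below is a hypothetical helper, not one of the scripts in this folder; it reuses the placeholder host `appname.apps.clustername.lightbend.com` from the per-entity examples that follow.

```bash
# Hypothetical bulk-send helper (not part of this folder's scripts).
# Sends 10 generated products to the product ingress; replace the host
# with your own application endpoint.
for i in $(seq 1 10); do
  curl --data "$(./generate-product.sh)" \
    http://appname.apps.clustername.lightbend.com/products
done
```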
20 | 21 | ### Products 22 | 23 | ```bash 24 | curl --data "$(./generate-product.sh)" http://appname.apps.clustername.lightbend.com/products 25 | ``` 26 | 27 | ### Price updates 28 | 29 | ```bash 30 | curl --data "$(./generate-price-update.sh)" http://appname.apps.clustername.lightbend.com/price-updates 31 | ``` 32 | 33 | ### Stock updates 34 | 35 | ```bash 36 | curl --data "$(./generate-stock-update.sh)" http://appname.apps.clustername.lightbend.com/stock-updates 37 | ``` -------------------------------------------------------------------------------- /spark-resilience-test/src/main/scala/pipelines/example/SparkSequenceGeneratorIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import org.apache.spark.sql.Dataset 4 | import org.apache.spark.sql.types.LongType 5 | import org.apache.spark.sql.streaming.OutputMode 6 | 7 | import pipelines.streamlets._ 8 | import pipelines.streamlets.StreamletShape 9 | import pipelines.streamlets.avro._ 10 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 11 | import pipelines.spark.sql.SQLImplicits._ 12 | 13 | class SparkSequenceGeneratorIngress extends SparkStreamlet { 14 | val out = AvroOutlet[Data]("out", d ⇒ d.key.toString) 15 | val shape = StreamletShape(out) 16 | 17 | val RecordsPerSecond = IntegerConfigParameter( 18 | "records-per-second", 19 | "Records per second to process.", 20 | Some(50)) 21 | 22 | override def configParameters = Vector(RecordsPerSecond) 23 | 24 | override def createLogic() = new SparkStreamletLogic { 25 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key) 26 | 27 | override def buildStreamingQueries = { 28 | writeStream(process, out, OutputMode.Append).toQueryExecution 29 | } 30 | 31 | private def process: Dataset[Data] = { 32 | session.readStream 33 | .format("rate") 34 | .option("rowsPerSecond", recordsPerSecond) 35 | .load() 36 | .withColumn("key", ($"value" / SequenceSettings.GroupSize).cast(LongType)) 37 | .as[Data] 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-java-aggregation-output/src/main/java/pipelines/examples/carly/output/InvalidRecordEgress.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.output; 2 | 3 | import akka.NotUsed; 4 | import akka.kafka.ConsumerMessage.CommittableOffset; 5 | import akka.stream.javadsl.*; 6 | 7 | import pipelines.streamlets.*; 8 | import pipelines.streamlets.avro.*; 9 | import pipelines.akkastream.*; 10 | import pipelines.akkastream.javadsl.*; 11 | import pipelines.examples.carly.data.*; 12 | 13 | public class InvalidRecordEgress extends AkkaStreamlet { 14 | public AvroInlet in = AvroInlet.create("in", InvalidRecord.class); 15 | 16 | private Object doPrint(final InvalidRecord record) { 17 | System.out.println(record); 18 | return record; 19 | } 20 | 21 | @Override public StreamletShape shape() { 22 | return StreamletShape.createWithInlets(in); 23 | } 24 | 25 | @Override 26 | public StreamletLogic createLogic() { 27 | return new RunnableGraphStreamletLogic(getStreamletContext()) { 28 | @Override 29 | public RunnableGraph createRunnableGraph() { 30 | return getSourceWithOffsetContext(in) 31 | .via(flowWithContext()) 32 | .to(getSinkWithOffsetContext()); 33 | } 34 | }; 35 | } 36 | 37 | private FlowWithContext flowWithContext() { 38 | return FlowWithOffsetContext.create().map(metric -> doPrint(metric)); 39 | } 40 
| } 41 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/JsonFormats.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import java.time.Instant 4 | import java.util.UUID 5 | 6 | import scala.util.Try 7 | 8 | import spray.json._ 9 | 10 | trait UUIDJsonSupport extends DefaultJsonProtocol { 11 | implicit object UUIDFormat extends JsonFormat[UUID] { 12 | def write(uuid: UUID) = JsString(uuid.toString) 13 | 14 | def read(json: JsValue): UUID = json match { 15 | case JsString(uuid) ⇒ Try(UUID.fromString(uuid)).getOrElse(deserializationError(s"Expected valid UUID but got '$uuid'.")) 16 | case other ⇒ deserializationError(s"Expected UUID as JsString, but got: $other") 17 | } 18 | } 19 | } 20 | 21 | trait InstantJsonSupport extends DefaultJsonProtocol { 22 | implicit object InstantFormat extends JsonFormat[Instant] { 23 | def write(instant: Instant) = JsNumber(instant.toEpochMilli) 24 | 25 | def read(json: JsValue): Instant = json match { 26 | case JsNumber(value) ⇒ Instant.ofEpochMilli(value.toLong) 27 | case other ⇒ deserializationError(s"Expected Instant as JsNumber, but got: $other") 28 | } 29 | } 30 | } 31 | 32 | object MeasurementsJsonSupport extends DefaultJsonProtocol { 33 | implicit val measurementFormat = jsonFormat3(Measurements.apply) 34 | } 35 | 36 | object SensorDataJsonSupport extends DefaultJsonProtocol with UUIDJsonSupport with InstantJsonSupport { 37 | import MeasurementsJsonSupport._ 38 | implicit val sensorDataFormat = jsonFormat3(SensorData.apply) 39 | } 40 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/ValidMetricLogger.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import pipelines.akkastream._ 4 | import pipelines.akkastream.scaladsl._ 5 | import pipelines.streamlets._ 6 | import pipelines.streamlets.avro._ 7 | 8 | class ValidMetricLogger extends AkkaStreamlet { 9 | val inlet = AvroInlet[Metric]("in") 10 | val shape = StreamletShape.withInlets(inlet) 11 | 12 | val LogLevel = RegExpConfigParameter( 13 | "log-level", 14 | "Provide one of the following log levels, debug, info, warning or error", 15 | "^debug|info|warning|error$", 16 | Some("debug") 17 | ) 18 | 19 | val MsgPrefix = StringConfigParameter( 20 | "msg-prefix", 21 | "Provide a prefix for the log lines", 22 | Some("valid-logger")) 23 | 24 | override def configParameters = Vector(LogLevel, MsgPrefix) 25 | 26 | override def createLogic = new RunnableGraphStreamletLogic() { 27 | val logF: String ⇒ Unit = streamletConfig.getString(LogLevel.key).toLowerCase match { 28 | case "debug" ⇒ system.log.debug _ 29 | case "info" ⇒ system.log.info _ 30 | case "warning" ⇒ system.log.warning _ 31 | case "error" ⇒ system.log.error _ 32 | } 33 | 34 | val msgPrefix = streamletConfig.getString(MsgPrefix.key) 35 | 36 | def log(metric: Metric) = { 37 | logF(s"$msgPrefix $metric") 38 | } 39 | 40 | def flow = { 41 | FlowWithOffsetContext[Metric] 42 | .map { validMetric ⇒ 43 | log(validMetric) 44 | validMetric 45 | } 46 | } 47 | 48 | def runnableGraph = { 49 | sourceWithOffsetContext(inlet).via(flow).to(sinkWithOffsetContext) 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- 
/spark-sensors/src/main/scala/pipelines/example/SparkRandomGenDataIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import java.sql.Timestamp 4 | 5 | import scala.util.Random 6 | 7 | import pipelines.streamlets.{ IntegerConfigParameter, StreamletShape } 8 | import pipelines.streamlets.avro._ 9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 10 | import org.apache.spark.sql.Dataset 11 | import org.apache.spark.sql.streaming.OutputMode 12 | 13 | import pipelines.spark.sql.SQLImplicits._ 14 | 15 | case class Rate(timestamp: Timestamp, value: Long) 16 | 17 | class SparkRandomGenDataIngress extends SparkStreamlet { 18 | val out = AvroOutlet[Data]("out", d ⇒ d.src) 19 | val shape = StreamletShape(out) 20 | 21 | val RecordsPerSecond = IntegerConfigParameter( 22 | "records-per-second", 23 | "Records per second to produce.", 24 | Some(50)) 25 | 26 | override def configParameters = Vector(RecordsPerSecond) 27 | 28 | override def createLogic() = new SparkStreamletLogic { 29 | 30 | override def buildStreamingQueries = { 31 | writeStream(process, out, OutputMode.Append).toQueryExecution 32 | } 33 | 34 | private def process: Dataset[Data] = { 35 | 36 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key) 37 | 38 | val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas" 39 | 40 | val rateStream = session.readStream 41 | .format("rate") 42 | .option("rowsPerSecond", recordsPerSecond) 43 | .load() 44 | .as[Rate] 45 | 46 | rateStream.map { 47 | case Rate(timestamp, value) ⇒ Data(s"src-${value % 100}", timestamp.getTime, gaugeGen(), Random.nextDouble() * value) 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /spark-sensors/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | 4 | import scalariform.formatter.preferences._ 5 | 6 | lazy val sparkSensors = (project in file(".")) 7 | .enablePlugins(PipelinesSparkApplicationPlugin) 8 | .settings( 9 | libraryDependencies ++= Seq( 10 | "ch.qos.logback" % "logback-classic" % "1.2.3", 11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 12 | ), 13 | 14 | name := "spark-sensors", 15 | organization := "com.lightbend", 16 | 17 | scalaVersion := "2.12.10", 18 | crossScalaVersions := Vector(scalaVersion.value), 19 | scalacOptions ++= Seq( 20 | "-encoding", "UTF-8", 21 | "-target:jvm-1.8", 22 | "-Xlog-reflective-calls", 23 | "-Xlint", 24 | "-Ywarn-unused", 25 | "-Ywarn-unused-import", 26 | "-deprecation", 27 | "-feature", 28 | "-language:_", 29 | "-unchecked" 30 | ), 31 | 32 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 33 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 34 | 35 | scalariformPreferences := scalariformPreferences.value 36 | .setPreference(AlignParameters, false) 37 | .setPreference(AlignSingleLineCaseStatements, true) 38 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 39 | .setPreference(DoubleIndentConstructorArguments, true) 40 | .setPreference(DoubleIndentMethodDeclaration, true) 41 | .setPreference(RewriteArrowSymbols, true) 42 | .setPreference(DanglingCloseParenthesis, Preserve) 43 | .setPreference(NewlineAtEndOfFile, true) 44 | .setPreference(AllowParamGroupsOnNewlines, true) 45 | ) 46 | 
-------------------------------------------------------------------------------- /sensor-data-java/src/main/java/pipelines/examples/sensordata/MetricsValidation.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | import akka.stream.javadsl.*; 4 | 5 | import akka.NotUsed; 6 | import akka.actor.*; 7 | import akka.kafka.ConsumerMessage.CommittableOffset; 8 | import akka.stream.*; 9 | 10 | import com.typesafe.config.Config; 11 | 12 | import pipelines.streamlets.*; 13 | import pipelines.streamlets.avro.*; 14 | import pipelines.akkastream.*; 15 | import pipelines.akkastream.javadsl.util.Either; 16 | import pipelines.akkastream.util.javadsl.*; 17 | 18 | public class MetricsValidation extends AkkaStreamlet { 19 | AvroInlet inlet = AvroInlet.create("in", Metric.class); 20 | AvroOutlet invalidOutlet = AvroOutlet.create("invalid", m -> m.metric.toString(), InvalidMetric.class); 21 | AvroOutlet validOutlet = AvroOutlet.create("valid", m -> m.getDeviceId().toString() + m.getTimestamp().toString(), Metric.class); 22 | 23 | public StreamletShape shape() { 24 | return StreamletShape.createWithInlets(inlet).withOutlets(invalidOutlet, validOutlet); 25 | } 26 | 27 | public SplitterLogic createLogic() { 28 | return new SplitterLogic(inlet, invalidOutlet, validOutlet, getStreamletContext()) { 29 | public FlowWithContext, CommittableOffset, NotUsed> createFlow() { 30 | return createFlowWithOffsetContext() 31 | .map(metric -> { 32 | if (!SensorDataUtils.isValidMetric(metric)) return Either.left(new InvalidMetric(metric, "All measurements must be positive numbers!")); 33 | else return Either.right(metric); 34 | }); 35 | } 36 | }; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spark-resilience-test/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | 4 | import scalariform.formatter.preferences._ 5 | 6 | lazy val sparkSensors = (project in file(".")) 7 | .enablePlugins(PipelinesSparkApplicationPlugin) 8 | .settings( 9 | libraryDependencies ++= Seq( 10 | "ch.qos.logback" % "logback-classic" % "1.2.3", 11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 12 | ), 13 | 14 | name := "spark-resilience-test", 15 | organization := "com.lightbend", 16 | 17 | scalaVersion := "2.12.10", 18 | crossScalaVersions := Vector(scalaVersion.value), 19 | scalacOptions ++= Seq( 20 | "-encoding", "UTF-8", 21 | "-target:jvm-1.8", 22 | "-Xlog-reflective-calls", 23 | "-Xlint", 24 | "-Ywarn-unused", 25 | "-Ywarn-unused-import", 26 | "-deprecation", 27 | "-feature", 28 | "-language:_", 29 | "-unchecked" 30 | ), 31 | 32 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 33 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 34 | 35 | scalariformPreferences := scalariformPreferences.value 36 | .setPreference(AlignParameters, false) 37 | .setPreference(AlignSingleLineCaseStatements, true) 38 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 39 | .setPreference(DoubleIndentConstructorArguments, true) 40 | .setPreference(DoubleIndentMethodDeclaration, true) 41 | .setPreference(RewriteArrowSymbols, true) 42 | .setPreference(DanglingCloseParenthesis, Preserve) 43 | .setPreference(NewlineAtEndOfFile, true) 44 | .setPreference(AllowParamGroupsOnNewlines, true) 45 | ) 46 | 
-------------------------------------------------------------------------------- /flink-taxi-ride/logger/src/main/scala/pipelines/examples/logger/FarePerRideLogger.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | package logger 3 | 4 | import pipelines.akkastream._ 5 | import pipelines.akkastream.scaladsl._ 6 | import pipelines.streamlets._ 7 | import pipelines.streamlets.avro._ 8 | import pipelines.flink.avro._ 9 | 10 | class FarePerRideLogger extends AkkaStreamlet { 11 | val inlet = AvroInlet[TaxiRideFare]("in") 12 | val shape = StreamletShape.withInlets(inlet) 13 | 14 | val LogLevel = RegExpConfigParameter( 15 | "log-level", 16 | "Provide one of the following log levels, debug, info, warning or error", 17 | "^debug|info|warning|error$", 18 | Some("info") 19 | ) 20 | 21 | val MsgPrefix = StringConfigParameter( 22 | "msg-prefix", 23 | "Provide a prefix for the log lines", 24 | Some("valid-logger")) 25 | 26 | override def configParameters = Vector(LogLevel, MsgPrefix) 27 | 28 | override def createLogic = new RunnableGraphStreamletLogic() { 29 | val logF: String ⇒ Unit = streamletConfig.getString(LogLevel.key).toLowerCase match { 30 | case "debug" ⇒ system.log.debug _ 31 | case "info" ⇒ system.log.info _ 32 | case "warning" ⇒ system.log.warning _ 33 | case "error" ⇒ system.log.error _ 34 | } 35 | 36 | val msgPrefix = streamletConfig.getString(MsgPrefix.key) 37 | 38 | def log(rideFare: TaxiRideFare) = { 39 | logF(s"$msgPrefix $rideFare") 40 | } 41 | 42 | def flow = { 43 | FlowWithOffsetContext[TaxiRideFare] 44 | .map { taxiRideFare ⇒ 45 | log(taxiRideFare) 46 | taxiRideFare 47 | } 48 | } 49 | 50 | def runnableGraph = 51 | sourceWithOffsetContext(inlet) 52 | .via(flow) 53 | .to(sinkWithOffsetContext) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /sensor-data-java/src/main/java/pipelines/examples/sensordata/SensorDataToMetrics.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | import java.util.Arrays; 4 | 5 | import akka.stream.javadsl.*; 6 | import akka.kafka.ConsumerMessage.CommittableOffset; 7 | 8 | import akka.NotUsed; 9 | 10 | import pipelines.streamlets.*; 11 | import pipelines.streamlets.avro.*; 12 | import pipelines.akkastream.*; 13 | import pipelines.akkastream.javadsl.*; 14 | 15 | public class SensorDataToMetrics extends AkkaStreamlet { 16 | AvroInlet in = AvroInlet.create("in", SensorData.class); 17 | AvroOutlet out = AvroOutlet.create("out", Metric.class) 18 | .withPartitioner(RoundRobinPartitioner.getInstance()); 19 | 20 | public StreamletShape shape() { 21 | return StreamletShape.createWithInlets(in).withOutlets(out); 22 | } 23 | 24 | private FlowWithContext flowWithContext() { 25 | return FlowWithOffsetContext.create() 26 | .mapConcat(data -> 27 | Arrays.asList( 28 | new Metric(data.getDeviceId(), data.getTimestamp(), "power", data.getMeasurements().getPower()), 29 | new Metric(data.getDeviceId(), data.getTimestamp(), "rotorSpeed", data.getMeasurements().getRotorSpeed()), 30 | new Metric(data.getDeviceId(), data.getTimestamp(), "windSpeed", data.getMeasurements().getWindSpeed()) 31 | ) 32 | ); 33 | } 34 | 35 | public StreamletLogic createLogic() { 36 | return new RunnableGraphStreamletLogic(getStreamletContext()) { 37 | public RunnableGraph createRunnableGraph() { 38 | return getSourceWithOffsetContext(in).via(flowWithContext()).to(getSinkWithOffsetContext(out)); 39 | 
} 40 | }; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/test/scala/pipelines/examples/carly/aggregator/CallStatsAggregatorSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.aggregator 2 | 3 | import java.time.Instant 4 | import java.time.temporal.ChronoUnit 5 | 6 | import scala.concurrent.duration._ 7 | 8 | import scala.util.Random 9 | 10 | import pipelines.examples.carly.data._ 11 | 12 | import pipelines.spark.testkit._ 13 | import pipelines.spark.sql.SQLImplicits._ 14 | 15 | class CallStatsAggregatorSpec extends SparkScalaTestSupport { 16 | 17 | val streamlet = new CallStatsAggregator() 18 | val testKit = SparkStreamletTestkit(session).withConfigParameterValues( 19 | ConfigParameterValue(streamlet.GroupByWindow, "1 minute"), 20 | ConfigParameterValue(streamlet.Watermark, "1 minute")) 21 | 22 | "CallStatsAggregator" should { 23 | "produce elements to its outlet" in { 24 | 25 | // setup inlet tap on inlet port 26 | val in = testKit.inletAsTap[CallRecord](streamlet.in) 27 | 28 | // setup outlet tap on outlet port 29 | val out = testKit.outletAsTap[AggregatedCallStats](streamlet.out) 30 | 31 | val maxUsers = 10 32 | val crs = (1 to 30).toList.map { i ⇒ 33 | CallRecord( 34 | s"user-${Random.nextInt(maxUsers)}", 35 | s"user-${Random.nextInt(maxUsers)}", 36 | (if (i % 2 == 0) "incoming" else "outgoing"), 37 | Random.nextInt(50), 38 | Instant.now.minus(Random.nextInt(40), ChronoUnit.MINUTES).toEpochMilli / 1000 39 | ) 40 | } 41 | 42 | in.addData(crs) 43 | 44 | testKit.run(streamlet, Seq(in), Seq(out), 30.seconds) 45 | 46 | // get data from outlet tap 47 | val results = out.asCollection(session) 48 | 49 | // assert 50 | results.size must be > 0 51 | } 52 | } 53 | } 54 | 55 | -------------------------------------------------------------------------------- /mixed-sensors/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | 4 | import scalariform.formatter.preferences._ 5 | 6 | lazy val mixedSensors = (project in file(".")) 7 | .enablePlugins(PipelinesSparkApplicationPlugin, 8 | PipelinesAkkaStreamsApplicationPlugin) 9 | .settings( 10 | libraryDependencies ++= Seq( 11 | "ch.qos.logback" % "logback-classic" % "1.2.3", 12 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 13 | ), 14 | 15 | name := "mixed-sensors", 16 | organization := "com.lightbend", 17 | 18 | scalaVersion := "2.12.10", 19 | crossScalaVersions := Vector(scalaVersion.value), 20 | scalacOptions ++= Seq( 21 | "-encoding", "UTF-8", 22 | "-target:jvm-1.8", 23 | "-Xlog-reflective-calls", 24 | "-Xlint", 25 | "-Ywarn-unused", 26 | "-Ywarn-unused-import", 27 | "-deprecation", 28 | "-feature", 29 | "-language:_", 30 | "-unchecked" 31 | ), 32 | 33 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 34 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 35 | 36 | scalariformPreferences := scalariformPreferences.value 37 | .setPreference(AlignParameters, false) 38 | .setPreference(AlignSingleLineCaseStatements, true) 39 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 40 | .setPreference(DoubleIndentConstructorArguments, true) 41 | .setPreference(DoubleIndentMethodDeclaration, true) 42 | .setPreference(RewriteArrowSymbols, true) 43 | .setPreference(DanglingCloseParenthesis, Preserve) 44 | 
.setPreference(NewlineAtEndOfFile, true) 45 | .setPreference(AllowParamGroupsOnNewlines, true) 46 | ) 47 | -------------------------------------------------------------------------------- /flink-taxi-ride/test-data/nycTaxiRides-small.json: -------------------------------------------------------------------------------- 1 | {"driverId":2013000006,"endLat":40.76491165161133,"endLon":-73.96133422851562,"endTime":0,"isStart":true,"passengerCnt":6,"rideId":6,"startLat":40.77109146118164,"startLon":-73.86613464355469,"startTime":1356998400000,"taxiId":2013000006} 2 | {"driverId":2013000011,"endLat":40.771759033203125,"endLon":-73.7923583984375,"endTime":0,"isStart":true,"passengerCnt":1,"rideId":11,"startLat":40.77376937866211,"startLon":-73.87083435058594,"startTime":1356998400000,"taxiId":2013000011} 3 | {"driverId":2013000055,"endLat":40.681209564208984,"endLon":-73.8050537109375,"endTime":0,"isStart":true,"passengerCnt":1,"rideId":55,"startLat":40.7739143371582,"startLon":-73.87117004394531,"startTime":1356998400000,"taxiId":2013000055} 4 | {"driverId":2013000031,"endLat":40.74075698852539,"endLon":-73.97993469238281,"endTime":0,"isStart":true,"passengerCnt":2,"rideId":31,"startLat":40.8077278137207,"startLon":-73.9293441772461,"startTime":1356998400000,"taxiId":2013000031} 5 | {"driverId":2013000006,"endLat":40.76491165161133,"endLon":-73.96133422851562,"endTime":1356999420000,"isStart":false,"passengerCnt":6,"rideId":6,"startLat":40.77109146118164,"startLon":-73.86613464355469,"startTime":1356998400000,"taxiId":2013000006} 6 | {"driverId":2013000011,"endLat":40.771759033203125,"endLon":-73.7923583984375,"endTime":1356999300000,"isStart":false,"passengerCnt":1,"rideId":11,"startLat":40.77376937866211,"startLon":-73.87083435058594,"startTime":1356998400000,"taxiId":2013000011} 7 | {"driverId":2013000055,"endLat":40.681209564208984,"endLon":-73.8050537109375,"endTime":1356999060000,"isStart":false,"passengerCnt":1,"rideId":55,"startLat":40.7739143371582,"startLon":-73.87117004394531,"startTime":1356998400000,"taxiId":2013000055} 8 | {"driverId":2013000031,"endLat":40.74075698852539,"endLon":-73.97993469238281,"endTime":1356999120000,"isStart":false,"passengerCnt":2,"rideId":31,"startLat":40.8077278137207,"startLon":-73.9293441772461,"startTime":1356998400000,"taxiId":2013000031} 9 | -------------------------------------------------------------------------------- /warez/akka-streamlets/src/main/scala/warez/ElasticSearchClient.scala: -------------------------------------------------------------------------------- 1 | package warez 2 | 3 | import akka.NotUsed 4 | import akka.kafka.ConsumerMessage.CommittableOffset 5 | import akka.stream.alpakka.elasticsearch.{ ReadResult, WriteMessage, WriteResult } 6 | import akka.stream.alpakka.elasticsearch.scaladsl.{ ElasticsearchFlow, ElasticsearchSource } 7 | import akka.stream.scaladsl.Source 8 | 9 | import org.apache.http.HttpHost 10 | import org.elasticsearch.client.RestClient 11 | import spray.json.{ JsObject, JsonFormat } 12 | 13 | import pipelines.akkastream.scaladsl.FlowWithOffsetContext 14 | 15 | /** 16 | * Alpakka Kafka graph stages used to index and search for Warez domain entities. 17 | */ 18 | object ElasticSearchClient { 19 | case class Config(hostname: String, port: Int, indexName: String, typeName: String = "_doc") 20 | 21 | /** 22 | * Factory method for `ElasticSearchClient`. Uses Context Bound on `JsonFormat` to make the type of our domain 23 | * entity visible (i.e. `Product`) as well as an implicit json format (i.e. 
`JsonFormat[Product]`). 24 | */ 25 | def apply[T: JsonFormat](config: Config): ElasticSearchClient[T] = 26 | new ElasticSearchClient(config) 27 | } 28 | 29 | class ElasticSearchClient[T: JsonFormat](config: ElasticSearchClient.Config) { 30 | import config._ 31 | 32 | /** 33 | * An ElasticSearch REST client used by Alpakka ElasticSearch to connect to the ES API. 34 | */ 35 | implicit val esClient: RestClient = RestClient.builder(new HttpHost(hostname, port)).build() 36 | 37 | def indexFlow(): FlowWithOffsetContext[WriteMessage[T, NotUsed], WriteResult[T, CommittableOffset]] = 38 | ElasticsearchFlow.createWithContext[T, CommittableOffset](indexName, typeName) 39 | 40 | def querySource(searchCriteria: String): Source[ReadResult[JsObject], NotUsed] = 41 | ElasticsearchSource 42 | .create(indexName, typeName, query = s"""{ 43 | "bool": { 44 | "must": { 45 | "query_string": { 46 | "query": "$searchCriteria" 47 | } 48 | } 49 | } 50 | }""") 51 | } 52 | -------------------------------------------------------------------------------- /sensor-data-java/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | 4 | import scalariform.formatter.preferences._ 5 | 6 | lazy val sensorDataJava = (project in file(".")) 7 | .enablePlugins(PipelinesAkkaStreamsApplicationPlugin) 8 | .settings( 9 | libraryDependencies ++= Seq( 10 | "com.lightbend.akka" %% "akka-stream-alpakka-file" % "1.1.2", 11 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 12 | "ch.qos.logback" % "logback-classic" % "1.2.3", 13 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 14 | "junit" % "junit" % "4.12" % "test" 15 | ), 16 | 17 | name := "sensor-data-java", 18 | organization := "com.lightbend", 19 | 20 | schemaCodeGenerator := SchemaCodeGenerator.Java, 21 | 22 | scalaVersion := "2.12.10", 23 | crossScalaVersions := Vector(scalaVersion.value), 24 | scalacOptions ++= Seq( 25 | "-encoding", "UTF-8", 26 | "-target:jvm-1.8", 27 | "-Xlog-reflective-calls", 28 | "-Xlint", 29 | "-Ywarn-unused", 30 | "-Ywarn-unused-import", 31 | "-deprecation", 32 | "-feature", 33 | "-language:_", 34 | "-unchecked" 35 | ), 36 | 37 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 38 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 39 | 40 | scalariformPreferences := scalariformPreferences.value 41 | .setPreference(AlignParameters, false) 42 | .setPreference(AlignSingleLineCaseStatements, true) 43 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 44 | .setPreference(DoubleIndentConstructorArguments, true) 45 | .setPreference(DoubleIndentMethodDeclaration, true) 46 | .setPreference(RewriteArrowSymbols, true) 47 | .setPreference(DanglingCloseParenthesis, Preserve) 48 | .setPreference(NewlineAtEndOfFile, true) 49 | .setPreference(AllowParamGroupsOnNewlines, true) 50 | ) 51 | -------------------------------------------------------------------------------- /sensor-data-scala/src/main/scala/pipelines/examples/sensordata/SensorDataFileIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata 2 | 3 | import java.nio.file 4 | import java.nio.file._ 5 | 6 | import akka.NotUsed 7 | import akka.stream.IOResult 8 | import akka.stream.alpakka.file.scaladsl.Directory 9 | import akka.stream.scaladsl._ 10 | import akka.util.ByteString 11 | import pipelines.akkastream._ 12 | import pipelines.akkastream.scaladsl._ 13 | import 
pipelines.streamlets._ 14 | import pipelines.streamlets.avro._ 15 | import spray.json.JsonParser 16 | 17 | import scala.concurrent.Future 18 | import scala.concurrent.duration._ 19 | 20 | class SensorDataFileIngress extends AkkaStreamlet { 21 | 22 | import SensorDataJsonSupport._ 23 | 24 | val out = AvroOutlet[SensorData]("out").withPartitioner(RoundRobinPartitioner) 25 | def shape = StreamletShape.withOutlets(out) 26 | 27 | private val sourceData = VolumeMount("source-data-mount", "/mnt/data", ReadWriteMany) 28 | 29 | override def volumeMounts = Vector(sourceData) 30 | 31 | // Streamlet processing steps 32 | // 1. Every X seconds 33 | // 2. Enumerate all files in the mounted path 34 | // 3. Read each file *) 35 | // 4. Deserialize file content to a SensorData value *) 36 | 37 | // *) Note that reading and deserializing the file content is done in separate steps for readability only, in production they should be merged into one step for performance reasons. 38 | 39 | override def createLogic = new RunnableGraphStreamletLogic() { 40 | val listFiles: NotUsed ⇒ Source[file.Path, NotUsed] = { _ ⇒ Directory.ls(getMountedPath(sourceData)) } 41 | val readFile: Path ⇒ Source[ByteString, Future[IOResult]] = { path: Path ⇒ FileIO.fromPath(path).via(JsonFraming.objectScanner(Int.MaxValue)) } 42 | val parseFile: ByteString ⇒ SensorData = { jsonByteString ⇒ JsonParser(jsonByteString.utf8String).convertTo[SensorData] } 43 | 44 | val emitFromFilesContinuously = Source.tick(1.second, 5.second, NotUsed) 45 | .flatMapConcat(listFiles) 46 | .flatMapConcat(readFile) 47 | .map(parseFile) 48 | def runnableGraph = emitFromFilesContinuously.to(plainSink(out)) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/SparkRandomGenDataIngress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import java.sql.Timestamp 4 | 5 | import scala.util.Random 6 | 7 | import pipelines.streamlets.{ DurationConfigParameter, IntegerConfigParameter, StreamletShape } 8 | import pipelines.streamlets.avro._ 9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 10 | import org.apache.spark.sql.Dataset 11 | import org.apache.spark.sql.streaming.{ OutputMode, Trigger } 12 | 13 | import pipelines.spark.sql.SQLImplicits._ 14 | 15 | case class Rate(timestamp: Timestamp, value: Long) 16 | 17 | class SparkRandomGenDataIngress extends SparkStreamlet { 18 | val out = AvroOutlet[Data]("out", d ⇒ d.src) 19 | val shape = StreamletShape(out) 20 | 21 | val RecordsPerSecond = IntegerConfigParameter( 22 | "records-per-second", 23 | "Records per second to produce.", 24 | Some(50)) 25 | 26 | val RampUpTime = DurationConfigParameter( 27 | "ramp-up-time", 28 | "Time to reach max records per second.", 29 | Some("0 seconds")) 30 | 31 | override def configParameters = Vector(RecordsPerSecond, RampUpTime) 32 | 33 | override def createLogic() = new SparkStreamletLogic { 34 | 35 | override def buildStreamingQueries = { 36 | writeStream(process, out, OutputMode.Append).toQueryExecution 37 | } 38 | 39 | private def process: Dataset[Data] = { 40 | 41 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key) 42 | val rampUpTime = context.streamletConfig.getDuration(RampUpTime.key, java.util.concurrent.TimeUnit.SECONDS) 43 | println(s"Using rampup time of $rampUpTime seconds") 44 | 45 | val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas" 
46 | 47 | val rateStream = session.readStream 48 | .format("rate") 49 | .option("rowsPerSecond", recordsPerSecond) 50 | .option("rampUpTime", s"${rampUpTime}s") 51 | .load() 52 | .as[Rate] 53 | 54 | rateStream.map { 55 | case Rate(timestamp, value) ⇒ Data(s"src-${value % 1000}", timestamp.getTime, None, None, gaugeGen(), value) 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /call-record-aggregator/datamodel/data/data-sample-20.json: -------------------------------------------------------------------------------- 1 | {"user":"07610039694","other":"07434677419","direction":"Incoming","duration":211,"timestamp":1284574664} 2 | {"user":"07641036117","other":"01666472054","direction":"Outgoing","duration":31,"timestamp":319101503} 3 | {"user":"07641036117","other":"07371326239","direction":"Incoming","duration":45,"timestamp":319103142} 4 | {"user":"07641036117","other":"07681546436","direction":"Outgoing","duration":10,"timestamp":319104282} 5 | {"user":"07641036117","other":"07681546436","direction":"Outgoing","duration":0,"timestamp":319104331} 6 | {"user":"07641036117","other":"07681546436","direction":"Incoming","duration":0,"timestamp":319104378} 7 | {"user":"07641036117","other":"07981267897","direction":"Outgoing","duration":0,"timestamp":319104391} 8 | {"user":"07641036117","other":"07588304495","direction":"Incoming","duration":124,"timestamp":1284057337} 9 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":474,"timestamp":1284054224} 10 | {"user":"07981267897","other":"07743039441","direction":"Missed","duration":0,"timestamp":1284058290} 11 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":0,"timestamp":1284062275} 12 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":605,"timestamp":1284146220} 13 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":1,"timestamp":1284219896} 14 | {"user":"07981267897","other":"07743039441","direction":"Outgoing","duration":59,"timestamp":1284220859} 15 | {"user":"07981267897","other":"07784425582","direction":"Outgoing","duration":1201,"timestamp":1284386006} 16 | {"user":"07981267897","other":"07641036117","direction":"Outgoing","duration":2,"timestamp":1284445157} 17 | {"user":"07163185791","other":"01850897526","direction":"Outgoing","duration":0,"timestamp":1284062712} 18 | {"user":"07163185791","other":"07066875066","direction":"Outgoing","duration":0,"timestamp":1284138862} 19 | {"user":"07163185791","other":"07066875066","direction":"Outgoing","duration":0,"timestamp":1284138886} 20 | {"user":"07163185791","other":"07691640598","direction":"Outgoing","duration":0,"timestamp":1284138976} 21 | -------------------------------------------------------------------------------- /warez/spark-streamlets/src/test/scala/pipelines/example/warez/SparkProductJoinerKitSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example.warez 2 | 3 | import scala.collection.immutable.Seq 4 | import scala.concurrent.duration._ 5 | 6 | import pipelines.spark.testkit._ 7 | import pipelines.spark.sql.SQLImplicits._ 8 | import TestUtils._ 9 | import warez._ 10 | 11 | class SparkProductJoinerKitSpec extends SparkScalaTestSupport { 12 | 13 | val testKit = SparkStreamletTestkit(session) 14 | 15 | "SparkJoin3" should { 16 | "process streaming data" in { 17 | // create spark streamlet 18 | val join3 = new SparkProductJoiner() 19 | 20 | 
// setup inlet tap on inlet port 21 | val in0: SparkInletTap[Product] = testKit.inletAsTap[Product](join3.in0) 22 | val in1: SparkInletTap[StockUpdate] = testKit.inletAsTap[StockUpdate](join3.in1) 23 | val in2: SparkInletTap[PriceUpdate] = testKit.inletAsTap[PriceUpdate](join3.in2) 24 | 25 | // setup outlet tap on outlet port 26 | val out: SparkOutletTap[Product] = testKit.outletAsTap[Product](join3.out) 27 | 28 | val socksId = uuid 29 | val pantsId = uuid 30 | val socksSkus = genSkus() 31 | val pantsSkus = genSkus() 32 | val socks = Product(socksId, "Socks", "Warm in winter", Seq("clothing", "sock", "socks"), socksSkus) 33 | val pants = Product(pantsId, "Pants", "Denim for the masses", Seq("clothing", "pants"), pantsSkus) 34 | 35 | val stockUpdate = StockUpdate(socksId, socksSkus.head.id, 1) 36 | val priceUpdate = PriceUpdate(pantsId, pantsSkus.head.id, 100) 37 | 38 | // build data and send to inlet tap 39 | val data0 = List(socks, pants) 40 | in0.addData(data0) 41 | // try multiple updates 42 | val data1 = (1 to 100).map(_ ⇒ stockUpdate) 43 | in1.addData(data1) 44 | val data2 = List(priceUpdate) 45 | in2.addData(data2) 46 | 47 | testKit.run(join3, Seq(in0, in1, in2), Seq(out), 60.seconds) 48 | 49 | // get data from outlet tap 50 | val results = out.asCollection(session) 51 | 52 | results.foreach(println) 53 | 54 | // assert 55 | results must have length 2 56 | results.exists { p ⇒ p.name == "Socks" && p.skus.head.stock.contains(100) } 57 | } 58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /warez/spark-streamlets/src/test/scala/pipelines/example/warez/SparkProductOperationsSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example.warez 2 | 3 | import org.scalatest.{ Matchers, WordSpec } 4 | import org.scalatest.OptionValues._ 5 | 6 | import scala.collection.immutable.Seq 7 | import warez.{ PriceUpdate, Product, Sku, StockUpdate } 8 | 9 | class SparkProductOperationsSpec extends WordSpec with Matchers { 10 | 11 | "A Product" should { 12 | "be updated correctly" in { 13 | val skus = Array( 14 | Sku("1", "Small Hole", Some(10), Some(5)), 15 | Sku("2", "Medium Hole", Some(10), Some(10)), 16 | Sku("3", "Large Hole", Some(15), Some(20)) 17 | ) 18 | val description = "A cartoon hole that can be applied to any surface." 
19 | val keywords = Array("black", "hole", "gag", "plot device", "roger rabbit") 20 | 21 | val p = new Product( 22 | "123456789", 23 | "Acme Portable Hole", 24 | description, 25 | keywords, 26 | skus 27 | ) 28 | 29 | val priceUpdate = PriceUpdate( 30 | "123456789", 31 | "1", 32 | 10 33 | ) 34 | val stockUpdate = StockUpdate( 35 | "123456789", 36 | "1", 37 | 10 38 | ) 39 | val zero = SparkProductJoiner.emptyProduct 40 | val p1 = SparkProductJoiner.updateProduct(zero, Seq(p).toIterator) 41 | p1 == p should equal(true) 42 | val prodPrice = SparkProductJoiner.priceUpdate2Products(priceUpdate) 43 | val p2 = SparkProductJoiner.updateProduct(p1, Seq(prodPrice).toIterator) 44 | p2.skus.find(_.id == "1").value.price should equal(Some(10)) 45 | val prodStock = SparkProductJoiner.stockUpdate2Product(stockUpdate) 46 | val p3 = SparkProductJoiner.updateProduct(p2, Seq(prodStock).toIterator) 47 | p3.skus.find(_.id == "1").value.stock should equal(Some(20)) 48 | // the same price update should cause no change here 49 | val p4 = SparkProductJoiner.updateProduct(p3, Seq(prodPrice).toIterator) 50 | p4.skus.find(_.id == "1").value.price should equal(Some(10)) 51 | p4.skus.find(_.id == "1").value.stock should equal(Some(20)) 52 | p4.description should be(description) 53 | p4.keywords should be(keywords) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /sensor-data-scala/build.sbt: -------------------------------------------------------------------------------- 1 | //tag::docs-projectSetup-example[] 2 | import sbt._ 3 | import sbt.Keys._ 4 | 5 | import scalariform.formatter.preferences._ 6 | 7 | lazy val sensorData = (project in file(".")) 8 | .enablePlugins(PipelinesAkkaStreamsApplicationPlugin) 9 | .settings( 10 | //end::docs-projectSetup-example[] 11 | libraryDependencies ++= Seq( 12 | "com.lightbend.akka" %% "akka-stream-alpakka-file" % "1.1.2", 13 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 14 | "ch.qos.logback" % "logback-classic" % "1.2.3", 15 | "com.typesafe.akka" %% "akka-http-testkit" % "10.1.10" % "test", 16 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 17 | 18 | //tag::docs-projectName-example[] 19 | ), 20 | name := "sensor-data-scala", 21 | //end::docs-projectName-example[] 22 | organization := "com.lightbend", 23 | 24 | scalaVersion := "2.12.10", 25 | crossScalaVersions := Vector(scalaVersion.value), 26 | scalacOptions ++= Seq( 27 | "-encoding", "UTF-8", 28 | "-target:jvm-1.8", 29 | "-Xlog-reflective-calls", 30 | "-Xlint", 31 | "-Ywarn-unused", 32 | "-Ywarn-unused-import", 33 | "-deprecation", 34 | "-feature", 35 | "-language:_", 36 | "-unchecked" 37 | ), 38 | runLocalConfigFile := Some("resources/local.conf"), 39 | 40 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 41 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 42 | 43 | scalariformPreferences := scalariformPreferences.value 44 | .setPreference(AlignParameters, false) 45 | .setPreference(AlignSingleLineCaseStatements, true) 46 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 47 | .setPreference(DoubleIndentConstructorArguments, true) 48 | .setPreference(DoubleIndentMethodDeclaration, true) 49 | .setPreference(RewriteArrowSymbols, true) 50 | .setPreference(DanglingCloseParenthesis, Preserve) 51 | .setPreference(NewlineAtEndOfFile, true) 52 | .setPreference(AllowParamGroupsOnNewlines, true) 53 | ) 54 | 
-------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/test/scala/pipelines/examples/carly/ingestor/CallRecordValidationSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import java.time.Instant 4 | import java.time.temporal.ChronoUnit 5 | 6 | import akka.actor._ 7 | import akka.stream._ 8 | import akka.stream.scaladsl._ 9 | import akka.testkit._ 10 | import org.scalatest._ 11 | import org.scalatest.concurrent._ 12 | 13 | import pipelines.akkastream.testkit.scaladsl._ 14 | 15 | import pipelines.examples.carly.data._ 16 | 17 | class CallRecordValidationSpec extends WordSpec with MustMatchers with ScalaFutures with BeforeAndAfterAll { 18 | private implicit val system = ActorSystem("CallRecordValidationSpec") 19 | private implicit val mat = ActorMaterializer() 20 | 21 | override def afterAll: Unit = { 22 | TestKit.shutdownActorSystem(system) 23 | } 24 | 25 | "A CallRecordValidation" should { 26 | "split incoming data into valid call records and those outside the time range" in { 27 | val testkit = AkkaStreamletTestKit(system, mat) 28 | val streamlet = new CallRecordValidation() 29 | 30 | val instant = Instant.now.toEpochMilli / 1000 31 | val past = Instant.now.minus(5000, ChronoUnit.DAYS).toEpochMilli / 1000 32 | 33 | val cr1 = CallRecord("user-1", "user-2", "f", 10L, instant) 34 | val cr2 = CallRecord("user-1", "user-2", "f", 15L, instant) 35 | val cr3 = CallRecord("user-1", "user-2", "f", 18L, instant) 36 | val cr4 = CallRecord("user-1", "user-2", "f", 40L, past) 37 | val cr5 = CallRecord("user-1", "user-2", "f", 70L, past) 38 | 39 | val source = Source(Vector(cr1, cr2, cr3, cr4, cr5)) 40 | 41 | val in = testkit.inletFromSource(streamlet.in, source) 42 | val left = testkit.outletAsTap(streamlet.left) 43 | val right = testkit.outletAsTap(streamlet.right) 44 | 45 | testkit.run(streamlet, in, List(left, right), () ⇒ { 46 | right.probe.expectMsg(("user-1", cr1)) 47 | right.probe.expectMsg(("user-1", cr2)) 48 | right.probe.expectMsg(("user-1", cr3)) 49 | left.probe.expectMsg((cr4.toString, InvalidRecord(cr4.toString, "Timestamp outside range!"))) 50 | left.probe.expectMsg((cr5.toString, InvalidRecord(cr5.toString, "Timestamp outside range!"))) 51 | }) 52 | 53 | left.probe.expectMsg(Completed) 54 | right.probe.expectMsg(Completed) 55 | } 56 | } 57 | } 58 | 59 | -------------------------------------------------------------------------------- /flink-taxi-ride/README.md: -------------------------------------------------------------------------------- 1 | ## Flink based Pipelines Application 2 | 3 | ### Problem Definition 4 | 5 | We work with two data streams, one with `TaxiRide` events generated by a Akka stream streamlet (ingress) and the other with `TaxiFare` events generated by another Akka stream streamlet (ingress). The 2 streams are then connected together through a Flink streamlet based processor which does a stateful enrichment that builds up an aggregate of `TaxiRide` to `TaxiFare` mappings. 6 | 7 | The mapping is then posted on to an Akka stream streamlet (egress) as a tuple. 
8 | 9 | ### Sub projects 10 | 11 | The following sub-projects constitute the whole application: 12 | 13 | * `datamodel` - contains the Avro schemas for `TaxiRide`, `TaxiFare` and `TaxiRideFare` 14 | * `ingestor` - contains the Akka stream ingresses that read the data streams over HTTP 15 | * `processor` - the Flink streamlet that connects the input streams and does stateful processing to generate the output stream 16 | * `logger` - contains the Akka stream egress that logs the resulting `TaxiRideFare` records. The logger streamlet has the following configurable parameters: 17 | * `valid-logger.log-level` - Log level for `*-logger` streamlets to log at. e.g. `info` 18 | * `valid-logger.msg-prefix` - Log line prefix for `*-logger` streamlets to include. e.g. `VALID` 19 | * `taxi-ride-pipeline` - the entry point containing the blueprint definition 20 | 21 | ### Build the application 22 | 23 | Here's the sequence of steps that you need to follow: 24 | 25 | ``` 26 | $ pwd 27 | .../flink-taxi-ride 28 | $ sbt 29 | > clean 30 | > buildAndPublish 31 | ``` 32 | 33 | The above builds the application and publishes its Docker images to the Docker registry configured in `target-env.sbt` (`clean` and `buildAndPublish` are run from within the sbt shell). 34 | 35 | > **Note:** You need to copy `target-env.sbt.example` to `target-env.sbt` with appropriate settings for the Docker registry in order for the build and publish to go through. 36 | 37 | The `buildAndPublish` command, if successful, prints the exact command to use for deploying the application to the cluster. 38 | 39 | ### Feeding data into the application 40 | 41 | The project comes with scripts that can be used to feed data into the ingresses over HTTP. 42 | 43 | The folder `test-data` contains two bash scripts, `send-data-rides.sh` and `send-data-fares.sh`, that can be used to feed data over HTTP to the two ingresses. You need to change the cluster names in the scripts to match your own environment.
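For example, once the cluster names inside the scripts have been edited (assuming the scripts take no arguments and post the bundled JSON files to the two ingress endpoints):

```
$ cd test-data
$ ./send-data-fares.sh
$ ./send-data-rides.sh
```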
44 | 45 | -------------------------------------------------------------------------------- /call-record-aggregator/akka-cdr-ingestor/src/test/scala/pipelines/examples/carly/ingestor/CallRecordMergeSpec.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.ingestor 2 | 3 | import java.time.Instant 4 | import java.time.temporal.ChronoUnit 5 | 6 | import akka.actor._ 7 | import akka.stream._ 8 | import akka.stream.scaladsl._ 9 | import akka.testkit._ 10 | import org.scalatest._ 11 | import org.scalatest.concurrent._ 12 | 13 | import pipelines.akkastream.testkit.scaladsl._ 14 | import pipelines.examples.carly.data._ 15 | 16 | class CallRecordMergeSpec extends WordSpec with MustMatchers with ScalaFutures with BeforeAndAfterAll { 17 | 18 | private implicit val system = ActorSystem("CallRecordMergeSpec") 19 | private implicit val mat = ActorMaterializer() 20 | 21 | override def afterAll: Unit = { 22 | TestKit.shutdownActorSystem(system) 23 | } 24 | 25 | "A CallRecordMerge" should { 26 | "merge incoming data" in { 27 | val testkit = AkkaStreamletTestKit(system, mat) 28 | val streamlet = new CallRecordMerge 29 | 30 | val instant = Instant.now.toEpochMilli / 1000 31 | val past = Instant.now.minus(5000, ChronoUnit.DAYS).toEpochMilli / 1000 32 | 33 | val cr1 = CallRecord("user-1", "user-2", "f", 10L, instant) 34 | val cr2 = CallRecord("user-1", "user-2", "f", 15L, instant) 35 | val cr3 = CallRecord("user-1", "user-2", "f", 18L, instant) 36 | val cr4 = CallRecord("user-1", "user-2", "f", 40L, past) 37 | val cr5 = CallRecord("user-1", "user-2", "f", 70L, past) 38 | val cr6 = CallRecord("user-3", "user-1", "f", 80L, past) 39 | 40 | val source0 = Source(Vector(cr1, cr2, cr3)) 41 | val source1 = Source(Vector(cr4, cr5)) 42 | val source2 = Source(Vector(cr6)) 43 | 44 | val in0 = testkit.inletFromSource(streamlet.in0, source0) 45 | val in1 = testkit.inletFromSource(streamlet.in1, source1) 46 | val in2 = testkit.inletFromSource(streamlet.in2, source2) 47 | val out = testkit.outletAsTap(streamlet.out) 48 | 49 | testkit.run(streamlet, List(in0, in1, in2), out, () ⇒ { 50 | out.probe.expectMsg(("user-1", cr1)) 51 | out.probe.expectMsg(("user-1", cr4)) 52 | out.probe.expectMsg(("user-3", cr6)) 53 | out.probe.expectMsg(("user-1", cr2)) 54 | out.probe.expectMsg(("user-1", cr5)) 55 | out.probe.expectMsg(("user-1", cr3)) 56 | }) 57 | 58 | out.probe.expectMsg(Completed) 59 | } 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pipelines Examples Applications 2 | 3 | ## `sensor-data-scala` 4 | 5 | A simple pipeline that processes events from a wind turbine farm. 6 | 7 | ## `sensor-data-java` 8 | 9 | The same as `sensor-data-scala`, but implemented using the Java DSL. 10 | 11 | ## `call-record-aggregator` 12 | 13 | An aggregation of user call record data (metadata of phone calls). 14 | 15 | ## `spark-sensors` 16 | 17 | A simple pipeline that generates events from energy devices. 18 | 19 | ## `spark-resilience-test` 20 | 21 | A simple pipeline that generates events from energy devices. This pipeline 22 | will fail based on a pre-defined probability percentage. Its purpose is to 23 | demonstrate the failure recovery features of Pipelines and Spark. 24 | 25 | ## `warez` 26 | 27 | An event-based e-commerce streaming platform. 
Events are generated based on 28 | user events such as purchases and merchant actions such as the addition of 29 | products and their stock numbers. 30 | 31 | # Pipelines Feature Grid 32 | 33 | | Application | Akka Streams (Scala) | Akka Streams (Java) | Spark | Testkit | Ingress | Egress | Auto Data Generation | 34 | |---------------------|----------------------|---------------------|-------|---------|---------|-------------------------|-------------------------| 35 | | `sensor-data-scala` | Yes | No | No | No | HTTP | stdout (logs) | Yes (Client Lua Script) | 36 | | `sensor-data-java` | No | Yes | No | No | HTTP | stdout (logs) | Yes (Client Lua Script) | 37 | | `call-record-aggregator` | Yes | Yes | Yes | Yes | HTTP | stdout (logs) | Yes | 38 | | `spark-sensors` | No | No | Yes | No | HTTP | stdout (logs) | Yes | 39 | | `spark-resilience-test` | Yes | No | No | Yes | HTTP | stdout (logs) | Yes | 40 | | `warez` | Yes | No | Yes | Yes | HTTP | ElasticSearch, HTTP API | No | 41 | 42 | # Running Examples 43 | 44 | Consult the [Pipelines Documentation](https://developer.lightbend.com/docs/pipelines/current/) 45 | for instructions on building, deploying and running Pipelines applications. 46 | 47 | --- 48 | 49 | **NOTE** 50 | 51 | Before building any examples remember to update the Docker registry in the `target-env.sbt` file. 52 | 53 | --- 54 | -------------------------------------------------------------------------------- /sensor-data-java/src/test/java/pipelines/examples/sensordata/MetricsValidationTest.java: -------------------------------------------------------------------------------- 1 | package pipelines.examples.sensordata; 2 | 3 | import java.util.*; 4 | 5 | import scala.concurrent.duration.Duration; 6 | 7 | import akka.NotUsed; 8 | import akka.actor.ActorSystem; 9 | import akka.japi.Pair; 10 | import akka.kafka.ConsumerMessage.CommittableOffset; 11 | import akka.stream.ActorMaterializer; 12 | import akka.stream.javadsl.*; 13 | import akka.stream.javadsl.Flow; 14 | import akka.testkit.TestKit; 15 | import pipelines.akkastream.*; 16 | import pipelines.akkastream.javadsl.util.*; 17 | import pipelines.akkastream.testkit.OutletTap; 18 | import pipelines.akkastream.testkit.javadsl.*; 19 | 20 | import pipelines.streamlets.*; 21 | import pipelines.streamlets.avro.*; 22 | import pipelines.streamlets.descriptors.*; 23 | 24 | import org.apache.avro.Schema; 25 | import org.scalatest.junit.JUnitSuite; 26 | import org.junit.*; 27 | import static org.junit.Assert.*; 28 | 29 | public class MetricsValidationTest extends JUnitSuite { 30 | static ActorMaterializer mat; 31 | static ActorSystem system; 32 | 33 | @BeforeClass 34 | public static void setUp() throws Exception { 35 | system = ActorSystem.create(); 36 | mat = ActorMaterializer.create(system); 37 | } 38 | 39 | @AfterClass 40 | public static void tearDown() throws Exception { 41 | TestKit.shutdownActorSystem(system, Duration.create(10, "seconds"), false); 42 | system = null; 43 | } 44 | 45 | @Test 46 | public void shouldProcessInvalidMetric() { 47 | MetricsValidation streamlet = new MetricsValidation(); 48 | AkkaStreamletTestKit testkit = AkkaStreamletTestKit.create(system, mat); 49 | 50 | QueueInletTap in = testkit.makeInletAsTap(streamlet.inlet); 51 | ProbeOutletTap valid = testkit.makeOutletAsTap(streamlet.validOutlet); 52 | ProbeOutletTap invalid = testkit.makeOutletAsTap(streamlet.invalidOutlet); 53 | long timestamp = System.currentTimeMillis(); 54 | Metric metric = new Metric("dev1", timestamp, "metric-name", -1.0d); 55 | 
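// The metric carries a negative measurement (-1.0), so MetricsValidation is expected to route it to the invalid outlet.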
in.queue().offer(metric); 56 | InvalidMetric expectedInvalidMetric = new InvalidMetric(metric, "All measurements must be positive numbers!"); 57 | 58 | String expectedKey = streamlet.invalidOutlet.partitioner().apply(expectedInvalidMetric); 59 | List<OutletTap<?>> outlets = Arrays.asList(new OutletTap<?>[] {valid, invalid}); 60 | 61 | testkit.run(streamlet, in, outlets, () -> { 62 | return invalid.probe().expectMsg(new Pair(expectedKey, expectedInvalidMetric)); 63 | }); 64 | 65 | invalid.probe().expectMsg(Completed.completed()); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /warez/ml-training/README.md: -------------------------------------------------------------------------------- 1 | # Recommender model generation 2 | 3 | The IPython notebook does the following: 4 | 5 | 1. Reads data from the `data/` folder. The `data` folder contains two variants of the data files - one containing a large dataset and the other a smaller one. 6 | 2. Builds a neural network model for learning 7 | 3. Runs the training 8 | 4. Exports the model in TensorFlow format 9 | 5. Generates an Avro binary file containing all the information that needs to be transferred to the model serving streamlet 10 | 11 | **Note:** In the current implementation of the notebook, we have the `model_path` hardcoded. This is the folder where all models, graphs, and Avro files are generated. It needs to be changed appropriately when running the notebook. 12 | 13 | 14 | ## Mapping Ids 15 | 16 | In `warez` the product (sku) ids and customer ids are modeled as strings (UUIDs), while the machine learning classifier neural network needs integers. Hence we map the UUIDs to a unique integer value for all the customer and product ids. 17 | 18 | This mapping information also needs to be exported along with the model itself. The notebook does this as well. 19 | 20 | ## Model Id 21 | 22 | In the current implementation, the model id for the generated model is specified as "recommender-model-[current timestamp]". This id will be present in the final Avro file that the notebook generates. 23 | 24 | ## Model Avro Schema 25 | 26 | The Avro file that the notebook generates is based on the schema present in the `avro/` folder, named `RecommenderModel.avsc`. This schema has to match the one present on the Scala side where the streamlets are defined. The schema is: 27 | 28 | ``` 29 | { 30 | "namespace": "warez", 31 | 32 | "type": "record", 33 | "name": "RecommenderModel", 34 | 35 | "fields": [ 36 | { 37 | "name": "modelId", 38 | "type": "string" 39 | }, 40 | { 41 | "name": "tensorFlowModel", 42 | "type": "bytes" 43 | }, 44 | { 45 | "name": "productMap", 46 | "type": { 47 | "type": "map", 48 | "values": "int" 49 | } 50 | }, 51 | { 52 | "name": "customerMap", 53 | "type": { 54 | "type": "map", 55 | "values": "int" 56 | } 57 | } 58 | ] 59 | } 60 | ``` 61 | 62 | ## Generated Avro 63 | 64 | The notebook generates two Avro files: 65 | 66 | * With the schema embedded within the binary file, saved as `recommender.avro` under the `model_path` folder. This can be imported to the streamlet for model serving 67 | * Without the schema embedded within the binary file, saved as `recommender-no-schema.avro` under the `model_path` folder.
This can also be imported to the streamlet for model serving 68 | -------------------------------------------------------------------------------- /warez/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | import scalariform.formatter.preferences._ 4 | 5 | lazy val root = blueprint 6 | 7 | lazy val datamodel = (project in file("./datamodel")) 8 | .enablePlugins(PipelinesLibraryPlugin) 9 | 10 | lazy val blueprint = (project in file("./blueprint")) 11 | .enablePlugins(PipelinesApplicationPlugin) 12 | .settings( 13 | /** 14 | * NOTE: Can we namespace or sandbox developer instances of this deployment? 15 | */ 16 | name := "warez" 17 | ) 18 | .dependsOn(akkaStreamlets, sparkStreamlets) 19 | 20 | lazy val akkaStreamlets = (project in file("./akka-streamlets")) 21 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin) 22 | .settings( 23 | commonSettings, 24 | libraryDependencies ++= Seq( 25 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 26 | "com.lightbend.akka" %% "akka-stream-alpakka-elasticsearch" % "1.1.2", 27 | "ch.qos.logback" % "logback-classic" % "1.2.3", 28 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 29 | ) 30 | ) 31 | .dependsOn(datamodel) 32 | 33 | lazy val sparkStreamlets = (project in file("./spark-streamlets")) 34 | .enablePlugins(PipelinesSparkLibraryPlugin) 35 | .settings( 36 | commonSettings, 37 | libraryDependencies ++= Seq( 38 | "ch.qos.logback" % "logback-classic" % "1.2.3", 39 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 40 | ) 41 | ) 42 | .dependsOn(datamodel) 43 | 44 | 45 | lazy val commonSettings = Seq( 46 | scalaVersion := "2.12.10", 47 | scalacOptions ++= Seq( 48 | "-encoding", "UTF-8", 49 | "-target:jvm-1.8", 50 | "-Xlog-reflective-calls", 51 | "-Xlint", 52 | "-Ywarn-unused", 53 | "-Ywarn-unused-import", 54 | "-deprecation", 55 | "-feature", 56 | "-language:_", 57 | "-unchecked" 58 | ), 59 | 60 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 61 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 62 | 63 | scalariformPreferences := scalariformPreferences.value 64 | .setPreference(AlignParameters, false) 65 | .setPreference(AlignSingleLineCaseStatements, true) 66 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 67 | .setPreference(DoubleIndentConstructorArguments, true) 68 | .setPreference(DoubleIndentMethodDeclaration, true) 69 | .setPreference(RewriteArrowSymbols, true) 70 | .setPreference(DanglingCloseParenthesis, Preserve) 71 | .setPreference(NewlineAtEndOfFile, true) 72 | .setPreference(AllowParamGroupsOnNewlines, true) 73 | ) 74 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallStatsAggregator.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples.carly.aggregator 2 | 3 | import org.apache.spark.sql.Dataset 4 | import org.apache.spark.sql.functions._ 5 | import org.apache.spark.sql.types._ 6 | 7 | import pipelines.streamlets._ 8 | import pipelines.streamlets.avro._ 9 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 10 | import org.apache.spark.sql.streaming.OutputMode 11 | import pipelines.spark.sql.SQLImplicits._ 12 | import org.apache.log4j.{ Level, Logger } 13 | 14 | import pipelines.examples.carly.data._ 15 | class CallStatsAggregator extends SparkStreamlet { 16 
| 17 | val rootLogger = Logger.getRootLogger() 18 | rootLogger.setLevel(Level.ERROR) 19 | 20 | //tag::docs-schemaAware-example[] 21 | val in = AvroInlet[CallRecord]("in") 22 | val out = AvroOutlet[AggregatedCallStats]("out", _.startTime.toString) 23 | val shape = StreamletShape(in, out) 24 | //end::docs-schemaAware-example[] 25 | 26 | val GroupByWindow = DurationConfigParameter( 27 | "group-by-window", 28 | "Window duration for the moving average computation", 29 | Some("1 minute")) 30 | 31 | val Watermark = DurationConfigParameter( 32 | "watermark", 33 | "Late events watermark duration: how long to wait for late events", 34 | Some("1 minute")) 35 | 36 | override def configParameters = Vector(GroupByWindow, Watermark) 37 | override def createLogic = new SparkStreamletLogic { 38 | val watermark = context.streamletConfig.getDuration(Watermark.key) 39 | val groupByWindow = context.streamletConfig.getDuration(GroupByWindow.key) 40 | 41 | //tag::docs-aggregationQuery-example[] 42 | override def buildStreamingQueries = { 43 | val dataset = readStream(in) 44 | val outStream = process(dataset) 45 | writeStream(outStream, out, OutputMode.Update).toQueryExecution 46 | } 47 | 48 | private def process(inDataset: Dataset[CallRecord]): Dataset[AggregatedCallStats] = { 49 | val query = 50 | inDataset 51 | .withColumn("ts", $"timestamp".cast(TimestampType)) 52 | .withWatermark("ts", s"${watermark.toMillis()} milliseconds") 53 | .groupBy(window($"ts", s"${groupByWindow.toMillis()} milliseconds")) 54 | .agg(avg($"duration") as "avgCallDuration", sum($"duration") as "totalCallDuration") 55 | .withColumn("windowDuration", $"window.end".cast(LongType) - $"window.start".cast(LongType)) 56 | 57 | query 58 | .select($"window.start".cast(LongType) as "startTime", $"windowDuration", $"avgCallDuration", $"totalCallDuration") 59 | .as[AggregatedCallStats] 60 | } 61 | //end::docs-aggregationQuery-example[] 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /sensor-data-java/README.md: -------------------------------------------------------------------------------- 1 | # `sensor-data-java` 2 | 3 | A simple Java based pipeline that ingests, converts, and filters data 4 | 5 | # Required configuration 6 | 7 | The application requires a persistent volume claim (PVC) to be created before deployment. This PVC is mounted by the `FilterStreamlet` pod, which checks the mounted directory for a configuration file containing device ids that should be filtered out from the data stream. 8 | 9 | Example PVC: 10 | 11 | ``` 12 | apiVersion: v1 13 | kind: PersistentVolumeClaim 14 | metadata: 15 | name: source-data-claim 16 | namespace: sensor-data-java 17 | spec: 18 | accessModes: 19 | - ReadWriteMany 20 | resources: 21 | requests: 22 | storage: 10Mi 23 | ``` 24 | 25 | # Upload device id filter list 26 | 27 | The filter streamlet will read a configuration file from the mounted volume. The file should contain the device ids that should be filtered out, with one device id per line. If the file is empty or does not exist, all device ids are accepted. 28 | 29 | To upload a prepared file that will filter out one device id (c75cb448-df0e-4692-8e06-0321b7703992), run the following script. 30 | 31 | ./load-data-into-pvc.sh 32 | 33 | The file uploaded is named `test-data/device-ids.txt`. 
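The filter file is plain text with one device id per line. Since the prepared file filters out a single device, its contents presumably amount to just:

```
c75cb448-df0e-4692-8e06-0321b7703992
```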
34 | 35 | # Generate data 36 | 37 | To send data to the HTTP ingress, do the following: 38 | 39 | - Get `sensor-data` ingress HTTP endpoint with `kubectl pipelines status sensor-data-java` 40 | 41 | In the example output below the HTTP endpoint would be `docker-registry-default.my.kubernetes.cluster/sensor-data`: 42 | 43 | ``` 44 | kubectl pipelines status sensor-data-java 45 | Name: sensor-data-java 46 | Namespace: sensor-data-java 47 | Version: 445-fcd70ca 48 | Created: 2019-08-20 11:24:54 +0200 CEST 49 | Status: Running 50 | 51 | STREAMLET ENDPOINT 52 | sensor-data docker-registry-default.my.kubernetes.cluster/sensor-data 53 | 54 | STREAMLET POD STATUS RESTARTS READY 55 | metrics sensor-data-java-metrics-67bc5c45f7-7v5p9 Running 0 True 56 | sensor-data sensor-data-java-sensor-data-f8fb77d85-bgtb9 Running 0 True 57 | filter sensor-data-java-filter-667d85d44b-8ltmg Running 0 True 58 | validation sensor-data-java-validation-7754885f99-h4l67 Running 0 True 59 | ``` 60 | 61 | - Pick a test data file from `./test-data`, for example `test-data/04-moderate-breeze.json` 62 | - Send the file to the HTTP endpoint of the ingress using the following `curl` command 63 | 64 | 65 | curl -i -X POST sensor-data-java.robert-test.ingestion.io/sensor-data -H "Content-Type: application/json" --data '@test-data/04-moderate-breeze.json' 66 | -------------------------------------------------------------------------------- /mixed-sensors/src/main/scala/pipelines/example/SparkConsoleEgress.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example 2 | 3 | import pipelines.streamlets.StreamletShape 4 | import pipelines.streamlets.avro._ 5 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic, StreamletQueryExecution } 6 | import pipelines.spark.sql.SQLImplicits._ 7 | import org.apache.spark.sql.streaming.Trigger 8 | import org.apache.spark.sql.functions._ 9 | import org.apache.spark.sql.DataFrame 10 | 11 | class SparkConsoleEgress extends SparkStreamlet { 12 | val in1 = AvroInlet[Data]("in1") 13 | val in2 = AvroInlet[Data]("in2") 14 | val shape = StreamletShape.withInlets(in1, in2) 15 | 16 | def asTimestamp = udf((t: Long) ⇒ new java.sql.Timestamp(t)) 17 | def elapsedTime = udf((t1: Long, t0: Long) ⇒ t1 - t0) 18 | 19 | override def createLogic() = new SparkStreamletLogic { 20 | override def buildStreamingQueries = { 21 | val stream1 = readStream(in1).withColumn("source", lit("spark")).withColumn("elapsed", elapsedTime($"t2", $"t1")) 22 | val stream2 = readStream(in2).withColumn("source", lit("akka")).withColumn("elapsed", elapsedTime($"t2", $"t1")) 23 | 24 | // commented-out process: simple stats to compute min/max/mean on a window 25 | // val dataCount = stream1.union(stream2).withColumn("ts", asTimestamp($"timestamp")) 26 | // val stats = dataCount 27 | // .withWatermark("ts", "1 second") 28 | // .groupBy(window($"ts", "5 minutes", "1 minute"), $"source") 29 | // //.agg(max($"elapsed"), min($"elapsed"), avg($"elapsed"), count($"source")) 30 | 31 | val quantiles: (String ⇒ Long ⇒ (DataFrame, Long) ⇒ Unit) = { name ⇒ period ⇒ (df, time) ⇒ 32 | df.cache() 33 | val count = df.count() 34 | val cps = count.toDouble / period 35 | val quans = df.stat.approxQuantile("elapsed", Array(0.1, 0.5, 0.9, 0.99), 0.01) 36 | println(s"$time, $name, $count, $cps, " + quans.mkString(", ")) 37 | } 38 | 39 | val period = 60 * 5 // seconds 40 | 41 | val q1 = stream1.writeStream.foreachBatch(quantiles("spark")(period)) 42 | .trigger(Trigger.ProcessingTime(s"$period 
seconds")) 43 | .option("checkpointLocation", context.checkpointDir("console-egress-q1")) 44 | .start() 45 | val q2 = stream2.writeStream.foreachBatch(quantiles("akka")(period)) 46 | .trigger(Trigger.ProcessingTime(s"$period seconds")) 47 | .option("checkpointLocation", context.checkpointDir("console-egress-q2")) 48 | .start() 49 | 50 | new Thread() { 51 | override def run(): Unit = { 52 | while (true) { 53 | val progress = q1.lastProgress 54 | if (progress != null) { 55 | println("***************** [PROGRESS] *********************") 56 | println(progress.toString()) 57 | println("**************************************************") 58 | } 59 | Thread.sleep(60 * 1000) 60 | } 61 | } 62 | } //.start // uncomment to enable the query progress 63 | 64 | StreamletQueryExecution(q1, q2) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /flink-taxi-ride/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | import pipelines.sbt.CommonSettingsAndTasksPlugin._ 4 | 5 | import scalariform.formatter.preferences._ 6 | 7 | lazy val taxiRidePipeline = (project in file("./taxi-ride-pipeline")) 8 | .enablePlugins(PipelinesApplicationPlugin) 9 | .settings(commonSettings) 10 | .settings( 11 | name := "taxi-ride-fare" 12 | ) 13 | .dependsOn(ingestor, processor, ridelogger) 14 | 15 | lazy val datamodel = (project in file("./datamodel")) 16 | .enablePlugins(PipelinesLibraryPlugin) 17 | .settings( 18 | commonSettings, 19 | (sourceGenerators in Compile) += (avroScalaGenerateSpecific in Test).taskValue 20 | ) 21 | 22 | lazy val ingestor = (project in file("./ingestor")) 23 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin) 24 | .settings( 25 | commonSettings, 26 | libraryDependencies ++= Seq( 27 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 28 | "ch.qos.logback" % "logback-classic" % "1.2.3", 29 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 30 | ) 31 | ) 32 | .dependsOn(datamodel) 33 | 34 | 35 | lazy val processor = (project in file("./processor")) 36 | .enablePlugins(PipelinesFlinkLibraryPlugin) 37 | .settings( 38 | commonSettings, 39 | libraryDependencies ++= Seq( 40 | "ch.qos.logback" % "logback-classic" % "1.2.3", 41 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 42 | ) 43 | ) 44 | .settings( 45 | parallelExecution in Test := false 46 | ) 47 | .dependsOn(datamodel) 48 | 49 | lazy val ridelogger = (project in file("./logger")) 50 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin) 51 | .settings( 52 | commonSettings, 53 | libraryDependencies ++= Seq( 54 | "ch.qos.logback" % "logback-classic" % "1.2.3", 55 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 56 | ) 57 | ) 58 | .dependsOn(datamodel) 59 | 60 | 61 | lazy val commonSettings = Seq( 62 | scalaVersion := "2.12.8", 63 | scalacOptions ++= Seq( 64 | "-encoding", "UTF-8", 65 | "-target:jvm-1.8", 66 | "-Xlog-reflective-calls", 67 | "-Xlint", 68 | "-Ywarn-unused", 69 | "-Ywarn-unused-import", 70 | "-deprecation", 71 | "-feature", 72 | "-language:_", 73 | "-unchecked" 74 | ), 75 | 76 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 77 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 78 | 79 | scalariformPreferences := scalariformPreferences.value 80 | .setPreference(AlignParameters, false) 81 | .setPreference(AlignSingleLineCaseStatements, true) 82 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 83 | 
.setPreference(DoubleIndentConstructorArguments, true) 84 | .setPreference(DoubleIndentMethodDeclaration, true) 85 | .setPreference(RewriteArrowSymbols, true) 86 | .setPreference(DanglingCloseParenthesis, Preserve) 87 | .setPreference(NewlineAtEndOfFile, true) 88 | .setPreference(AllowParamGroupsOnNewlines, true) 89 | ) 90 | -------------------------------------------------------------------------------- /call-record-aggregator/build.sbt: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys._ 3 | import scalariform.formatter.preferences._ 4 | 5 | //tag::docs-PipelinesApplicationPlugin-example[] 6 | lazy val callRecordPipeline = (project in file("./call-record-pipeline")) 7 | .enablePlugins(PipelinesApplicationPlugin) 8 | .settings(commonSettings) 9 | .settings( 10 | name := "call-record-aggregator" 11 | ) 12 | .dependsOn(akkaCdrIngestor, akkaJavaAggregationOutput, sparkAggregation) 13 | //end::docs-PipelinesApplicationPlugin-example[] 14 | 15 | lazy val datamodel = (project in file("./datamodel")) 16 | .enablePlugins(PipelinesLibraryPlugin) 17 | 18 | lazy val akkaCdrIngestor= (project in file("./akka-cdr-ingestor")) 19 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin) 20 | .settings( 21 | commonSettings, 22 | libraryDependencies ++= Seq( 23 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 24 | "ch.qos.logback" % "logback-classic" % "1.2.3", 25 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 26 | ) 27 | ) 28 | .dependsOn(datamodel) 29 | 30 | lazy val akkaJavaAggregationOutput= (project in file("./akka-java-aggregation-output")) 31 | .enablePlugins(PipelinesAkkaStreamsLibraryPlugin) 32 | .settings( 33 | commonSettings, 34 | libraryDependencies ++= Seq( 35 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.10", 36 | "ch.qos.logback" % "logback-classic" % "1.2.3", 37 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 38 | ) 39 | ) 40 | .dependsOn(datamodel) 41 | 42 | lazy val sparkAggregation = (project in file("./spark-aggregation")) 43 | .enablePlugins(PipelinesSparkLibraryPlugin) 44 | .settings( 45 | commonSettings, 46 | Test / parallelExecution := false, 47 | Test / fork := true, 48 | libraryDependencies ++= Seq( 49 | "ch.qos.logback" % "logback-classic" % "1.2.3", 50 | "org.scalatest" %% "scalatest" % "3.0.8" % "test" 51 | ) 52 | ) 53 | .dependsOn(datamodel) 54 | 55 | 56 | lazy val commonSettings = Seq( 57 | scalaVersion := "2.12.10", 58 | scalacOptions ++= Seq( 59 | "-encoding", "UTF-8", 60 | "-target:jvm-1.8", 61 | "-Xlog-reflective-calls", 62 | "-Xlint", 63 | "-Ywarn-unused", 64 | "-Ywarn-unused-import", 65 | "-deprecation", 66 | "-feature", 67 | "-language:_", 68 | "-unchecked" 69 | ), 70 | 71 | scalacOptions in (Compile, console) --= Seq("-Ywarn-unused", "-Ywarn-unused-import"), 72 | scalacOptions in (Test, console) := (scalacOptions in (Compile, console)).value, 73 | 74 | scalariformPreferences := scalariformPreferences.value 75 | .setPreference(AlignParameters, false) 76 | .setPreference(AlignSingleLineCaseStatements, true) 77 | .setPreference(AlignSingleLineCaseStatements.MaxArrowIndent, 90) 78 | .setPreference(DoubleIndentConstructorArguments, true) 79 | .setPreference(DoubleIndentMethodDeclaration, true) 80 | .setPreference(RewriteArrowSymbols, true) 81 | .setPreference(DanglingCloseParenthesis, Preserve) 82 | .setPreference(NewlineAtEndOfFile, true) 83 | .setPreference(AllowParamGroupsOnNewlines, true) 84 | ) 85 | 
-------------------------------------------------------------------------------- /flink-taxi-ride/processor/src/main/scala/pipelines/examples/processor/TaxiRideProcessor.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | package processor 3 | 4 | import org.apache.flink.streaming.api.scala._ 5 | import org.apache.flink.streaming.api.functions.co._ 6 | import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor } 7 | import org.apache.flink.util.Collector 8 | 9 | import pipelines.streamlets.StreamletShape 10 | import pipelines.streamlets.avro._ 11 | import pipelines.flink.avro._ 12 | import pipelines.flink._ 13 | 14 | class TaxiRideProcessor extends FlinkStreamlet { 15 | 16 | // Step 1: Define inlets and outlets. Note for the outlet you need to specify 17 | // the partitioner function explicitly : here we are using the 18 | // rideId as the partitioner 19 | @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride") 20 | @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare") 21 | @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString) 22 | 23 | // Step 2: Define the shape of the streamlet. In this example the streamlet 24 | // has 2 inlets and 1 outlet 25 | @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out) 26 | 27 | // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines 28 | // the behavior of the streamlet 29 | override def createLogic() = new FlinkStreamletLogic { 30 | override def buildExecutionGraph = { 31 | val rides: DataStream[TaxiRide] = 32 | readStream(inTaxiRide) 33 | .filter { ride ⇒ ride.isStart.booleanValue } 34 | .keyBy("rideId") 35 | 36 | val fares: DataStream[TaxiFare] = 37 | readStream(inTaxiFare) 38 | .keyBy("rideId") 39 | 40 | val processed: DataStream[TaxiRideFare] = 41 | rides 42 | .connect(fares) 43 | .flatMap(new EnrichmentFunction) 44 | 45 | writeStream(out, processed) 46 | } 47 | } 48 | 49 | import org.apache.flink.configuration.Configuration 50 | class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] { 51 | 52 | @transient var rideState: ValueState[TaxiRide] = null 53 | @transient var fareState: ValueState[TaxiFare] = null 54 | 55 | override def open(params: Configuration): Unit = { 56 | super.open(params) 57 | rideState = getRuntimeContext.getState( 58 | new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide])) 59 | fareState = getRuntimeContext.getState( 60 | new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare])) 61 | } 62 | 63 | override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = { 64 | val fare = fareState.value 65 | if (fare != null) { 66 | fareState.clear() 67 | out.collect(new TaxiRideFare(ride.rideId, fare.totalFare)) 68 | } else { 69 | rideState.update(ride) 70 | } 71 | } 72 | 73 | override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = { 74 | val ride = rideState.value 75 | if (ride != null) { 76 | rideState.clear() 77 | out.collect(new TaxiRideFare(ride.rideId, fare.totalFare)) 78 | } else { 79 | fareState.update(fare) 80 | } 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /call-record-aggregator/spark-aggregation/src/main/scala/pipelines/examples/carly/aggregator/CallRecordGeneratorIngress.scala: -------------------------------------------------------------------------------- 1 | package 
pipelines.examples.carly.aggregator 2 | 3 | import java.sql.Timestamp 4 | 5 | import scala.util.Random 6 | import scala.concurrent.duration._ 7 | 8 | import org.apache.spark.sql.{ Dataset, SparkSession } 9 | import org.apache.spark.sql.streaming.OutputMode 10 | 11 | import org.apache.spark.sql.functions._ 12 | import org.apache.spark.sql.types.LongType 13 | 14 | import pipelines.streamlets._ 15 | import pipelines.streamlets.avro._ 16 | import pipelines.spark.sql.SQLImplicits._ 17 | import pipelines.examples.carly.data.CallRecord 18 | import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic } 19 | import org.apache.log4j.{ Level, Logger } 20 | 21 | case class Rate(timestamp: Timestamp, value: Long) 22 | 23 | class CallRecordGeneratorIngress extends SparkStreamlet { 24 | 25 | val rootLogger = Logger.getRootLogger() 26 | rootLogger.setLevel(Level.ERROR) 27 | 28 | val RecordsPerSecond = IntegerConfigParameter( 29 | "records-per-second", 30 | "Records per second to process.", 31 | Some(50)) 32 | 33 | override def configParameters = Vector(RecordsPerSecond) 34 | 35 | val out = AvroOutlet[CallRecord]("out", _.user) 36 | val shape = StreamletShape(out) 37 | 38 | override def createLogic() = new SparkStreamletLogic { 39 | val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key) 40 | override def buildStreamingQueries = { 41 | val outStream = DataGenerator.mkData(super.session, recordsPerSecond) 42 | writeStream(outStream, out, OutputMode.Append).toQueryExecution 43 | } 44 | } 45 | } 46 | 47 | object DataGenerator { 48 | def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = { 49 | // do we need to expose this through configuration? 50 | 51 | val MaxTime = 2.hours.toMillis 52 | val MaxUsers = 100000 53 | val TS0 = new java.sql.Timestamp(0) 54 | val ZeroTimestampProb = 0.05 // error rate 55 | 56 | // Random Data Generator 57 | val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers)) 58 | val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing") 59 | 60 | // Time-biased randomized filter - 1/2 hour cycles 61 | val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI) 62 | val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob 63 | val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng)) 64 | val zeroTimestampUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ { 65 | if (rng < ZeroTimestampProb) { 66 | TS0 67 | } else { 68 | ts 69 | } 70 | }) 71 | 72 | val rateStream = session.readStream 73 | .format("rate") 74 | .option("rowsPerSecond", recordsPerSecond) 75 | .load() 76 | .as[Rate] 77 | 78 | val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand()) 79 | val sampledData = randomDataset.where(timeFilterUdf($"timestamp", $"rng")) 80 | .withColumn("user", usersUdf()) 81 | .withColumn("other", usersUdf()) 82 | .withColumn("direction", directionUdf()) 83 | .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType)) 84 | .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng")) 85 | .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp" as "timestamp") 86 | .as[CallRecord] 87 | sampledData 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /sensor-data-scala/README.md: -------------------------------------------------------------------------------- 1 | # `sensor-data-scala` 2 | 3 | A simple pipeline that 
processes events from a wind turbine farm. 4 | 5 | # Required configuration 6 | 7 | `valid-logger.log-level` 8 | 9 | Log level for `*-logger` streamlets to log at. Ex) `info` 10 | 11 | `valid-logger.msg-prefix` - Log line prefix for `*-logger` streamlets to include. Ex) `VALID` 12 | 13 | kubectl-pipelines deploy docker-registry-default.purplehat.lightbend.com/lightbend/sensor-data-scala:382-55e76fe-dirty valid-logger.log-level=info valid-logger.msg-prefix=VALID 14 | 15 | # Generating data 16 | 17 | This example has two ingresses that are combined using a merge operation. Data can be sent to either of the ingresses or to both. 18 | 19 | First deploy the app with `kubectl pipelines deploy [image]`. 20 | 21 | To send data to the HTTP ingress, do the following: 22 | 23 | - Get the HTTP ingress endpoint with `kubectl pipelines status sensor-data-scala` 24 | 25 | In the example output below, the HTTP endpoint would be `docker-registry-default.my.kubernetes.cluster/sensor-data-http-ingress`: 26 | 27 | ``` 28 | kubectl pipelines status sensor-data-scala 29 | Name: sensor-data-scala 30 | Namespace: sensor-data-scala 31 | Version: 445-fcd70ca 32 | Created: 2019-08-20 13:55:35 +0200 CEST 33 | Status: Running 34 | 35 | STREAMLET ENDPOINT 36 | http-ingress docker-registry-default.my.kubernetes.cluster/sensor-data-http-ingress 37 | 38 | STREAMLET POD STATUS RESTARTS READY 39 | invalid-logger sensor-data-scala-invalid-logger-854dd5b47b-rhg7p Running 0 True 40 | http-ingress sensor-data-scala-http-ingress-6b7c586d6-jtd9x Running 0 True 41 | rotor-avg-logger sensor-data-scala-rotor-avg-logger-86c44d896-4f4gb Running 0 True 42 | metrics sensor-data-scala-metrics-f6f749d48-n7qss Running 0 True 43 | file-ingress sensor-data-scala-file-ingress-7f5b966755-jtbnv Running 0 True 44 | validation sensor-data-scala-validation-6f4b59b678-dd4gg Running 0 True 45 | rotorizer sensor-data-scala-rotorizer-55956cb47b-l7kng Running 0 True 46 | merge sensor-data-scala-merge-548994576-k8k8h Running 0 True 47 | valid-logger sensor-data-scala-valid-logger-86449cb958-wztsq Running 0 True 48 | ``` 49 | 50 | - Pick a test data file from `./test-data`, for example `test-data/04-moderate-breeze.json` 51 | - Send the file to the HTTP ingress with the following `curl` command 52 | 53 | 54 | curl -i -X POST sensor-data-scala.apps.purplehat.lightbend.com/sensor-data -H "Content-Type: application/json" --data '@test-data/04-moderate-breeze.json' 55 | 56 | To send data to the file ingress, use the following shell script found in the project root directory: 57 | 58 | ./load-data-into-pvc.sh 59 | 60 | The shell script will load a number of files from the `test-data` directory, and the ingress will continuously read those files and emit their content to the merge streamlet. 61 | 62 | ## Using the [`wrk`](https://github.com/wg/wrk) benchmarking tool 63 | 64 | `wrk` can be used to send a continuous stream of data.
65 | 66 | ### Install 67 | 68 | * Ubuntu: `apt-get install wrk` 69 | * MacOS: `brew install wrk` 70 | 71 | ### Run 72 | 73 | Ex) 74 | 75 | ``` 76 | wrk -c 400 -t 400 -d 500 -s wrk-04-moderate-breeze.lua http://sensor-data-scala.apps.purplehat.lightbend.com/sensor-data 77 | ``` 78 | -------------------------------------------------------------------------------- /flink-taxi-ride/ingestor/src/main/scala/pipelines/examples/ingestor/JsonFormats.scala: -------------------------------------------------------------------------------- 1 | package pipelines.examples 2 | package ingestor 3 | 4 | import spray.json._ 5 | import pipelines.flink.avro._ 6 | 7 | object TaxiRideJsonProtocol extends DefaultJsonProtocol { 8 | implicit object TaxiRideJsonFormat extends RootJsonFormat[TaxiRide] { 9 | def write(t: TaxiRide) = JsObject( 10 | "rideId" -> JsNumber(t.rideId), 11 | "isStart" -> JsBoolean(t.isStart), 12 | "taxiId" -> JsNumber(t.taxiId), 13 | "passengerCnt" -> JsNumber(t.passengerCnt), 14 | "driverId" -> JsNumber(t.driverId), 15 | "startLon" -> JsNumber(t.startLon.doubleValue()), 16 | "startLat" -> JsNumber(t.startLat.doubleValue()), 17 | "endLon" -> JsNumber(t.endLon.doubleValue()), 18 | "endLat" -> JsNumber(t.endLat.doubleValue()), 19 | "startTime" -> JsNumber(t.startTime), 20 | "endTime" -> JsNumber(t.endTime) 21 | ) 22 | def read(value: JsValue) = { 23 | value.asJsObject.getFields( 24 | "rideId", 25 | "isStart", 26 | "taxiId", 27 | "passengerCnt", 28 | "driverId", 29 | "startLon", 30 | "startLat", 31 | "endLon", 32 | "endLat", 33 | "startTime", 34 | "endTime") match { 35 | case Seq(JsNumber(rideId), 36 | JsBoolean(isStart), 37 | JsNumber(taxiId), 38 | JsNumber(passengerCnt), 39 | JsNumber(driverId), 40 | JsNumber(startLon), 41 | JsNumber(startLat), 42 | JsNumber(endLon), 43 | JsNumber(endLat), 44 | JsNumber(startTime), 45 | JsNumber(endTime)) ⇒ 46 | new TaxiRide( 47 | rideId.longValue(), 48 | isStart, 49 | taxiId.longValue(), 50 | passengerCnt.intValue(), 51 | driverId.longValue(), 52 | startLon.floatValue(), 53 | startLat.floatValue(), 54 | endLon.floatValue(), 55 | endLat.floatValue(), 56 | startTime.longValue(), 57 | endTime.longValue()) 58 | case _ ⇒ throw new DeserializationException("TaxiRide expected") 59 | } 60 | } 61 | } 62 | } 63 | 64 | object TaxiFareJsonProtocol extends DefaultJsonProtocol { 65 | implicit object TaxiFareJsonFormat extends RootJsonFormat[TaxiFare] { 66 | def write(t: TaxiFare) = JsObject( 67 | "rideId" -> JsNumber(t.rideId), 68 | "taxiId" -> JsNumber(t.taxiId), 69 | "paymentType" -> JsString(t.paymentType), 70 | "driverId" -> JsNumber(t.driverId), 71 | "startTime" -> JsNumber(t.startTime), 72 | "tip" -> JsNumber(t.tip.floatValue()), 73 | "tolls" -> JsNumber(t.tolls.floatValue()), 74 | "totalFare" -> JsNumber(t.totalFare.floatValue()) 75 | ) 76 | def read(value: JsValue) = { 77 | value.asJsObject.getFields( 78 | "rideId", 79 | "taxiId", 80 | "paymentType", 81 | "driverId", 82 | "startTime", 83 | "tip", 84 | "tolls", 85 | "totalFare") match { 86 | case Seq(JsNumber(rideId), 87 | JsNumber(taxiId), 88 | JsString(paymentType), 89 | JsNumber(driverId), 90 | JsNumber(startTime), 91 | JsNumber(tip), 92 | JsNumber(tolls), 93 | JsNumber(totalFare)) ⇒ 94 | new TaxiFare( 95 | rideId.longValue(), 96 | taxiId.longValue(), 97 | paymentType, 98 | driverId.longValue(), 99 | startTime.longValue(), 100 | tip.floatValue(), 101 | tolls.floatValue(), 102 | totalFare.floatValue()) 103 | case _ ⇒ throw new DeserializationException("TaxiFare expected") 104 | } 105 | } 106 | } 107 | } 108 | 109 
| -------------------------------------------------------------------------------- /warez/data/values/uuids.txt: -------------------------------------------------------------------------------- 1 | 5728b0c7-e561-4faf-a958-af69e415b91e 2 | bdadecc1-7cf5-431c-bba4-f5cb06903222 3 | 0349fbe5-3570-4868-906f-acd0243a36d9 4 | 928c7e09-6e2f-43e3-8537-ba92eebf6651 5 | 1367950c-1904-4bfa-ae61-5a5d79a8003f 6 | bf1b0266-6088-4369-9699-0e55e778c585 7 | 0da5586e-298c-4d03-9a64-1b87654ced88 8 | f692b634-e332-46fc-92f4-368e1c473923 9 | 914d0e4c-4897-4579-b67b-bc6d3fd91a15 10 | b681f516-70b6-4f3e-a71d-dd24426a9f42 11 | bcf44d72-8a4c-4f43-8e06-2fe607a0b0d2 12 | 6da26f79-4857-4727-8836-f359c8872ecd 13 | 244185ac-6515-48e8-89ad-ee4d3af2212b 14 | d39f2962-e913-4670-9ea9-0681e9b4eaca 15 | 280cfa0f-7540-4937-bb59-38dc24fef161 16 | 3ae8c7a0-003c-4fd8-95da-03767827b135 17 | 074c1b79-6574-4db2-95af-d1043650886e 18 | bd4db512-cf72-4cb9-84dd-69d5f19fc003 19 | 11efa973-9720-4f3c-a7f4-f29deb62da50 20 | 4cef7e0c-72ae-4afd-87d5-22a8db3e15e9 21 | d36ef2e5-c205-4b2e-b631-15d59cf081e0 22 | 507f2f01-dc2a-410a-bdc5-4ca532e8c202 23 | e9b32847-9adf-43e2-874b-127eaa90a8c6 24 | 838d0d0f-5213-4896-9e94-6733d99ffd94 25 | a4a8d1df-26af-4440-abb3-3b363716a619 26 | 6132864f-8cb1-4de2-a286-7670c9ffc72d 27 | bd3afa11-d499-40bd-9e8f-45676f5ce458 28 | 07136375-a251-4d2a-9364-887c01cc63cc 29 | bb61adc9-cdfe-4e6f-bfd0-7bb07fd3e4e7 30 | 5865f31c-7677-4ab1-9ab3-4872abeaeade 31 | f3b5e71f-64fc-4c1d-8f13-1ec0f2d2ccc0 32 | 613507a9-8ffd-4ed0-a0ba-3b7b9956ae7a 33 | 612a1236-abba-49a5-a638-5e2e064c84f1 34 | b8f0f760-5ee8-434c-8828-59eb61094d27 35 | c715864c-0557-4e23-bf59-0a266621896c 36 | 067078f8-3dea-467d-b9fa-2a8d0157b80f 37 | 3d543837-81fc-44c6-8c0d-ccae056392c4 38 | 7db392a7-d2bd-487b-aab8-9e27fc1d9597 39 | 74f3bf36-8aea-4e12-9299-a8f9e482df81 40 | 35eae45b-eb50-4ace-9a09-49cc55012bd2 41 | 3f875710-5512-44ad-9241-0133083b1e88 42 | 1d2bd727-4780-4499-8865-7a282da46d0d 43 | 7bf45b41-4b2c-428b-b07e-9d3579c1789b 44 | d331bd99-897b-480b-8fd3-9dd36a83ab1c 45 | 9816c9d8-bf07-4bae-bbd2-e685f5f96511 46 | 01b54c3b-913b-42cc-bde5-2e3efba4e083 47 | c2d5cbe0-f42e-425c-87cf-c902734912e0 48 | 95c9a275-627b-4e22-8717-848b5b58bc6f 49 | f34dec58-57c8-4251-8e77-c7ae1f79f457 50 | 8a9ad446-d62f-495b-a636-c0ad9fa9880b 51 | 7b2c233b-2ea7-4f34-a33f-3c25272c85c5 52 | 44af5782-f9e8-4c81-b69a-36f29a213b11 53 | 2a1389ad-0261-4d55-9384-9b5d36493474 54 | 6df1a590-e4db-4297-9d99-d602e3e87645 55 | 44822f06-b1ce-4d63-9daf-588f03622bc4 56 | dee251f4-1347-4dca-aac0-0c855d2d534c 57 | 13fec854-b3ef-4e9d-ae5a-e26b0c093b0f 58 | 413ba5e4-e826-44e6-a4b7-55ec35befe09 59 | 735ded00-7acc-442b-a4ee-b393aef4d007 60 | 99ed94f1-92cd-4f54-b2d0-4639509938f6 61 | 51caaa68-d224-404e-8909-a6082bcdb2e5 62 | 84006f1d-3011-40ab-b62a-f2cecaf4a9f5 63 | e2d37e6f-ae5d-41b2-abdf-15e00f081f11 64 | e21a2d68-d6ee-46fb-8d36-1da6b0c89ffb 65 | d9889c07-97cc-4488-920d-79efdd576385 66 | 8f0aa2e3-2014-49bc-8d14-b2ed27ef3fa4 67 | b78232dc-222b-4a21-ab49-c3ce69639f53 68 | ea80afa3-63e6-481b-9c63-9681945ec33b 69 | 72d59883-2b47-483a-a682-1901c7040012 70 | 608b1cec-f017-49a9-a4dc-5be44f03f398 71 | b56f3954-a80a-4056-a689-b25274cb0365 72 | 29e7fdb7-cdbb-45d9-b6f3-d3a6ffeee056 73 | 13efa3db-bdab-437b-8f3f-9e653ce06a77 74 | d3d2e381-0aa4-4848-b911-85b6d9a9505c 75 | 36f31a4b-c74d-4ea3-b0a0-f4454b2352d7 76 | 6e181d59-0804-4a6b-a3a7-14169ae66764 77 | 9de6e256-cf4c-490c-bd8a-1c91d38e7cb0 78 | e46864bb-5f44-4df2-b2a8-9afe57adee2e 79 | 93fcdb08-f6fc-46ae-bd20-effe8107f0d6 80 | 6802c9e8-d156-410b-8500-369396bbf5ef 81 | 
dc700a91-0b0b-45b4-a0a9-52c1cebe79e3 82 | 2f6fcb74-27c8-43ef-b4b8-d9428958f46b 83 | caf10de4-b9d6-4156-8d7d-e9c31981cf94 84 | 59b0c7aa-ef9d-4b36-85ff-0cfb4eaedaaa 85 | 98b00e25-d6d1-4fb1-b714-26486cebee40 86 | 57456ad3-a02a-4ccc-b1ed-35d3460e6e9f 87 | dd2c3500-b06a-4eae-9a1a-dcb12bc872e3 88 | 5fc0e05f-4cbb-43f8-8596-219308858598 89 | c0f8fbdf-48be-4b70-af72-76969fba0bbd 90 | 7b692c68-ffae-48d1-981a-12871db3474f 91 | f77cc77b-7595-4f88-b6a3-b56e135e0c06 92 | c24a9f79-d12e-44f3-a665-4d36bfaef08f 93 | 27bf19e7-8686-46bf-81aa-b3860196cb5c 94 | b8ad4125-7364-4f08-a61e-e863094caef4 95 | 4679ef35-f82d-4853-bcc1-a6da5e618b62 96 | 6db42963-5909-4e4a-a92a-3d009078e293 97 | 73608eb1-aa43-4cca-8cc5-9bba3278f4ff 98 | cada8794-b9e2-4b1d-b565-de7772d96b94 99 | 12aeb1b1-38bd-4a78-bc47-764d30a132cd 100 | e0e1443c-b276-4a13-aa4a-32998db55db0 101 | -------------------------------------------------------------------------------- /warez/spark-streamlets/src/main/scala/pipelines/example/warez/SparkProductJoiner.scala: -------------------------------------------------------------------------------- 1 | package pipelines.example.warez 2 | 3 | import scala.collection.immutable.Seq 4 | import org.apache.spark.sql.Dataset 5 | import org.apache.spark.sql.streaming.{ GroupState, GroupStateTimeout, OutputMode } 6 | import pipelines.streamlets.StreamletShape 7 | import pipelines.streamlets.avro._ 8 | import pipelines.spark.sql.SQLImplicits._ 9 | import pipelines.spark.{ SparkStreamletLogic, SparkStreamlet } 10 | import warez.{ PriceUpdate, Sku, StockUpdate } 11 | import SparkProductJoiner._ 12 | 13 | class SparkProductJoiner extends SparkStreamlet { 14 | 15 | val in0 = AvroInlet[warez.Product]("in-0") 16 | val in1 = AvroInlet[warez.StockUpdate]("in-1") 17 | val in2 = AvroInlet[warez.PriceUpdate]("in-2") 18 | val out = AvroOutlet[warez.Product]("out", _.id.toString) 19 | 20 | val shape = StreamletShape(out).withInlets(in0, in1, in2) 21 | 22 | override def createLogic = new SparkStreamletLogic { 23 | 24 | override def buildStreamingQueries = { 25 | val products = readStream(in0) 26 | val stocks = readStream(in1) 27 | val prices = readStream(in2) 28 | val outStream = process(products, stocks, prices) 29 | val query = writeStream(outStream, out, OutputMode.Append) 30 | query.toQueryExecution 31 | } 32 | private def process(products: Dataset[warez.Product], stocks: Dataset[warez.StockUpdate], prices: Dataset[warez.PriceUpdate]): Dataset[warez.Product] = { 33 | val stocksAsProducts = stocks.map(stockUpdate2Product) 34 | val pricesAsProducts = prices.map(priceUpdate2Products) 35 | val withStocks = products 36 | .union(stocksAsProducts) 37 | .union(pricesAsProducts) 38 | .groupByKey(p ⇒ p.id) 39 | .flatMapGroupsWithState(OutputMode.Append(), GroupStateTimeout.NoTimeout)(stateFunc) 40 | withStocks 41 | } 42 | } 43 | } 44 | 45 | object SparkProductJoiner { 46 | private[warez] def stockUpdate2Product(s: StockUpdate): warez.Product = { 47 | warez.Product(s.productId, "", "", Seq.empty[String], Seq(Sku(s.skuId, "", stock = Option(s.diff), price = None))) 48 | } 49 | 50 | private[warez] def priceUpdate2Products(p: PriceUpdate): warez.Product = { 51 | warez.Product(p.productId, "", "", Seq.empty[String], Seq(Sku(p.skuId, "", stock = None, price = Option(p.price)))) 52 | } 53 | 54 | type ProductId = String 55 | 56 | private[warez] def emptyProduct: warez.Product = new warez.Product 57 | 58 | private[warez] def calcStockDiff(a: Option[Int], b: Option[Int]): Option[Int] = (a, b) match { 59 | case (Some(i), Some(j)) ⇒ Some(i + j) 60 | 
case (Some(i), None) ⇒ Some(i) 61 | case (None, Some(j)) ⇒ Some(j) 62 | case _ ⇒ None 63 | } 64 | 65 | private[warez] def mergeSkus(a: Sku, b: Sku): Sku = { 66 | val name = if (a.name.length > b.name.length) a.name else b.name 67 | val stock = calcStockDiff(a.stock, b.stock) 68 | Sku(a.id, name, stock, b.price) 69 | } 70 | 71 | private[warez] def mergeProducts(acc: warez.Product, skuId: String, newSku: Sku) = { 72 | val skuIndex = acc.skus.indexWhere(_.id == skuId) 73 | if (skuIndex < 0) { 74 | acc.copy(skus = acc.skus :+ newSku) 75 | } else { 76 | val sku = acc.skus(skuIndex) 77 | acc.copy(skus = acc.skus.updated(skuIndex, mergeSkus(sku, newSku))) 78 | } 79 | } 80 | 81 | private[warez] def updateProduct(currentProduct: warez.Product, prods: Iterator[warez.Product]): warez.Product = { 82 | val empty = emptyProduct 83 | prods.foldLeft(currentProduct) { (acc, p) ⇒ 84 | p match { 85 | // Is StockUpdate 86 | case warez.Product(_, "", "", _, Seq(Sku(skuId, "", Some(diff), None))) ⇒ { 87 | acc match { 88 | case warez.Product("", "", "", Seq(), Seq()) ⇒ empty 89 | case _ ⇒ 90 | val newSku = Sku(skuId, "", Some(diff), None) 91 | mergeProducts(acc, skuId, newSku) 92 | } 93 | } 94 | // Is PriceUpdate 95 | case warez.Product(_, "", "", _, Seq(Sku(skuId, "", None, Some(price)))) ⇒ { 96 | acc match { 97 | case warez.Product("", "", "", Seq(), Seq()) ⇒ empty 98 | case _ ⇒ 99 | val newSku = Sku(skuId, "", None, Some(price)) 100 | mergeProducts(acc, skuId, newSku) 101 | } 102 | } 103 | // Is Product 104 | case newProd ⇒ acc.copy(id = newProd.id, name = newProd.name, description = newProd.description, keywords = newProd.keywords, skus = newProd.skus) 105 | } 106 | } 107 | } 108 | 109 | private[warez] def invalid(p: warez.Product): Boolean = p.description.isEmpty && p.name.isEmpty && p.keywords.isEmpty 110 | 111 | val stateFunc: (ProductId, Iterator[warez.Product], GroupState[warez.Product]) ⇒ Iterator[warez.Product] = (_, prods, state) ⇒ { 112 | val out = updateProduct(state.getOption.getOrElse(emptyProduct), prods) 113 | (if (invalid(out)) { 114 | // return nothing 115 | None 116 | } else { 117 | // update state only when output is valid 118 | state.update(out) 119 | Some(out) 120 | }).toIterator 121 | } 122 | } 123 | --------------------------------------------------------------------------------