├── .github └── workflows │ └── gradle.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── beam-cpp-example ├── Dockerfile ├── LICENSE ├── README.txt ├── collatz.pyx ├── pipeline.py ├── requirements.txt └── setup.py ├── retail ├── monitoring │ ├── .gitignore │ └── dashboards │ │ ├── README.MD │ │ ├── main.tf │ │ └── retail-pipeline.json ├── retail-clickstream-application │ ├── README.md │ ├── bigquery │ │ ├── README.md │ │ └── ecommerce_events.json │ ├── cloud-run-proxy.png │ ├── cloud-run-ps-proxy.png │ ├── cloud-run-pubsub-proxy │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── app.js │ │ ├── index.js │ │ └── package.json │ ├── datalayer │ │ ├── add_to_cart.json │ │ ├── purchase.json │ │ └── view_item.json │ ├── ecommerce_events_bq_schema.json │ ├── main.tf │ ├── terraform.tfvars.example │ └── variables.tf └── retail-java-applications │ ├── README.MD │ ├── build.gradle │ ├── data-engineering-dept │ ├── README.MD │ ├── build.gradle │ ├── business-logic │ │ ├── README.MD │ │ ├── build.gradle │ │ └── src │ │ │ ├── main │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── google │ │ │ │ └── dataflow │ │ │ │ └── sample │ │ │ │ └── retail │ │ │ │ └── businesslogic │ │ │ │ ├── core │ │ │ │ ├── DeploymentAnnotations.java │ │ │ │ ├── options │ │ │ │ │ ├── RetailPipelineAggregationOptions.java │ │ │ │ │ ├── RetailPipelineClickStreamOptions.java │ │ │ │ │ ├── RetailPipelineInventoryOptions.java │ │ │ │ │ ├── RetailPipelineOptions.java │ │ │ │ │ ├── RetailPipelineReportingOptions.java │ │ │ │ │ ├── RetailPipelineStoresOptions.java │ │ │ │ │ └── RetailPipelineTransactionsOptions.java │ │ │ │ ├── transforms │ │ │ │ │ ├── CreateStockAggregatorMetadata.java │ │ │ │ │ ├── DeadLetterSink.java │ │ │ │ │ ├── ErrorMsg.java │ │ │ │ │ ├── clickstream │ │ │ │ │ │ ├── ClickStreamSessions.java │ │ │ │ │ │ ├── ClickstreamProcessing.java │ │ │ │ │ │ ├── CountViewsPerProduct.java │ │ │ │ │ │ ├── ValidateAndCorrectCSEvt.java │ │ │ │ │ │ ├── WriteAggregatesToBigTable.java │ │ │ │ │ │ ├── 
WriteAggregationToBigQuery.java │ │ │ │ │ │ └── validation │ │ │ │ │ │ │ ├── EventDateTimeCorrectionService.java │ │ │ │ │ │ │ ├── EventItemCorrectionService.java │ │ │ │ │ │ │ ├── ValidateEventDateTime.java │ │ │ │ │ │ │ ├── ValidateEventItems.java │ │ │ │ │ │ │ └── ValidationUtils.java │ │ │ │ │ ├── stock │ │ │ │ │ │ ├── CountGlobalStockUpdatePerProduct.java │ │ │ │ │ │ ├── CountIncomingStockPerProductLocation.java │ │ │ │ │ │ └── StockProcessing.java │ │ │ │ │ └── transaction │ │ │ │ │ │ ├── CountGlobalStockFromTransaction.java │ │ │ │ │ │ ├── EnrichTransactionWithStoreLocation.java │ │ │ │ │ │ ├── TransactionPerProductAndLocation.java │ │ │ │ │ │ └── TransactionProcessing.java │ │ │ │ └── utils │ │ │ │ │ ├── BigQueryUtil.java │ │ │ │ │ ├── JSONUtils.java │ │ │ │ │ ├── Print.java │ │ │ │ │ ├── ReadPubSubMsgPayLoadAsString.java │ │ │ │ │ └── WriteRawJSONMessagesToBigQuery.java │ │ │ │ └── externalservices │ │ │ │ ├── RetailCompanyServices.java │ │ │ │ └── SlowMovingStoreLocationDimension.java │ │ │ └── test │ │ │ └── java │ │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── retail │ │ │ └── businesslogic │ │ │ └── core │ │ │ └── utils │ │ │ └── test │ │ │ ├── CreateStockAggregatorMetadataTest.java │ │ │ ├── JSONUtilsTest.java │ │ │ ├── avrotestobjects │ │ │ ├── InventoryAVRO.java │ │ │ └── TransactionsAVRO.java │ │ │ ├── clickstream │ │ │ ├── BackFillSessionDataTest.java │ │ │ ├── ClickStreamSessionTestUtil.java │ │ │ ├── CountViewsPerProductTest.java │ │ │ ├── CreateClickStreamSessionsTest.java │ │ │ ├── ValidateAndCorrectClickStreamEventsTests.java │ │ │ └── WriterawJSONMessagesToBigQureyTest.java │ │ │ ├── stock │ │ │ ├── CountGlobalStockUpdatePerProductTest.java │ │ │ └── CountIncomingStockPerProductLocationTest.java │ │ │ └── transaction │ │ │ ├── TransactionPerProductAndLocationTest.java │ │ │ └── TransactionPerProductTest.java │ ├── data-objects │ │ ├── README.MD │ │ ├── build.gradle │ │ └── src │ │ │ └── main │ │ │ └── java │ │ │ 
└── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── retail │ │ │ └── dataobjects │ │ │ ├── ClickStream.java │ │ │ ├── Dimensions.java │ │ │ ├── Ecommerce.java │ │ │ ├── Item.java │ │ │ ├── Purchase.java │ │ │ ├── Stock.java │ │ │ ├── StockAggregation.java │ │ │ └── Transaction.java │ └── pipelines │ │ ├── README.MD │ │ ├── build.gradle │ │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── retail │ │ │ └── pipeline │ │ │ └── RetailDataProcessingPipeline.java │ │ └── test │ │ └── java │ │ └── com │ │ └── google │ │ └── dataflow │ │ └── sample │ │ └── retail │ │ └── pipeline │ │ └── test │ │ ├── RetailDataProcessingPipelineSimpleSmokeTest.java │ │ └── TestStreamGenerator.java │ ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── settings.gradle │ ├── spotless │ └── Apache_Licence_For_Spotless │ └── terraform │ ├── README.MD │ ├── examples │ └── pubsub │ │ ├── main.tf │ │ ├── terraform.tfvars │ │ └── variables.tf │ ├── main.tf │ ├── modules │ └── pubsub │ │ ├── main.tf │ │ └── variables.tf │ ├── terraform.tfvars │ ├── test │ ├── README.md │ └── pubsub │ │ ├── go.mod │ │ ├── go.sum │ │ └── pubsub_test.go │ └── variables.tf └── timeseries-streaming ├── README.MD ├── docs └── Draft_Dataflow_streaming_time_series_sample_library_user_guide_v0.4.0.pdf ├── images └── timeseries_metrics.png ├── timeseries-java-applications ├── Adapters │ ├── build.gradle │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── timeseriesflow │ │ │ └── adaptors │ │ │ ├── domain │ │ │ ├── BlendedIndex.java │ │ │ └── MergeSparseStreamsToSingleDenseStream.java │ │ │ └── fsi │ │ │ └── data │ │ │ └── cme │ │ │ ├── CMEAdapter.java │ │ │ ├── CMEMarketDataReader.java │ │ │ ├── ConvertCMETOBRowToTSDataPoints.java │ │ │ ├── ConvertCMETRDRowToTSDataPoints.java │ │ │ ├── Data.java │ │ │ ├── 
DeadLetterSink.java │ │ │ ├── Extract.java │ │ │ ├── GenerateErrorMessageEvents.java │ │ │ ├── LogElements.java │ │ │ ├── README.MD │ │ │ ├── TopOfBook.java │ │ │ ├── TradeInfo.java │ │ │ └── Util.java │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── timeseriesflow │ │ │ └── adaptors │ │ │ ├── domain │ │ │ └── tests │ │ │ │ └── BlendedIndexTest.java │ │ │ └── fsi │ │ │ └── data │ │ │ └── cme │ │ │ └── CMEAdapterTest.java │ │ └── resources │ │ ├── TSTestBlendedIndexNoGaps.json │ │ ├── TSTestBlendedIndexWithGaps.json │ │ └── TSTestData.json ├── Apache_Licence_For_Spotless ├── Examples │ ├── build.gradle │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── timeseriesflow │ │ │ └── examples │ │ │ └── fsi │ │ │ └── forex │ │ │ ├── AllComputationsExamplePipeline.java │ │ │ ├── ExampleForexPipelineOptions.java │ │ │ ├── ForexBatchPipeline.java │ │ │ ├── ForexCSVAdaptor.java │ │ │ └── HistoryForexReader.java │ │ └── resources │ │ └── [EMPTY]-EURUSD-2020-05-11_2020-05-11.csv ├── README.MD ├── SyntheticExamples │ ├── build.gradle │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── google │ │ └── dataflow │ │ └── sample │ │ └── timeseriesflow │ │ └── examples │ │ └── simpledata │ │ └── transforms │ │ ├── Print.java │ │ ├── SimpleDataBootstrapGenerator.java │ │ ├── SinWaveExample.java │ │ └── SinWaveExampleOptions.java ├── TimeSeriesMetricsLibrary │ ├── README.MD │ ├── build.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── google │ │ │ │ └── dataflow │ │ │ │ └── sample │ │ │ │ └── timeseriesflow │ │ │ │ └── metrics │ │ │ │ ├── README.MD │ │ │ │ ├── core │ │ │ │ ├── TSMetricsOptions.java │ │ │ │ ├── complex │ │ │ │ │ ├── fsi │ │ │ │ │ │ ├── rsi │ │ │ │ │ │ │ └── RSIGFn.java │ │ │ │ │ │ └── vwap │ │ │ │ │ │ │ ├── VWAPGFn.java │ │ │ │ │ │ │ └── VWAPTypeOneComp.java │ │ │ │ │ └── rule │ │ │ │ │ │ └── ValueInBoundsGFn.java │ │ │ │ ├── typeone │ │ │ │ 
│ ├── Max.java │ │ │ │ │ ├── Min.java │ │ │ │ │ └── Sum.java │ │ │ │ └── typetwo │ │ │ │ │ └── basic │ │ │ │ │ ├── bb │ │ │ │ │ └── BBFn.java │ │ │ │ │ ├── logrtn │ │ │ │ │ └── LogRtnFn.java │ │ │ │ │ ├── ma │ │ │ │ │ └── MAFn.java │ │ │ │ │ ├── stddev │ │ │ │ │ └── StdDevFn.java │ │ │ │ │ └── sumupdown │ │ │ │ │ └── SumUpDownFn.java │ │ │ │ └── utils │ │ │ │ ├── AllMetricsWithDefaults.java │ │ │ │ └── StatisticalFormulas.java │ │ └── proto │ │ │ └── TSFSITechKeys.proto │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── google │ │ │ └── dataflow │ │ │ └── sample │ │ │ └── timeseriesflow │ │ │ └── metrics │ │ │ ├── BBFnTest.java │ │ │ ├── IntegrationMetricTest.java │ │ │ ├── StatisticalFormulasTests.java │ │ │ ├── TSMetricsIntegrationTest.java │ │ │ ├── TSMetricsTests.java │ │ │ ├── VWAPGraphFnTest.java │ │ │ └── ValueWithinBoundsGraphFnTest.java │ │ └── resources │ │ ├── LogRtnTSTestDataHints.json │ │ ├── TSAccumVWAPTest.json │ │ ├── TSTestDataHints.json │ │ ├── VWAPTestGap.json │ │ └── VWAPWithinBoundTest.json ├── TimeSeriesPipeline │ ├── build.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── google │ │ │ │ └── dataflow │ │ │ │ └── sample │ │ │ │ └── timeseriesflow │ │ │ │ ├── combiners │ │ │ │ ├── BTypeOne.java │ │ │ │ ├── TSCombiner.java │ │ │ │ └── typeone │ │ │ │ │ ├── TSBaseCombiner.java │ │ │ │ │ ├── TSCategoricalCombiner.java │ │ │ │ │ └── TSNumericCombiner.java │ │ │ │ ├── common │ │ │ │ ├── CommonUtils.java │ │ │ │ ├── TSDataUtils.java │ │ │ │ └── TupleTypes.java │ │ │ │ ├── datamap │ │ │ │ ├── AccumBuilder.java │ │ │ │ ├── AccumCategoricalBuilder.java │ │ │ │ ├── AccumCoreMetadataBuilder.java │ │ │ │ └── AccumCoreNumericBuilder.java │ │ │ │ ├── graph │ │ │ │ ├── GenerateComputations.java │ │ │ │ ├── GraphType1Comp.java │ │ │ │ ├── GraphType2Comp.java │ │ │ │ └── TSFlowGraph.java │ │ │ │ ├── io │ │ │ │ └── tfexample │ │ │ │ │ ├── CreateTFRecordMetadata.java │ │ │ │ │ ├── FeaturesFromIterableAccumSequence.java │ │ │ │ │ ├── 
OutPutTFExampleToFile.java │ │ │ │ │ ├── OutPutTFExampleToPubSub.java │ │ │ │ │ ├── TSAccumIterableToTFExample.java │ │ │ │ │ └── TSToTFExampleUtils.java │ │ │ │ ├── metrics │ │ │ │ ├── BTypeTwo.java │ │ │ │ ├── BTypeTwoFn.java │ │ │ │ ├── CTypeTwo.java │ │ │ │ ├── core │ │ │ │ │ ├── typeone │ │ │ │ │ │ ├── TestMax.java │ │ │ │ │ │ ├── TestMin.java │ │ │ │ │ │ └── TestSum.java │ │ │ │ │ └── typetwo │ │ │ │ │ │ ├── Test1Fn.java │ │ │ │ │ │ ├── Test2Fn.java │ │ │ │ │ │ ├── TestComplex1GFn.java │ │ │ │ │ │ └── TestComplex2GFn.java │ │ │ │ └── utils │ │ │ │ │ ├── CreateCompositeTSAccum.java │ │ │ │ │ └── TSKeyRemapping.java │ │ │ │ ├── options │ │ │ │ ├── GapFillOptions.java │ │ │ │ ├── GenerateComputationsOptions.java │ │ │ │ ├── TFXOptions.java │ │ │ │ ├── TSFlowOptions.java │ │ │ │ └── TSOutputPipelineOptions.java │ │ │ │ ├── transforms │ │ │ │ ├── AddWindowBoundaryToTSAccum.java │ │ │ │ ├── ConvertAccumToSequence.java │ │ │ │ ├── MajorKeyWindowSnapshot.java │ │ │ │ ├── MergeAllTypeCompsInSameKeyWindow.java │ │ │ │ ├── MergeTSAccum.java │ │ │ │ ├── MinorKeyWindowSnapshot.java │ │ │ │ ├── ParseTSDataPointFromBytes.java │ │ │ │ ├── PerfectRectangles.java │ │ │ │ ├── TSAccumSequenceToRow.java │ │ │ │ ├── TSAccumToJson.java │ │ │ │ ├── TSAccumToRow.java │ │ │ │ ├── TSDataPointToRow.java │ │ │ │ └── TypeTwoComputation.java │ │ │ │ └── verifier │ │ │ │ └── TSDataPointVerifier.java │ │ └── proto │ │ │ ├── TFExampleKeys.proto │ │ │ ├── TS.proto │ │ │ └── TSBaseKeys.proto │ │ └── test │ │ ├── java │ │ ├── com │ │ │ └── google │ │ │ │ └── dataflow │ │ │ │ └── sample │ │ │ │ └── timeseriesflow │ │ │ │ └── test │ │ │ │ ├── CommonUtilsTest.java │ │ │ │ ├── GenerateComputationTest.java │ │ │ │ ├── GenerateComputationTestWithAccumToSeq.java │ │ │ │ ├── GenerateTFExampleFromTSSequenceTest.java │ │ │ │ ├── MergeAllTypeCompsInSameKeyWindowTest.java │ │ │ │ ├── PerfectRectangleUtils.java │ │ │ │ ├── PerfectRectanglesScalability_5Days_5Keys_EvenGaps.java │ │ │ │ ├── 
PerfectRectanglesScalability_86400S_AllGaps.java │ │ │ │ ├── PerfectRectanglesScalability_86400S_EvenGaps.java │ │ │ │ ├── PerfectRectanglesScalability_86400S_NoGaps.java │ │ │ │ ├── PerfectRectanglesTests.java │ │ │ │ ├── Print.java │ │ │ │ ├── ScaleTestingOptions.java │ │ │ │ ├── SnapShotScalabilityUtilsTest.java │ │ │ │ ├── SnapShotScalability_1D_100M_100K_1FW_60SW_withTFExampleSerlization.java │ │ │ │ ├── SnapShotScalability_1D_100M_1K_1FW_60SW.java │ │ │ │ ├── SnapShotScalability_1D_100M_1K_1FW_60SW_withTFExampleSerlization.java │ │ │ │ ├── SnapShotUtils.java │ │ │ │ ├── SnapshotTests.java │ │ │ │ ├── TSAccumIterableToTFExampleTest.java │ │ │ │ ├── TSAccumSequenceToRowTest.java │ │ │ │ ├── TSAccumToRowTest.java │ │ │ │ ├── TSDataPointToRowTest.java │ │ │ │ ├── TSDataTestUtils.java │ │ │ │ ├── TSKeyRemappingTest.java │ │ │ │ ├── TSNumericCombinerTest.java │ │ │ │ └── TestUtils.java │ │ └── common │ │ │ ├── TSTestData.java │ │ │ └── TSTestDataBaseline.java │ │ ├── proto │ │ └── TSTest.proto │ │ └── resources │ │ ├── CreateCompositeTSAccumTest.json │ │ └── TSAccumVWAPTest.json ├── build.gradle ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat └── settings.gradle └── timeseries-python-applications ├── README.MD ├── __init__.py ├── ml_pipeline ├── __init__.py └── timeseries │ ├── __init__.py │ ├── encoder_decoder │ ├── __init__.py │ ├── encoder_decoder_model.py │ ├── encoder_decoder_preprocessing.py │ ├── encoder_decoder_run_fn.py │ └── transforms │ │ ├── __init__.py │ │ └── process_encdec_inf_rtn.py │ ├── pipeline_templates │ ├── __init__.py │ └── timeseries_pipeline.py │ └── utils │ ├── __init__.py │ ├── timeseries_transform_utils.py │ └── timeseries_transform_utils_test.py ├── ml_pipeline_examples ├── __init__.py └── sin_wave_example │ ├── __init__.py │ ├── config.py │ ├── inference │ ├── __init__.py │ ├── batch_inference.py │ └── stream_inference.py │ ├── saved_model_example │ └── serving_model_dir 
│ │ ├── saved_model.pb │ │ └── variables │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index │ ├── tf_transform_graph_dir │ ├── metadata │ │ └── schema.pbtxt │ ├── transform_fn │ │ └── saved_model.pb │ └── transformed_metadata │ │ └── schema.pbtxt │ └── training │ ├── __init__.py │ ├── kfp │ └── kfp_timeseries_local_sin_wave.py │ └── timeseries_local_sin_wave.py ├── notebooks ├── Comparing_metrics_with_Pandas.ipynb └── img │ ├── FILLINGBQ.png │ ├── FILLINGPANDAS.png │ ├── MA60.png │ └── STDDEV.png ├── setup.cfg └── setup.py /.github/workflows/gradle.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Gradle 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle 3 | 4 | name: Java CI with Gradle 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | 14 | timeseries: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | defaults: 19 | run: 20 | working-directory: timeseries-streaming/timeseries-java-applications/ 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Set up JDK 1.8 25 | uses: actions/setup-java@v1 26 | with: 27 | java-version: 1.8 28 | - name: Grant execute permission for gradlew 29 | run: chmod +x gradlew 30 | - name: Build with Gradle 31 | run: ./gradlew build 32 | 33 | retail: 34 | 35 | runs-on: ubuntu-latest 36 | 37 | defaults: 38 | run: 39 | working-directory: retail/retail-java-applications/ 40 | 41 | steps: 42 | - uses: actions/checkout@v2 43 | - name: Set up JDK 1.8 44 | uses: actions/setup-java@v1 45 | with: 46 | java-version: 1.8 47 | - name: Grant execute permission for gradlew 48 | run: chmod +x gradlew 49 | - name: Build with Gradle 50 | run: ./gradlew build --full-stacktrace 51 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # NOTE: if you modify this file, you probably need to modify the file set that 2 | # is an input to 'maven-assembly-plugin' that generates source distribution. 3 | # This is typically in files named 'src.xml' throughout this repository. 4 | 5 | # Ignore any offline repositories the user may have created. 6 | **/offline-repository/**/* 7 | 8 | # Ignore files generated by the Gradle build process. 9 | **/.gradle/**/* 10 | **/.gogradle/**/* 11 | **/.nb-gradle/**/* 12 | **/gogradle.lock 13 | **/build/**/* 14 | .test-infra/**/vendor/**/* 15 | sdks/**/vendor/**/* 16 | runners/**/vendor/**/* 17 | **/.gradletasknamecache 18 | 19 | # Ignore files generated by the Maven build process. 20 | **/bin/**/* 21 | **/dependency-reduced-pom.xml 22 | **/target/**/* 23 | 24 | # Ignore generated archetypes 25 | sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/ 26 | sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/ 27 | 28 | # Ignore files generated by the Python build process. 29 | **/*.pyc 30 | **/*.pyo 31 | **/*.pyd 32 | **/*.egg-info/ 33 | **/.eggs/ 34 | **/nose-*.egg/ 35 | **/.tox/**/* 36 | **/dist/**/* 37 | **/distribute-*/**/* 38 | **/env/**/* 39 | **/.mypy_cache 40 | **/.dmypy.json 41 | sdks/python/**/*.c 42 | sdks/python/**/*.so 43 | sdks/python/**/*.egg 44 | sdks/python/LICENSE 45 | sdks/python/NOTICE 46 | sdks/python/README.md 47 | sdks/python/apache_beam/portability/api/*pb2*.* 48 | sdks/python/apache_beam/portability/api/*.yaml 49 | sdks/python/nosetests*.xml 50 | sdks/python/pytest*.xml 51 | sdks/python/postcommit_requirements.txt 52 | 53 | # Ignore IntelliJ files. 54 | **/.idea/**/* 55 | **/*.iml 56 | **/*.ipr 57 | **/*.iws 58 | **/out/**/* 59 | 60 | # Ignore Eclipse files. 
61 | **/.classpath 62 | **/.project 63 | **/.factorypath 64 | **/.checkstyle 65 | **/.fbExcludeFilterFile 66 | **/.apt_generated/**/* 67 | **/.settings/**/* 68 | **/.gitignore 69 | 70 | # Ignore Visual Studio Code files. 71 | **/.vscode/**/* 72 | 73 | # Hotspot VM leaves this log in a non-target directory when java crashes 74 | **/hs_err_pid*.log 75 | 76 | # Ignore files that end with '~', since they are most likely auto-save files 77 | # produced by a text editor. 78 | **/*~ 79 | 80 | # Ignore MacOSX files. 81 | **/.DS_Store/**/* 82 | **/.DS_Store 83 | 84 | # Ignore Jupyter notebook checkpoints. 85 | **/.ipynb_checkpoints/**/* 86 | 87 | # JetBrains Education files 88 | !**/study_project.xml 89 | **/.coursecreator/**/* 90 | 91 | .pytest_cache 92 | .pytest_cache/**/* 93 | 94 | # Terraform intermediate artifacts 95 | .terraform/ 96 | .terraform.lock.hcl 97 | *.tfstate 98 | *.tfstate.* 99 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | <https://cla.developers.google.com/> to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. 
Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Dataflow sample applications are intended to provide blueprints for building pipelines. 4 | 5 | # Disclaimer 6 | 7 | These samples are for illustration of techniques that can be used with Dataflow. 8 | The samples are not a supported Google product. We welcome feedback, bug reports and code contributions, but cannot guarantee they will be 9 | addressed. 10 | 11 | ### Timeseries Sample 12 | 13 | This repository provides a set of time-series transforms that simplify developing Apache Beam pipelines for processing streaming time-series data. 14 | 15 | [timeseries-streaming](timeseries-streaming) 16 | 17 | [README](timeseries-streaming/README.MD) 18 | 19 | 20 | ![Timeseries Metrics Image](timeseries-streaming/images/timeseries_metrics.png?raw=true "Timeseries Metrics") 21 | 22 | 23 | ### Retail Application Sample 24 | 25 | The e-commerce sample application illustrates common use cases and best practices for implementing streaming data analytics and real-time AI. Use it to learn how to dynamically respond to customer actions by analyzing and responding to events in real time, and also how to store, analyze and visualize that event data for longer-term insights. 
26 | 27 | [retail-java-application](retail/retail-java-applications/README.MD) 28 | 29 | [README](retail/retail-java-applications/README.MD) 30 | 31 | -------------------------------------------------------------------------------- /beam-cpp-example/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/beam_python3.9_sdk:2.46.0 2 | 3 | # Install a C++ library. 4 | RUN apt-get update 5 | RUN apt-get install -y libgmp3-dev 6 | 7 | # Install Python dependencies. 8 | COPY requirements.txt requirements.txt 9 | RUN pip install -r requirements.txt 10 | 11 | # Install the code and some python bindings. 12 | COPY pipeline.py pipeline.py 13 | COPY collatz.pyx collatz.pyx 14 | COPY setup.py setup.py 15 | RUN python setup.py install 16 | -------------------------------------------------------------------------------- /beam-cpp-example/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | apache-beam[gcp]==2.46.0 19 | cython==0.29.24 20 | pyparsing==2.4.2 21 | matplotlib==3.4.3 22 | -------------------------------------------------------------------------------- /beam-cpp-example/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from setuptools import setup 19 | from setuptools import Extension 20 | from Cython.Build import cythonize 21 | 22 | setup( 23 | name='beam-collatz', 24 | ext_modules=cythonize([ 25 | Extension( 26 | '*', 27 | ['collatz.pyx'], 28 | libraries=['gmp', 'gmpxx'], 29 | language='c++', 30 | ) 31 | ]), 32 | ) 33 | -------------------------------------------------------------------------------- /retail/monitoring/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | ### Terraform ### 3 | # Local .terraform directories 4 | **/.terraform/* 5 | 6 | # .tfstate files 7 | *.tfstate 8 | *.tfstate.* 9 | 10 | # Crash log files 11 | crash.log 12 | 13 | # Ignore any .tfvars files that are generated automatically for each Terraform run. 
Most 14 | # .tfvars files are managed as part of configuration and so should be included in 15 | # version control. 16 | # 17 | # example.tfvars 18 | 19 | # Ignore override files as they are usually used to override resources locally and so 20 | # are not checked in 21 | override.tf 22 | override.tf.json 23 | *_override.tf 24 | *_override.tf.json 25 | 26 | # Include override files you do wish to add to version control using negated pattern 27 | # !example_override.tf 28 | 29 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 30 | # example: *tfplan* -------------------------------------------------------------------------------- /retail/monitoring/dashboards/README.MD: -------------------------------------------------------------------------------- 1 | # Using Terraform to deploy monitoring dashboards 2 | 3 | This directory contains Cloud Monitoring dashboards that you can deploy using [Terraform](https://www.terraform.io/). If it's your first time using Terraform on GCP, you can read the materials listed in the resource section below. 4 | 5 | ## Usage 6 | This directory contains JSON definitions for monitoring dashboards that monitor your Dataflow pipelines. 
7 | 8 | Under this directory, run the following commands and provide proper arguments: 9 | 10 | ```bash 11 | terraform init 12 | terraform plan 13 | terraform apply 14 | ``` 15 | 16 | You can also provide the arguments directly in the CLI, for example: 17 | 18 | ```bash 19 | terraform apply -var 'dashboard_json_file=retail-pipeline.json' \ 20 | -var 'project_id=[your_project_id]' 21 | ``` 22 | 23 | ## Inputs 24 | 25 | | Name | Description | Type | Default | Required | 26 | |------|-------------|:----:|:-----:|:-----:| 27 | | dashboard\_json\_file | The JSON file of the dashboard | string | n/a | yes | 28 | | project\_id | The ID of the project in which the dashboard will be created | string | n/a | yes | 29 | 30 | ## Outputs 31 | 32 | | Name | Description | 33 | |------|-------------| 34 | | project\_id | The project in which the dashboard was created | 35 | | resource\_id | The resource id for the dashboard | 36 | | console\_link | The destination console URL for the dashboard | 37 | 38 | ## Resources: 39 | * [Terraform - Getting Started with the Google Provider](https://www.terraform.io/docs/providers/google/guides/getting_started.html) 40 | * [Terraform - Google monitoring dashboard](https://www.terraform.io/docs/providers/google/r/monitoring_dashboard.html) 41 | * [Use the Dashboard API to build your own monitoring dashboard](https://cloud.google.com/blog/products/management-tools/cloud-monitoring-dashboards-using-an-api) 42 | * [REST Resource: projects.dashboards](https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards) 43 | * [Getting started with Terraform on Google Cloud Platform](https://cloud.google.com/community/tutorials/getting-started-on-gcp-with-terraform) 44 | * [Additional Terraform examples](https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/blob/master/docs/terraform.md) -------------------------------------------------------------------------------- /retail/monitoring/dashboards/main.tf: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | provider "google" { 18 | # Resource google_monitoring_dashboard is available since 3.23.0 19 | # https://github.com/terraform-providers/terraform-provider-google/releases/tag/3.23 20 | version = ">= 3.23.0" 21 | } 22 | 23 | variable "project_id" { 24 | description = "The ID of the project in which the dashboard will be created." 25 | type = string 26 | } 27 | 28 | variable "dashboard_json_file" { 29 | description = "The JSON file of the dashboard." 30 | type = string 31 | } 32 | 33 | resource "google_project_service" "enable_destination_api" { 34 | project = var.project_id 35 | service = "monitoring.googleapis.com" 36 | disable_on_destroy = false 37 | } 38 | 39 | resource "google_monitoring_dashboard" "dashboard" { 40 | dashboard_json = file(var.dashboard_json_file) 41 | project = var.project_id 42 | } 43 | 44 | output "project_id" { 45 | value = var.project_id 46 | } 47 | 48 | output "resource_id" { 49 | description = "The resource id for the dashboard" 50 | value = google_monitoring_dashboard.dashboard.id 51 | } 52 | 53 | output "console_link" { 54 | description = "The destination console URL for the dashboard." 
55 | value = join("", ["https://console.cloud.google.com/monitoring/dashboards/custom/", 56 | element(split("/", google_monitoring_dashboard.dashboard.id), 3), 57 | "?project=", 58 | var.project_id]) 59 | } -------------------------------------------------------------------------------- /retail/retail-clickstream-application/bigquery/README.md: -------------------------------------------------------------------------------- 1 | # BigQuery 2 | 3 | ## Generate BigQuery Schema 4 | 5 | Manually defining the schema for BigQuery with nested records and repeated records can be difficult, so this repo contains an example file of JSON events that can be used to auto-generate the schema. 6 | 7 | 8 | 9 | Create BQ dataset and table and generate schema based on events 10 | 11 | ``` 12 | bq --location=US mk \ 13 | --dataset \ 14 | retail-data-demo:retail_dataset_generate 15 | ``` 16 | 17 | ``` 18 | bq --location=US load --autodetect --source_format=NEWLINE_DELIMITED_JSON retail_dataset_generate.ecommerce_events_generate_schema ecommerce_events.json 19 | ``` 20 | 21 | 22 | ``` 23 | bq show --format=prettyjson retail_dataset_generate.ecommerce_events_generate_schema > ecommerce_events_table_export.json 24 | ``` 25 | 26 | Use ctrl + j to convert multi-line to single line json -------------------------------------------------------------------------------- /retail/retail-clickstream-application/bigquery/ecommerce_events.json: -------------------------------------------------------------------------------- 1 | { "datetime": "2020-11-16 22:59:59", "event": "view_item", "user_id": "UID00003", "client_id": "CID00003","page":"/product-67890","page_previous": "/category-tshirts","ecommerce": { "items": [ { "item_name": "Donut Friday Scented T-Shirt", "item_id": "67890", "price": 33.75, "item_brand": "Google", "item_category": "Apparel", "item_category_2": "Mens", "item_category_3": "Shirts", "item_category_4": "Tshirts", "item_variant": "Black", "item_list_name": "Search Results", 
"item_list_id": "SR123", "index": 1, "quantity": 1 } ] } } 2 | { "datetime":"2020-11-16 20:59:59", "event": "add_to_cart", "user_id": "UID00001", "client_id": "CID00003","page":"/product-67890","page_previous": "/category-tshirts","ecommerce": { "items": [{ "item_name": "Donut Friday Scented T-Shirt", "item_id": "67890", "price": 33.75, "item_brand": "Google", "item_category": "Apparel", "item_category_2": "Mens", "item_category_3": "Shirts", "item_category_4": "Tshirts", "item_variant": "Black", "item_list_name": "Search Results", "item_list_id": "SR123", "index": 1, "quantity": 2 }] } } 3 | { "datetime":"2020-11-16 20:59:59", "event": "purchase", "user_id": "UID00001", "client_id": "CID00003", "page":"/checkout","page_previous": "/order-confirmation","ecommerce": { "purchase": { "transaction_id": "T12345", "affiliation": "Online Store", "value": 35.43, "tax": 4.90, "shipping": 5.99, "currency": "EUR", "coupon": "SUMMER_SALE", "items": [{ "item_name": "Triblend Android T-Shirt", "item_id": "12345", "item_price": 15.25, "item_brand": "Google", "item_category": "Apparel", "item_variant": "Gray", "quantity": 1, "item_coupon": "" }, { "item_name": "Donut Friday Scented T-Shirt", "item_id": "67890", "item_price": 33.75, "item_brand": "Google", "item_category": "Apparel", "item_variant": "Black", "quantity": 1 }] } } } 4 | -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-proxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/retail/retail-clickstream-application/cloud-run-proxy.png -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-ps-proxy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/retail/retail-clickstream-application/cloud-run-ps-proxy.png -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-pubsub-proxy/.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | .dockerignore 3 | node_modules 4 | npm-debug.log 5 | -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-pubsub-proxy/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official lightweight Node.js 12 image. 2 | # https://hub.docker.com/_/node 3 | FROM node:12-slim 4 | 5 | # Create and change to the app directory. 6 | WORKDIR /usr/src/app 7 | 8 | # Copy application dependency manifests to the container image. 9 | # A wildcard is used to ensure both package.json AND package-lock.json are copied. 10 | # Copying this separately prevents re-running npm install on every code change. 11 | COPY package*.json ./ 12 | 13 | # Install production dependencies. 14 | RUN npm install --only=production 15 | 16 | # Copy local code to the container image. 17 | COPY . ./ 18 | 19 | # Run the web service on container startup. 20 | CMD [ "npm", "start" ] -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-pubsub-proxy/index.js: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | const app = require('./app.js'); 16 | const PORT = process.env.PORT || 8080; 17 | 18 | app.listen(PORT, () => console.log(`pubsub proxy app listening on port ${PORT}`)); 19 | -------------------------------------------------------------------------------- /retail/retail-clickstream-application/cloud-run-pubsub-proxy/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pubsub-proxy", 3 | "version": "1.0.0", 4 | "description": "Cloud Run app to send messages to Pub/Sub topic using Node", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "node index.js" 8 | }, 9 | "author": "", 10 | "license": "Apache-2.0", 11 | "dependencies": { 12 | "express": "^4.17.1", 13 | "body-parser": "^1.19.0", 14 | "@google-cloud/pubsub": "^0.28.1", 15 | "safe-buffer": "5.1.2" 16 | } 17 | } -------------------------------------------------------------------------------- /retail/retail-clickstream-application/datalayer/add_to_cart.json: -------------------------------------------------------------------------------- 1 | { 2 | "event_datetime":"2020-11-16 20:59:59", 3 | "event": "add_to_cart", 4 | "user_id": "UID00003", 5 | "client_id": "CID00003", 6 | "page":"/product-67890", 7 | "page_previous": "/category-tshirts", 8 | "ecommerce": { 9 | "items": [{ 10 | "item_name": "Donut Friday Scented T-Shirt", 11 | "item_id": "67890", 12 | "price": 33.75, 13 | "item_brand": "Google", 14 | "item_category": "Apparel", 15 | "item_category_2": "Mens", 16 | "item_category_3": "Shirts", 
17 | "item_category_4": "Tshirts", 18 | "item_variant": "Black", 19 | "item_list_name": "Search Results", 20 | "item_list_id": "SR123", 21 | "index": 1, 22 | "quantity": 2 23 | }] 24 | } 25 | } -------------------------------------------------------------------------------- /retail/retail-clickstream-application/datalayer/purchase.json: -------------------------------------------------------------------------------- 1 | { 2 | "event_datetime":"2020-11-16 20:59:59", 3 | "event": "purchase", 4 | "user_id": "UID00001", 5 | "client_id": "CID00003", 6 | "page":"/checkout", 7 | "page_previous": "/order-confirmation", 8 | "ecommerce": { 9 | "purchase": { 10 | "transaction_id": "T12345", 11 | "affiliation": "Online Store", 12 | "value": 35.43, 13 | "tax": 4.90, 14 | "shipping": 5.99, 15 | "currency": "EUR", 16 | "coupon": "SUMMER_SALE", 17 | "items": [{ 18 | "item_name": "Triblend Android T-Shirt", 19 | "item_id": "12345", 20 | "item_price": 15.25, 21 | "item_brand": "Google", 22 | "item_category": "Apparel", 23 | "item_variant": "Gray", 24 | "quantity": 1, 25 | "item_coupon": "" 26 | }, { 27 | "item_name": "Donut Friday Scented T-Shirt", 28 | "item_id": "67890", 29 | "item_price": 33.75, 30 | "item_brand": "Google", 31 | "item_category": "Apparel", 32 | "item_variant": "Black", 33 | "quantity": 1 34 | }] 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /retail/retail-clickstream-application/datalayer/view_item.json: -------------------------------------------------------------------------------- 1 | { 2 | "event_datetime":"2020-11-16 22:59:59", 3 | "event": "view_item", 4 | "user_id": "UID00003", 5 | "client_id": "CID00003", 6 | "page":"/product-67890", 7 | "page_previous": "/category-tshirts", 8 | "ecommerce": { 9 | "items": [{ 10 | "item_name": "Donut Friday Scented T-Shirt", 11 | "item_id": "67890", 12 | "price": 33.75, 13 | "item_brand": "Google", 14 | "item_category": "Apparel", 15 | "item_category_2": "Mens", 16 | 
"item_category_3": "Shirts", 17 | "item_category_4": "Tshirts", 18 | "item_variant": "Black", 19 | "item_list_name": "Search Results", 20 | "item_list_id": "SR123", 21 | "index": 1, 22 | "quantity": 1 23 | }] 24 | } 25 | } -------------------------------------------------------------------------------- /retail/retail-clickstream-application/terraform.tfvars.example: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | project_id = "default-project-id" 19 | delete_contents_on_destroy = true 20 | -------------------------------------------------------------------------------- /retail/retail-clickstream-application/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | variable "project_id" { 18 | description = "Project where the dataset and table are created." 19 | type = string 20 | } 21 | 22 | variable "delete_contents_on_destroy" { 23 | description = "(Optional) If set to true, delete all the tables in the dataset when destroying the resource; otherwise, destroying the resource will fail if tables are present." 24 | type = bool 25 | default = null 26 | } 27 | 28 | variable "force_destroy" { 29 | description = "When deleting a bucket, this boolean option will delete all contained objects. If false, Terraform will fail to delete buckets which contain objects." 30 | type = bool 31 | default = false 32 | } 33 | -------------------------------------------------------------------------------- /retail/retail-java-applications/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | plugins { 20 | id 'idea' 21 | id 'java' 22 | id "com.diffplug.gradle.spotless" version "4.5.1" 23 | id "com.github.spotbugs" version "4.4.4" 24 | } 25 | 26 | // All common configuration 27 | allprojects { 28 | 29 | group 'com.google.dataflow.sample' 30 | version '0.9.0' 31 | 32 | ext { 33 | beamVersion = '2.28.0' 34 | autoValueVersion = '1.6.3' 35 | } 36 | 37 | repositories { 38 | mavenCentral() 39 | } 40 | } 41 | 42 | // Enable code formatting 43 | spotless { 44 | java { 45 | licenseHeaderFile 'spotless/Apache_Licence_For_Spotless' 46 | googleJavaFormat('1.7') 47 | target fileTree(dir: project.rootDir, includes: ['**/*.java']) 48 | } 49 | } 50 | 51 | afterEvaluate { 52 | tasks.getByName('spotlessCheck').dependsOn(tasks.getByName('spotlessApply')) 53 | } 54 | 55 | //subprojects { 56 | // apply plugin: 'com.github.spotbugs' 57 | //} 58 | 59 | wrapper { 60 | gradleVersion = '5.6.1' 61 | } 62 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/README.MD: -------------------------------------------------------------------------------- 1 | 19 | # Overview 20 | The engineering dept's code base, including: 21 | 22 | ## business-logic 23 | Where the core functionality of the data engineering pipelines exists. 24 | ## data-objects 25 | Schema information about the objects used by the pipeline, including serialization within and external to the pipeline. 26 | ## pipelines 27 | Data pipelines composed using the building blocks created within the business-logic layer. 28 | 29 | 30 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | plugins { 20 | id 'java' 21 | id 'idea' 22 | } 23 | 24 | sourceCompatibility = 1.8 25 | 26 | dependencies { 27 | // Enable code formatting 28 | compile 'com.google.googlejavaformat:google-java-format:1.7' 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/README.MD: -------------------------------------------------------------------------------- 1 | 19 | # Clickstream Processing 20 | 21 | All business transformations for the processing and analysis of clickstream data. 22 | 23 | # Main Process Clickstream: 24 | * Read Click Stream Topic 25 | * Parse Messages to Beam SCHEMAS 26 | * Branch 1: 27 | * Write RAW JSON String Clickstream for storage 28 | * Branch 2: 29 | * Clean the data 30 | * Write Cleaned Data to BigQuery 31 | * Branch 2.1: 32 | * Filter out events of type ERROR 33 | * Count Page Views per product in 5 sec windows 34 | * Export page view aggregates to BigTable 35 | * Export page view aggregates to BigQuery 36 | 37 | # Main Transactions Processing 38 | 39 | All business transformations for the processing and analysis of transaction data. 
40 | 41 | Process Transaction: 42 | * Read transaction Stream Topic 43 | * Branch 1: 44 | * Write RAW JSON String Transaction for storage 45 | * Branch 2: 46 | * Parse Messages to Beam SCHEMAS 47 | * Convert to transaction object 48 | * Enrich the data 49 | * Write enriched Data to BigQuery 50 | 51 | All business transformations for the processing and analysis of stock data. 52 | 53 | # Main Stock Processing 54 | * Read stock Stream Topic 55 | * Branch 1: 56 | * Write RAW JSON String stock for storage 57 | * Branch 2: 58 | * Parse Messages to Beam SCHEMAS 59 | * Convert to Stock Object 60 | * Write Data to BigQuery 61 | 62 | # Utility Transaction 63 | * Count sales per item per store within a fixed window 64 | * Count sales per item within a fixed window 65 | 66 | # Utility Stock 67 | * Count incoming stock per item per store within a fixed window 68 | * Count incoming stock per item within a fixed window 69 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/DeploymentAnnotations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | /** Container for marker annotations describing how a transform relates to deployment life cycle events. Neither annotation declares a retention policy or a target. */ 22 | @Experimental 23 | public class DeploymentAnnotations { 24 | 25 | /** Used to indicate a transform will have side effects in deployment life cycle events. */ 26 | public static @interface NoPartialResultsOnDrain {} 27 | /** Marker annotation with no members; judging by its name it flags a transform expected to emit partial results on drain (TODO confirm intended usage). */ 28 | public static @interface PartialResultsExpectedOnDrain {} 29 | } 30 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineAggregationOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; 22 | import org.apache.beam.sdk.options.Default; 23 | import org.apache.beam.sdk.options.Description; 24 | /** Pipeline options for aggregate outputs: a Bigtable instance, a BigQuery table and the aggregation window length. Extends BigQueryOptions. */ 25 | @Experimental 26 | public interface RetailPipelineAggregationOptions extends BigQueryOptions { 27 | /** Bigtable instance option for aggregates (per the option name; confirm against callers). Defaults to "aggregate-tables". */ 28 | @Default.String("aggregate-tables") 29 | String getAggregateBigTableInstance(); 30 | 31 | void setAggregateBigTableInstance(String aggregateBigTableInstance); 32 | /** BigQuery table option for aggregates (per the option name). Defaults to "Retail_Store_Aggregations". */ 33 | @Default.String("Retail_Store_Aggregations") 34 | String getAggregateBigQueryTable(); 35 | 36 | void setAggregateBigQueryTable(String aggregateBigQueryTable); 37 | /** Length of the fixed aggregation window; the "Sec" suffix suggests seconds (TODO confirm). Defaults to 5. */ 38 | @Description("The fixed window period which aggregations are computed over") 39 | @Default.Integer(5) 40 | Integer getAggregationDefaultSec(); 41 | 42 | void setAggregationDefaultSec(Integer aggregationDefaultSec); 43 | } 44 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineClickStreamOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.io.gcp.pubsub.PubsubOptions; 22 | import org.apache.beam.sdk.options.Default; 23 | /** Pipeline options for clickstream processing: one Pub/Sub subscription and three BigQuery table names. Extends PubsubOptions. */ 24 | @Experimental 25 | public interface RetailPipelineClickStreamOptions extends PubsubOptions { 26 | /** Pub/Sub subscription option for clickstream input (per the option name). Defaults to "subscriptions/clickstream-inbound-sub". */ 27 | @Default.String("subscriptions/clickstream-inbound-sub") 28 | String getClickStreamPubSubSubscription(); 29 | 30 | void setClickStreamPubSubSubscription(String clickStreamPubSubSubscription); 31 | /** BigQuery table option for raw clickstream data. Defaults to "Retail_Store.raw_clickstream_data". */ 32 | @Default.String("Retail_Store.raw_clickstream_data") 33 | String getClickStreamBigQueryRawTable(); 34 | 35 | void setClickStreamBigQueryRawTable(String clickStreamBigQueryRawTable); 36 | /** BigQuery table option for cleaned clickstream data. Defaults to "Retail_Store.clean_clickstream_data". */ 37 | @Default.String("Retail_Store.clean_clickstream_data") 38 | String getClickStreamBigQueryCleanTable(); 39 | 40 | void setClickStreamBigQueryCleanTable(String clickStreamBigQueryCleanTable); 41 | /** Table option for sessionized clickstream data. Defaults to "Retail_Store.sessionized_clickstream". */ 42 | @Default.String("Retail_Store.sessionized_clickstream") 43 | String getClickStreamSessionizedTable(); 44 | 45 | void setClickStreamSessionizedTable(String clickStreamSessionizedTable); 46 | } 47 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineInventoryOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) 
under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.options.Default; 22 | import org.apache.beam.sdk.options.PipelineOptions; 23 | /** Pipeline options for inventory processing: one Pub/Sub subscription and two BigQuery table names. */ 24 | @Experimental 25 | public interface RetailPipelineInventoryOptions extends PipelineOptions { 26 | /** Pub/Sub subscription option for inventory input. Defaults to "subscriptions/global-inventory-topic". NOTE(review): the default ends in "-topic" although the option is a subscription; confirm. */ 27 | @Default.String("subscriptions/global-inventory-topic") 28 | String getInventoryPubSubSubscriptions(); 29 | 30 | void setInventoryPubSubSubscriptions(String inventoryPubSubSubscriptions); 31 | /** BigQuery table option for raw inventory data. Defaults to "Retail_Store.raw_inventory_data". */ 32 | @Default.String("Retail_Store.raw_inventory_data") 33 | String getInventoryBigQueryRawTable(); 34 | 35 | void setInventoryBigQueryRawTable(String inventoryBigQueryRawTable); 36 | /** BigQuery table option for cleaned inventory data. Defaults to "Retail_Store.clean_inventory_data". */ 37 | @Default.String("Retail_Store.clean_inventory_data") 38 | String getInventoryBigQueryCleanTable(); 39 | 40 | void setInventoryBigQueryCleanTable(String inventoryBigQueryCleanTable); 41 | } 42 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineOptions.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; 21 | import org.apache.beam.sdk.annotations.Experimental; 22 | import org.apache.beam.sdk.options.Default; 23 | /** Top-level options interface for the retail pipelines: combines DataflowPipelineOptions with the aggregation, clickstream, inventory, transactions, stores and reporting option interfaces, and adds two boolean flags. */ 24 | @Experimental 25 | public interface RetailPipelineOptions 26 | extends DataflowPipelineOptions, 27 | RetailPipelineAggregationOptions, 28 | RetailPipelineClickStreamOptions, 29 | RetailPipelineInventoryOptions, 30 | RetailPipelineTransactionsOptions, 31 | RetailPipelineStoresOptions, 32 | RetailPipelineReportingOptions { 33 | /** Debug-mode flag; defaults to false. What it toggles is decided by callers not shown here. */ 34 | @Default.Boolean(false) 35 | Boolean getDebugMode(); 36 | 37 | void setDebugMode(Boolean debugMode); 38 | /** Test-mode flag; defaults to false. What it toggles is decided by callers not shown here. */ 39 | @Default.Boolean(false) 40 | Boolean getTestModeEnabled(); 41 | 42 | void setTestModeEnabled(Boolean testModeEnabled); 43 | } 44 | -------------------------------------------------------------------------------- 
/retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineReportingOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.options.Default; 22 | import org.apache.beam.sdk.options.Description; 23 | import org.apache.beam.sdk.options.PipelineOptions; 24 | /** Pipeline options for reporting outputs: dead-letter table, data warehouse project, BigQuery datasets and the stock aggregate Pub/Sub topic. */ 25 | @Experimental 26 | public interface RetailPipelineReportingOptions extends PipelineOptions { 27 | /** Dead-letter table option. Defaults to "Retail_Store.deadletter_data". */ 28 | @Description("Deadletter Table for pipeline.") 29 | @Default.String("Retail_Store.deadletter_data") 30 | String getDeadLetterTable(); 31 | 32 | void setDeadLetterTable(String deadletterBigQueryTable); 33 | /** Data warehouse project option. No default is declared here, so callers should expect it to be unset unless supplied. */ 34 | @Description("Project used for data warehousing.") 35 | String getDataWarehouseOutputProject(); 36 | 37 | void setDataWarehouseOutputProject(String dataWarehouseOutputProject); 38 | /** Main reporting dataset option. Defaults to "Retail_Store". */ 39 | @Default.String("Retail_Store") 40 | String getMainReportingDataset(); 41 | 42 | void setMainReportingDataset(String mainReportingDataset); 43 | /** Aggregate BigQuery dataset option. Defaults to "Retail_Store_Aggregations". */ 44 | @Default.String("Retail_Store_Aggregations") 45 | String getAggregateBigQueryDataset(); 46 | 47 | void setAggregateBigQueryDataset(String aggregateBigQueryDataset); 48 | /** Pub/Sub output topic option for stock aggregates. Defaults to "/topics/global-stock-level-topic". NOTE(review): unlike the subscription defaults elsewhere, this one starts with "/"; confirm that is intended. */ 49 | @Default.String("/topics/global-stock-level-topic") 50 | String getAggregateStockPubSubOutputTopic(); 51 | 52 | void setAggregateStockPubSubOutputTopic(String aggregateStockPubSubOutputTopic); 53 | } 54 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineStoresOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.options.Default; 22 | import org.apache.beam.sdk.options.Description; 23 | import org.apache.beam.sdk.options.PipelineOptions; 24 | /** Pipeline options for store data: a single BigQuery table reference for store locations. */ 25 | @Experimental 26 | public interface RetailPipelineStoresOptions extends PipelineOptions { 27 | /** Store-location BigQuery table reference option. Defaults to "Retail_Store.Store_Locations". */ 28 | @Description("Store Location BigQuery TableReference") 29 | @Default.String("Retail_Store.Store_Locations") 30 | String getStoreLocationBigQueryTableRef(); 31 | 32 | void setStoreLocationBigQueryTableRef(String storeLocationTableRef); 33 | } 34 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineTransactionsOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.options.Default; 22 | import org.apache.beam.sdk.options.PipelineOptions; 23 | /** Pipeline options for transaction processing: one Pub/Sub subscription and two BigQuery table names. */ 24 | @Experimental 25 | public interface RetailPipelineTransactionsOptions extends PipelineOptions { 26 | /** Pub/Sub subscription option for transaction input. Defaults to "subscriptions/global-transaction-topic". NOTE(review): the default ends in "-topic" although the option is a subscription; confirm. */ 27 | @Default.String("subscriptions/global-transaction-topic") 28 | String getTransactionsPubSubSubscription(); 29 | 30 | void setTransactionsPubSubSubscription(String transactionsPubSubSubscription); 31 | /** BigQuery table option for raw transaction data. Defaults to "Retail_Store.raw_transactions_data". */ 32 | @Default.String("Retail_Store.raw_transactions_data") 33 | String getTransactionsBigQueryRawTable(); 34 | 35 | void setTransactionsBigQueryRawTable(String transactionsBigQueryRawTable); 36 | /** BigQuery table option for cleaned transaction data. Defaults to "Retail_Store.clean_transaction_data". */ 37 | @Default.String("Retail_Store.clean_transaction_data") 38 | String getTransactionsBigQueryCleanTable(); 39 | 40 | void setTransactionsBigQueryCleanTable(String transactionsBigQueryCleanTable); 41 | } 42 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/transforms/ErrorMsg.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more 
contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.transforms; 19 | 20 | import com.google.auto.value.AutoValue; 21 | import javax.annotation.Nullable; 22 | import org.apache.beam.sdk.annotations.Experimental; 23 | import org.apache.beam.sdk.schemas.AutoValueSchema; 24 | import org.apache.beam.sdk.schemas.annotations.DefaultSchema; 25 | import org.joda.time.Instant; 26 | 27 | /** Error Objects for Dead Letter */ 28 | @AutoValue 29 | @DefaultSchema(AutoValueSchema.class) 30 | @Experimental 31 | public abstract class ErrorMsg { 32 | public @Nullable abstract String getTransform(); 33 | 34 | public @Nullable abstract String getError(); 35 | 36 | public @Nullable abstract String getData(); 37 | 38 | public @Nullable abstract Instant getTimestamp(); 39 | 40 | public abstract Builder toBuilder(); 41 | 42 | public static Builder builder() { 43 | return new AutoValue_ErrorMsg.Builder(); 44 | } 45 | 46 | @AutoValue.Builder 47 | public abstract static class Builder { 48 | public abstract Builder setTransform(String value); 49 | 50 | public abstract Builder setError(String value); 51 | 52 | public abstract Builder setData(String value); 53 | 54 | public abstract Builder setTimestamp(Instant value); 55 | 
56 | public abstract ErrorMsg build(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/transforms/clickstream/validation/EventDateTimeCorrectionService.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.transforms.clickstream.validation; 19 | 20 | import org.apache.beam.sdk.transforms.DoFn; 21 | import org.apache.beam.sdk.values.Row; 22 | import org.apache.beam.sdk.values.Row.FieldValueBuilder; 23 | import org.joda.time.Instant; 24 | 25 | public class EventDateTimeCorrectionService extends DoFn { 26 | 27 | @ProcessElement 28 | public void process(@Element Row input, @Timestamp Instant time, OutputReceiver o) { 29 | 30 | // Pass through if items are not needed by this event. 
31 | 32 | if (!input.getArray("errors").contains(ValidateEventDateTime.CORRECTION_TIMESTAMP)) { 33 | o.output(input); 34 | return; 35 | } 36 | 37 | FieldValueBuilder row = Row.fromRow(input.getRow("data")); 38 | 39 | // There are two stations where we can be in this code, the event_datetime did not parse or 40 | // the time was in the future. 41 | // In both cases the fix is to set timestamp to be the processing time. 42 | row.withFieldValue("timestamp", time.getMillis()); 43 | o.output(Row.fromRow(input).withFieldValue("data", row.build()).build()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/transforms/clickstream/validation/ValidationUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.transforms.clickstream.validation; 19 | 20 | import com.google.dataflow.sample.retail.businesslogic.core.transforms.clickstream.ValidateAndCorrectCSEvt; 21 | import java.util.Collection; 22 | import org.apache.beam.sdk.schemas.Schema; 23 | import org.apache.beam.sdk.schemas.Schema.FieldType; 24 | import org.apache.beam.sdk.transforms.DoFn; 25 | import org.apache.beam.sdk.values.Row; 26 | 27 | public class ValidationUtils { 28 | public static Schema getValidationWrapper(Schema rowSchema) { 29 | 30 | return Schema.builder() 31 | .addRowField("data", rowSchema) 32 | .addField("errors", FieldType.array(FieldType.STRING).withNullable(true)) 33 | .build(); 34 | } 35 | 36 | public static class ValidationRouter extends DoFn { 37 | 38 | @ProcessElement 39 | public void process(@Element Row input, MultiOutputReceiver o) { 40 | 41 | Collection errors = input.getArray("errors"); 42 | 43 | if (errors == null || errors.size() < 1) { 44 | o.get(ValidateAndCorrectCSEvt.MAIN).output(input); 45 | return; 46 | } 47 | o.get(ValidateAndCorrectCSEvt.NEEDS_CORRECTIONS).output(input); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/utils/Print.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.utils; 19 | 20 | import com.google.dataflow.sample.retail.businesslogic.core.transforms.clickstream.WriteAggregatesToBigTable.PrintMutation; 21 | import java.io.IOException; 22 | import org.apache.beam.sdk.annotations.Experimental; 23 | import org.apache.beam.sdk.transforms.DoFn; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | @Experimental 28 | public class Print extends DoFn { 29 | 30 | private static final Logger LOG = LoggerFactory.getLogger(PrintMutation.class); 31 | String message; 32 | 33 | public Print(String message) { 34 | this.message = message; 35 | } 36 | 37 | @ProcessElement 38 | public void process(@Element T row) throws IOException { 39 | 40 | LOG.info(message + row.toString()); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/externalservices/RetailCompanyServices.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.externalservices; 19 | 20 | import com.google.dataflow.sample.retail.dataobjects.Item; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import java.util.Map; 24 | import org.apache.beam.sdk.annotations.Experimental; 25 | import org.apache.beam.sdk.schemas.Schema; 26 | 27 | /** 28 | * This class represents a mock client to a microservice implemented by the Demo Retail company. 29 | * 30 | *

The class emulates communication between the Dataflow pipeline, and a hypothetical internal 31 | * microservice. 32 | * 33 | *

Real services will often take 10-100's of ms to respond, which cause back pressure within a 34 | * pipeline. This version of this mock does not cause push back. 35 | * 36 | *

TODO convert to a service which requires a few hundred ms to respond. 37 | */ 38 | @Experimental 39 | public class RetailCompanyServices { 40 | 41 | public Map convertItemIdsToFullText(List itemIds, Schema itemSchema) { 42 | 43 | Map map = new HashMap<>(); 44 | 45 | Item item = 46 | Item.builder() 47 | .setItemBrand("item_brand") 48 | .setItemCat01("foo_category") 49 | .setItemName("foo_name") 50 | .build(); 51 | 52 | itemIds.forEach(x -> map.put(x, item)); 53 | return map; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/test/java/com/google/dataflow/sample/retail/businesslogic/core/utils/test/avrotestobjects/InventoryAVRO.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.utils.test.avrotestobjects; 19 | 20 | import org.apache.avro.reflect.Nullable; 21 | import org.apache.beam.sdk.coders.AvroCoder; 22 | import org.apache.beam.sdk.coders.DefaultCoder; 23 | 24 | @DefaultCoder(AvroCoder.class) 25 | /** 26 | * Used as part of utility for creation of JSON with {@link Gson}. TODO Remove in favour of raw 27 | * String for the JSON. 28 | */ 29 | public class InventoryAVRO { 30 | 31 | public @Nullable long timestamp; 32 | public @Nullable int count; 33 | public @Nullable int sku; 34 | public @Nullable int product_id; 35 | public @Nullable int store_id; 36 | public @Nullable int aisleId; 37 | public @Nullable String product_name; 38 | public @Nullable int departmentId; 39 | public @Nullable Float price; 40 | public @Nullable String recipeId; 41 | public @Nullable String image; 42 | } 43 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/business-logic/src/test/java/com/google/dataflow/sample/retail/businesslogic/core/utils/test/clickstream/WriterawJSONMessagesToBigQureyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.businesslogic.core.utils.test.clickstream; 19 | 20 | import com.google.dataflow.sample.retail.businesslogic.core.options.RetailPipelineOptions; 21 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 22 | import org.apache.beam.sdk.testing.TestPipeline; 23 | import org.junit.Rule; 24 | import org.junit.Test; 25 | import org.junit.runner.RunWith; 26 | import org.junit.runners.JUnit4; 27 | 28 | @RunWith(JUnit4.class) 29 | /** Unit tests for {@link ClickstreamProcessing}. */ 30 | public class WriterawJSONMessagesToBigQureyTest { 31 | 32 | RetailPipelineOptions options = PipelineOptionsFactory.as(RetailPipelineOptions.class); 33 | 34 | { 35 | options.setTestModeEnabled(true); 36 | } 37 | 38 | @Rule public transient TestPipeline pipeline = TestPipeline.create(); 39 | 40 | @Test 41 | public void testCountViews() { 42 | 43 | pipeline.run(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/data-objects/README.MD: -------------------------------------------------------------------------------- 1 | 19 | 20 | #Overview 21 | This module holds all the schema information for the objects used in the pipelines. 22 | Other schema information, for example BigTable column names which are stored in this module. 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/data-objects/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
plugins {
    id 'java'
}

description = "Retail Sample Application :: Java :: Data Objects"
ext.summary = "Data Objects used by all modules, within the pipeline and for I/O"

dependencies {
    // Main: Beam core SDK
    compile "org.apache.beam:beam-sdks-java-core:${beamVersion}"

    // AutoValue: annotation processor plus the annotations themselves
    annotationProcessor "com.google.auto.value:auto-value:${autoValueVersion}"
    compile "com.google.auto.value:auto-value-annotations:${autoValueVersion}"

    // Test
    testCompile 'junit:junit:4.12'
}
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.dataobjects; 19 | 20 | import com.google.auto.value.AutoValue; 21 | import java.util.List; 22 | import javax.annotation.Nullable; 23 | import org.apache.beam.sdk.schemas.AutoValueSchema; 24 | import org.apache.beam.sdk.schemas.annotations.DefaultSchema; 25 | import org.apache.beam.sdk.schemas.annotations.SchemaFieldName; 26 | 27 | @AutoValue 28 | @DefaultSchema(AutoValueSchema.class) 29 | public abstract class Ecommerce { 30 | 31 | @SchemaFieldName("items") 32 | public @Nullable abstract List getItems(); 33 | 34 | @SchemaFieldName("purchase") 35 | public @Nullable abstract Purchase getPurchase(); 36 | 37 | public static Builder builder() { 38 | 39 | return new AutoValue_Ecommerce.Builder(); 40 | } 41 | 42 | @AutoValue.Builder 43 | public abstract static class Builder { 44 | public abstract Builder setItems(List value); 45 | 46 | public abstract Builder setPurchase(Purchase purchase); 47 | 48 | public abstract Ecommerce build(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/data-objects/src/main/java/com/google/dataflow/sample/retail/dataobjects/StockAggregation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.retail.dataobjects; 19 | 20 | import com.google.auto.value.AutoValue; 21 | import javax.annotation.Nullable; 22 | import org.apache.beam.sdk.annotations.Experimental; 23 | import org.apache.beam.sdk.schemas.AutoValueSchema; 24 | import org.apache.beam.sdk.schemas.annotations.DefaultSchema; 25 | import org.apache.beam.sdk.schemas.annotations.SchemaFieldName; 26 | 27 | @AutoValue 28 | @DefaultSchema(AutoValueSchema.class) 29 | @Experimental 30 | public abstract class StockAggregation { 31 | 32 | @Nullable 33 | public abstract Long getDurationMS(); 34 | 35 | @Nullable 36 | public abstract Long getStartTime(); 37 | 38 | @Nullable 39 | @SchemaFieldName("product_id") 40 | public abstract Integer getProductId(); 41 | 42 | @Nullable 43 | @SchemaFieldName("store_id") 44 | public abstract Integer getStoreId(); 45 | 46 | @Nullable 47 | public abstract Long getCount(); 48 | 49 | public abstract StockAggregation.Builder toBuilder(); 50 | 51 | public static StockAggregation.Builder builder() { 52 | return new AutoValue_StockAggregation.Builder(); 53 | } 54 | 55 | @AutoValue.Builder 56 | public abstract static class Builder { 57 | 58 | public abstract Builder setDurationMS(Long value); 59 | 60 | public abstract 
Builder setStartTime(Long value); 61 | 62 | public abstract Builder setProductId(Integer value); 63 | 64 | public abstract Builder setStoreId(Integer value); 65 | 66 | public abstract Builder setCount(Long value); 67 | 68 | public abstract StockAggregation build(); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/pipelines/README.MD: -------------------------------------------------------------------------------- 1 | 19 | # Overview 20 | 21 | 22 | -------------------------------------------------------------------------------- /retail/retail-java-applications/data-engineering-dept/pipelines/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | plugins { 19 | id 'java' 20 | } 21 | 22 | description = "Retail Sample Application :: Java :: Pipelines" 23 | ext.summary = "Pipelines composition." 
dependencies {
    // Main: sibling modules of this build
    compile project(':data-engineering-dept:data-objects')
    compile project(':data-engineering-dept:business-logic')

    // SLF4J binding for java.util.logging (declared on the compile configuration)
    compile 'org.slf4j:slf4j-jdk14:1.7.7'

    // Test
    testCompile 'junit:junit:4.12'
    testCompile project(':data-engineering-dept:business-logic').sourceSets.test.output
}

// Runs the pipeline main class on the main runtime classpath. All JVM system properties are
// forwarded, and the whitespace-separated value of -Dexec.args becomes the program arguments.
task executeOnDataflow(type: JavaExec) {
    main = "com.google.dataflow.sample.retail.pipeline.RetailDataProcessingPipeline"
    classpath = sourceSets.main.runtimeClasspath
    systemProperties System.getProperties()
    args System.getProperty("exec.args", "").split()
}
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | distributionBase=GRADLE_USER_HOME 19 | distributionPath=wrapper/dists 20 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.1-bin.zip 21 | zipStoreBase=GRADLE_USER_HOME 22 | zipStorePath=wrapper/dists 23 | -------------------------------------------------------------------------------- /retail/retail-java-applications/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | rootProject.name = 'retail-java-applications' 20 | 21 | include ':data-engineering-dept:data-objects' 22 | include ':data-engineering-dept:business-logic' 23 | include ':data-engineering-dept:pipelines' 24 | -------------------------------------------------------------------------------- /retail/retail-java-applications/spotless/Apache_Licence_For_Spotless: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/examples/pubsub/terraform.tfvars: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | region = "us-central1" 17 | topic_clickstream_inbound = "Clickstream-inbound" 18 | topic_transactions_inbound = "Transactions-inbound" 19 | topic_inventory_inbound = "Inventory-inbound" 20 | topic_inventory_outbound = "Inventory-outbound" 21 | clickstream_inbound_sub = "Clickstream-inbound-sub" 22 | transactions_inbound_sub = "Transactions-inbound-sub" 23 | inventory_inbound_sub = "Inventory-inbound-sub" 24 | 25 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/examples/pubsub/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | variable "project_id" { 18 | type = string 19 | description = "Project ID in GCP" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "Name of the selected region" 25 | } 26 | 27 | variable "topic_clickstream_inbound" { 28 | type = string 29 | description = "Topic name for clickstream inbound" 30 | } 31 | 32 | variable "topic_transactions_inbound" { 33 | type = string 34 | description = "Topic name for transactions inbound" 35 | } 36 | 37 | variable "topic_inventory_inbound" { 38 | type = string 39 | description = "Topic name for inventory inbound" 40 | } 41 | 42 | variable "topic_inventory_outbound" { 43 | type = string 44 | description = "Topic name for inventory outbound" 45 | } 46 | 47 | variable "clickstream_inbound_sub" { 48 | type = string 49 | description = "Subscription for clickstream inbound" 50 | } 51 | 52 | variable "transactions_inbound_sub" { 53 | type = string 54 | description = "Subscription for transactions inbound" 55 | } 56 | 57 | variable "inventory_inbound_sub" { 58 | type = string 59 | description = "Subscription for inventory inbound" 60 | } 61 | 62 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | google = { 20 | source = "hashicorp/google" 21 | version = ">= 3.48.0" 22 | } 23 | } 24 | } 25 | 26 | provider "google" { 27 | project = var.project_id 28 | region = var.region 29 | } 30 | 31 | module "pubsub" { 32 | source = "./modules/pubsub" 33 | topic_clickstream_inbound = var.topic_clickstream_inbound 34 | topic_transactions_inbound = var.topic_transactions_inbound 35 | topic_inventory_inbound = var.topic_inventory_inbound 36 | topic_inventory_outbound = var.topic_inventory_outbound 37 | clickstream_inbound_sub = var.clickstream_inbound_sub 38 | transactions_inbound_sub = var.transactions_inbound_sub 39 | inventory_inbound_sub = var.inventory_inbound_sub 40 | } 41 | 42 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/modules/pubsub/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | variable "topic_clickstream_inbound" { 17 | type = string 18 | description = "Topic name for clickstream inbound" 19 | } 20 | 21 | variable "topic_transactions_inbound" { 22 | type = string 23 | description = "Topic name for transactions inbound" 24 | } 25 | 26 | variable "topic_inventory_inbound" { 27 | type = string 28 | description = "Topic name for inventory inbound" 29 | } 30 | 31 | variable "topic_inventory_outbound" { 32 | type = string 33 | description = "Topic name for inventory outbound" 34 | } 35 | 36 | variable "clickstream_inbound_sub" { 37 | type = string 38 | description = "Subscription name for clickstream inbound" 39 | } 40 | 41 | variable "transactions_inbound_sub" { 42 | type = string 43 | description = "Subscription name for transactions inbound" 44 | } 45 | 46 | variable "inventory_inbound_sub" { 47 | type = string 48 | description = "Subscription name for inventory inbound" 49 | } 50 | 51 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/terraform.tfvars: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | region = "us-central1" 17 | topic_clickstream_inbound = "Clickstream-inbound" 18 | topic_transactions_inbound = "Transactions-inbound" 19 | topic_inventory_inbound = "Inventory-inbound" 20 | topic_inventory_outbound = "Inventory-outbound" 21 | clickstream_inbound_sub = "Clickstream-inbound-sub" 22 | transactions_inbound_sub = "Transactions-inbound-sub" 23 | inventory_inbound_sub = "Inventory-inbound-sub" 24 | 25 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/test/pubsub/go.mod: -------------------------------------------------------------------------------- 1 | module dataflow-sample-applications/retail/retail-java-applications/terraform/test 2 | 3 | go 1.16 4 | 5 | require ( 6 | cloud.google.com/go/pubsub v1.3.1 7 | github.com/gruntwork-io/terratest v0.40.22 8 | github.com/stretchr/testify v1.7.0 9 | ) 10 | -------------------------------------------------------------------------------- /retail/retail-java-applications/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | variable "project_id" { 18 | type = string 19 | description = "Project ID in GCP" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "Name of the selected region" 25 | } 26 | 27 | variable "topic_clickstream_inbound" { 28 | type = string 29 | description = "Topic name for clickstream inbound" 30 | } 31 | 32 | variable "topic_transactions_inbound" { 33 | type = string 34 | description = "Topic name for transactions inbound" 35 | } 36 | 37 | variable "topic_inventory_inbound" { 38 | type = string 39 | description = "Topic name for inventory inbound" 40 | } 41 | 42 | variable "topic_inventory_outbound" { 43 | type = string 44 | description = "Topic name for inventory outbound" 45 | } 46 | 47 | variable "clickstream_inbound_sub" { 48 | type = string 49 | description = "Subscription for clickstream inbound" 50 | } 51 | 52 | variable "transactions_inbound_sub" { 53 | type = string 54 | description = "Subscription for transactions inbound" 55 | } 56 | 57 | variable "inventory_inbound_sub" { 58 | type = string 59 | description = "Subscription for inventory inbound" 60 | } 61 | 62 | -------------------------------------------------------------------------------- /timeseries-streaming/docs/Draft_Dataflow_streaming_time_series_sample_library_user_guide_v0.4.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/docs/Draft_Dataflow_streaming_time_series_sample_library_user_guide_v0.4.0.pdf -------------------------------------------------------------------------------- /timeseries-streaming/images/timeseries_metrics.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/images/timeseries_metrics.png -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | plugins { 19 | id 'java' 20 | id 'idea' 21 | id "com.diffplug.gradle.spotless" version "3.24.2" 22 | } 23 | 24 | sourceCompatibility = 1.8 25 | 26 | dependencies { 27 | compile project(':TimeSeriesPipeline') 28 | compile project(':TimeSeriesMetricsLibrary') 29 | 30 | compile "com.google.auto.value:auto-value-annotations:${autoValueVersion}" 31 | annotationProcessor "com.google.auto.value:auto-value:${autoValueVersion}" 32 | 33 | testCompile project(path: ":TimeSeriesPipeline", configuration: 'testArtifacts') 34 | testCompile group: 'junit', name: 'junit', version: '4.12' 35 | } 36 | 37 | 38 | // Enable code formatting 39 | spotless { 40 | java { 41 | licenseHeaderFile '../Apache_Licence_For_Spotless' 42 | googleJavaFormat('1.7') 43 | } 44 | } -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/src/main/java/com/google/dataflow/sample/timeseriesflow/adaptors/fsi/data/cme/DeadLetterSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.adaptors.fsi.data.cme; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | 22 | @Experimental 23 | public enum DeadLetterSink { 24 | LOG, 25 | BIGQUERY 26 | } 27 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/src/main/java/com/google/dataflow/sample/timeseriesflow/adaptors/fsi/data/cme/LogElements.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.adaptors.fsi.data.cme; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.transforms.DoFn; 22 | import org.apache.beam.sdk.transforms.PTransform; 23 | import org.apache.beam.sdk.transforms.ParDo; 24 | import org.apache.beam.sdk.transforms.windowing.BoundedWindow; 25 | import org.apache.beam.sdk.transforms.windowing.GlobalWindow; 26 | import org.apache.beam.sdk.values.PCollection; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | /** Pass-through transform that logs each element's toString() at INFO level and re-emits the element unchanged. */ 30 | @Experimental 31 | class LogElements<T> extends PTransform<PCollection<T>, PCollection<T>> { 32 | 33 | private static final Logger LOG = LoggerFactory.getLogger(LogElements.class); 34 | /** Applies the logging DoFn to {@code input} and returns the resulting collection of the same elements. */ 35 | @Override 36 | public PCollection<T> expand(PCollection<T> input) { 37 | 38 | return input.apply( 39 | "Logging Elements", 40 | ParDo.of( 41 | new DoFn<T, T>() { 42 | 43 | @ProcessElement 44 | public void processElement( 45 | @Element T element, OutputReceiver<T> out, BoundedWindow window) { 46 | 47 | String message = element.toString(); 48 | // The window is appended to the message only when it is not the global window. 49 | if (!(window instanceof GlobalWindow)) { 50 | message = message + " Window: " + window.toString(); 51 | } 52 | 53 | LOG.info(message); 54 | 55 | out.output(element); 56 | } 57 | })); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/src/test/resources/TSTestBlendedIndexNoGaps.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | "minor_key_string": "MKey-a" 6 | }, 7 | "data": {"double_val": 10.0 8 | }, "timestamp": "2000-01-01T00:00:00Z" 9 | }, 10 | { 11 | "key": { 12 | "major_key": "Key-B", 13 | "minor_key_string": "MKey-a" 14 | }, 15 | "data": {"double_val": 110.0 16 | }, "timestamp": "2000-01-01T00:00:00Z" 17 | }, 18 | { 19 | "key": { 20 | "major_key": "Key-C", 21 | "minor_key_string": "MKey-a" 22 | }, 23 | "data": 
{"double_val": 210.0 24 | }, "timestamp": "2000-01-01T00:00:00Z", 25 | "time": { 26 | "advance_watermark_seconds": 1 27 | } 28 | }, 29 | { 30 | "key": { 31 | "major_key": "Key-A", 32 | "minor_key_string": "MKey-a" 33 | }, 34 | "data": {"double_val": 20.0 35 | }, "timestamp": "2000-01-01T00:00:01Z" 36 | }, 37 | { 38 | "key": { 39 | "major_key": "Key-B", 40 | "minor_key_string": "MKey-a" 41 | }, 42 | "data": {"double_val": 120.0 43 | }, "timestamp": "2000-01-01T00:00:01Z" 44 | }, 45 | { 46 | "key": { 47 | "major_key": "Key-C", 48 | "minor_key_string": "MKey-a" 49 | }, 50 | "data": {"double_val": 220.0 51 | }, "timestamp": "2000-01-01T00:00:01Z", 52 | "time": { 53 | "advance_watermark_seconds": 2 54 | } 55 | }, 56 | { 57 | "key": { 58 | "major_key": "Key-A", 59 | "minor_key_string": "MKey-a" 60 | }, 61 | "data": {"double_val": 30.0 62 | }, "timestamp": "2000-01-01T00:00:02Z" 63 | }, 64 | { 65 | "key": { 66 | "major_key": "Key-B", 67 | "minor_key_string": "MKey-a" 68 | }, 69 | "data": {"double_val": 130.0 70 | }, "timestamp": "2000-01-01T00:00:02Z" 71 | }, 72 | { 73 | "key": { 74 | "major_key": "Key-C", 75 | "minor_key_string": "MKey-a" 76 | }, 77 | "data": {"double_val": 230.0 78 | }, "timestamp": "2000-01-01T00:00:02Z", 79 | "time": { 80 | "advance_watermark_expression": "INFINITY" 81 | } 82 | } 83 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/src/test/resources/TSTestBlendedIndexWithGaps.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | "minor_key_string": "MKey-a" 6 | }, 7 | "data": {"double_val": 10.0 8 | }, "timestamp": "2000-01-01T00:00:00Z" 9 | }, 10 | { 11 | "key": { 12 | "major_key": "Key-B", 13 | "minor_key_string": "MKey-a" 14 | }, 15 | "data": {"double_val": 110.0 16 | }, "timestamp": "2000-01-01T00:00:00Z" 17 | }, 18 | { 19 | "key": { 20 | "major_key": "Key-C", 21 | 
"minor_key_string": "MKey-a" 22 | }, 23 | "data": {"double_val": 210.0 24 | }, "timestamp": "2000-01-01T00:00:00Z", 25 | "time": { 26 | "advance_watermark_seconds": 1 27 | } 28 | }, 29 | { 30 | "key": { 31 | "major_key": "Key-A", 32 | "minor_key_string": "MKey-a" 33 | }, 34 | "data": {"double_val": 20.0 35 | }, "timestamp": "2000-01-01T00:00:01Z" 36 | }, 37 | { 38 | "key": { 39 | "major_key": "Key-C", 40 | "minor_key_string": "MKey-a" 41 | }, 42 | "data": {"double_val": 220.0 43 | }, "timestamp": "2000-01-01T00:00:01Z", 44 | "time": { 45 | "advance_watermark_seconds": 2 46 | } 47 | }, 48 | { 49 | "key": { 50 | "major_key": "Key-A", 51 | "minor_key_string": "MKey-a" 52 | }, 53 | "data": {"double_val": 30.0 54 | }, "timestamp": "2000-01-01T00:00:02Z" 55 | }, 56 | { 57 | "key": { 58 | "major_key": "Key-B", 59 | "minor_key_string": "MKey-a" 60 | }, 61 | "data": { 62 | "double_val": 130.0 63 | }, "timestamp": "2000-01-01T00:00:02Z", 64 | "time": { 65 | "advance_watermark_seconds": 3 66 | } 67 | }, 68 | { 69 | "time": { 70 | "advance_watermark_expression": "INFINITY" 71 | } 72 | } 73 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Adapters/src/test/resources/TSTestData.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | "minor_key_string": "MKey-a" 6 | }, 7 | "data": {"double_val": 10.0 8 | } 9 | }, 10 | { 11 | "key": { 12 | "major_key": "Key-A", 13 | "minor_key_string": "MKey-a" 14 | }, 15 | "data": {"double_val": 20.0 16 | } 17 | }, 18 | { 19 | "key": { 20 | "major_key": "Key-A", 21 | "minor_key_string": "MKey-a" 22 | }, 23 | "data": {"double_val": 30.0 24 | } 25 | }, 26 | { 27 | "key": { 28 | "major_key": "Key-B", 29 | "minor_key_string": "MKey-a" 30 | }, 31 | "data": {"double_val": 110.0 32 | } 33 | }, 34 | { 35 | "key": { 36 | "major_key": "Key-B", 37 | "minor_key_string": "MKey-a" 38 | }, 
39 | "data": {"double_val": 120.0 40 | } 41 | }, 42 | { 43 | "key": { 44 | "major_key": "Key-C", 45 | "minor_key_string": "MKey-a" 46 | }, 47 | "data": {"double_val": 130.0 48 | } 49 | }, 50 | { 51 | "key": { 52 | "major_key": "Key-C", 53 | "minor_key_string": "MKey-a" 54 | }, 55 | "data": {"double_val": 210.0 56 | } 57 | }, 58 | { 59 | "key": { 60 | "major_key": "Key-C", 61 | "minor_key_string": "MKey-a" 62 | }, 63 | "data": {"double_val": 220.0 64 | } 65 | }, 66 | { 67 | "key": { 68 | "major_key": "Key-C", 69 | "minor_key_string": "MKey-a" 70 | }, 71 | "data": {"double_val": 230.0 72 | } 73 | } 74 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Apache_Licence_For_Spotless: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Examples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/fsi/forex/ExampleForexPipelineOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.examples.fsi.forex; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions; 21 | import com.google.dataflow.sample.timeseriesflow.options.TSFlowOptions; 22 | import org.apache.beam.sdk.annotations.Experimental; 23 | import org.apache.beam.sdk.options.Description; 24 | /** Options for the Forex example: BigQuery output table, input path, end timestamp, resample period and rolling window (both in seconds). */ 25 | @Experimental 26 | public interface ExampleForexPipelineOptions extends TSFlowOptions, TFXOptions { 27 | 28 | @Description("Option to specify BigQuery target table to push metrics") 29 | String getBigQueryTableForTSAccumOutputLocation(); 30 | 31 | void setBigQueryTableForTSAccumOutputLocation(String bigQueryTableForTSAccumOutputLocation); 32 | 33 | @Description("Option to specify absolute path for input dataset") 34 | String getInputPath(); 35 | 36 | void setInputPath(String inputPath); 37 | 38 | @Description( 39 | "Option to specify the end timestamp to stop filling gaps, e.g., end of time series dataset") 40 | String getEndTimestamp(); 41 | 42 | void setEndTimestamp(String endTimestamp); 43 | 44 | @Description("Option to specify sampling period in seconds") 45 | Integer getResampleSec(); 46 | 47 | void setResampleSec(Integer resampleSec); 48 | 49 | @Description("Option to specify rolling window to calculate metrics in seconds") 50 | Integer getWindowSec(); 51 | 52 | void setWindowSec(Integer windowSec); 53 | } 54 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Examples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/fsi/forex/HistoryForexReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.examples.fsi.forex; 19 | 20 | import com.google.auto.value.AutoValue; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData; 22 | import java.util.Set; 23 | import org.apache.beam.sdk.io.TextIO; 24 | import org.apache.beam.sdk.transforms.PTransform; 25 | import org.apache.beam.sdk.values.PBegin; 26 | import org.apache.beam.sdk.values.PCollectionTuple; 27 | import org.apache.beam.sdk.values.TupleTag; 28 | /** Root transform that reads text from {@link #getSourceFilesURI()} and applies {@code ForexCSVAdaptor.ConvertCSVForex} with the configured tickers. */ 29 | @AutoValue 30 | public abstract class HistoryForexReader extends PTransform<PBegin, PCollectionTuple> { 31 | 32 | public abstract String getSourceFilesURI(); 33 | /** Tickers handed to the CSV conversion. NOTE(review): element type String is reconstructed - confirm against ForexCSVAdaptor.ConvertCSVForex. */ 34 | public abstract Set<String> getTickers(); 35 | 36 | public static Builder builder() { 37 | return new AutoValue_HistoryForexReader.Builder(); 38 | } 39 | 40 | @AutoValue.Builder 41 | public abstract static class Builder { 42 | 43 | public abstract Builder setSourceFilesURI(String newSourceFilesURI); 44 | 45 | public abstract Builder setTickers(Set<String> newTickers); 46 | 47 | public abstract HistoryForexReader build(); 48 | } 49 | // NOTE(review): tag element types are reconstructed (TSDataPoint for parsed rows, String for dead letters) - confirm against ForexCSVAdaptor. 50 | // Tags to implement basic example of deadletter queue pattern 51 | static final TupleTag<TimeSeriesData.TSDataPoint> successfulParse = 52 | new TupleTag<TimeSeriesData.TSDataPoint>(); 53 | static final TupleTag<String> deadLetterTag = new TupleTag<String>(); 54 | /** Reads the source files with {@link TextIO} and returns the tuple produced by the CSV conversion. */ 55 | @Override 56 | public PCollectionTuple expand(PBegin input) { 57 | return input 58 | 
.apply(TextIO.read().from(getSourceFilesURI())) 59 | .apply(new ForexCSVAdaptor.ConvertCSVForex(getTickers())); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/Examples/src/main/resources/[EMPTY]-EURUSD-2020-05-11_2020-05-11.csv: -------------------------------------------------------------------------------- 1 | PLEASE DOWNLOAD ONE DAY WORTH OF FOREX TICK DATA FROM YOUR FAVORITE PROVIDER. -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/SyntheticExamples/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * License); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an AS IS BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | plugins { 19 | id 'java' 20 | id 'idea' 21 | id "com.diffplug.gradle.spotless" version "3.24.2" 22 | } 23 | 24 | sourceCompatibility = 1.8 25 | 26 | dependencies { 27 | compile project(':TimeSeriesPipeline') 28 | compile project(':TimeSeriesMetricsLibrary') 29 | testCompile group: 'junit', name: 'junit', version: '4.12' 30 | } 31 | 32 | // Enable code formatting 33 | spotless { 34 | java { 35 | licenseHeaderFile '../Apache_Licence_For_Spotless' 36 | googleJavaFormat('1.7') 37 | } 38 | } 39 | 40 | task run_example(type: JavaExec) { 41 | classpath sourceSets.main.runtimeClasspath 42 | main = "com.google.dataflow.sample.timeseriesflow.examples.simpledata.transforms.SinWaveExample" 43 | group = "samples" 44 | } 45 | 46 | task generate_bootstrap_data(type: JavaExec) { 47 | classpath sourceSets.main.runtimeClasspath 48 | main = "com.google.dataflow.sample.timeseriesflow.examples.simpledata.transforms.SimpleDataBootstrapGenerator" 49 | group = "samples" 50 | } 51 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/SyntheticExamples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/simpledata/transforms/SinWaveExampleOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.examples.simpledata.transforms; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.metrics.core.TSMetricsOptions; 21 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions; 22 | import com.google.dataflow.sample.timeseriesflow.options.TSOutputPipelineOptions; 23 | import org.apache.beam.sdk.options.Default; 24 | import org.apache.beam.sdk.options.Description; 25 | 26 | public interface SinWaveExampleOptions 27 | extends TSOutputPipelineOptions, TSMetricsOptions, TFXOptions { 28 | 29 | @Description( 30 | "In order to see easy output of metrics for demos set this to true. This will result in all values being 'printed' to logs.") 31 | @Default.Boolean(false) 32 | Boolean getEnablePrintMetricsToLogs(); 33 | 34 | void setEnablePrintMetricsToLogs(Boolean value); 35 | 36 | @Description( 37 | "In order to see easy output of TF.Examples for demos set this to true. 
This will result in all values being 'printed' to logs.") 38 | @Default.Boolean(false) 39 | Boolean getEnablePrintTFExamplesToLogs(); 40 | 41 | void setEnablePrintTFExamplesToLogs(Boolean value); 42 | 43 | @Description("Enable sending outliers with the stream of synthetic data.") 44 | Boolean getWithOutliers(); 45 | 46 | void setWithOutliers(Boolean value); 47 | } 48 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/README.MD: -------------------------------------------------------------------------------- 1 | # Style guide 2 | - 3 | ## All Metrics 4 | * All metrics should have an options interface, even if it is empty. 5 | * Options should be placed at the top of the class below LOG. 6 | * The options interface should be added to [MetricsOptions](MetricsOptions.java). 7 | 8 | ## Basic Type Two 9 | * Name of basic metrics should be in the format XXFn, for example StdDevFn. 10 | 11 | ## Complex Type Two 12 | * Name of complex metrics should be in the format XXGFn, for example RSIGFn. (G denotes graph, as complex metrics mutate the graph.) 13 | 14 | 15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Max.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 23 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils; 24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 25 | import org.apache.beam.sdk.options.PipelineOptions; 26 | /** Type-one metric that maintains the max field of a {@link TSAccum} via {@code TSDataUtils.findMaxValue}. */ 27 | public class Max extends BTypeOne { 28 | /** Options interface for this metric; it currently declares no options. */ 29 | public interface MaxOptions extends PipelineOptions {} 30 | /** Returns the accumulator with its max set to findMaxValue(current max or null, the data point's data). */ 31 | @Override 32 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 33 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 34 | coreNumeric.setMax(TSDataUtils.findMaxValue(coreNumeric.getMaxOrNull(), dataPoint.getData())); 35 | return coreNumeric.build(); 36 | } 37 | /** Returns accumulator {@code a} with its max set to findMaxValue(a's max or null, b's max or null). */ 38 | @Override 39 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 40 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 41 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 42 | aBuilder.setMax(TSDataUtils.findMaxValue(aBuilder.getMaxOrNull(), bBuilder.getMaxOrNull())); 43 | 44 | return aBuilder.build(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- 
/timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Min.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 23 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils; 24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 25 | import org.apache.beam.sdk.options.PipelineOptions; 26 | /** Type one metric that keeps the min value stored on a TSAccum up to date. */ 27 | public class Min extends BTypeOne { 28 | /** Pipeline options for the Min metric; it declares no options of its own. */ 29 | public interface MinOptions extends PipelineOptions {} 30 | /** Sets the accumulator's min to TSDataUtils.findMinData(current min, data point value) and returns the rebuilt accum. The current min comes from getMinOrNull, so presumably it may be null before the first input. */ 31 | @Override 32 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 33 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 34 | coreNumeric.setMin(TSDataUtils.findMinData(coreNumeric.getMinOrNull(), dataPoint.getData())); 35 | return coreNumeric.build(); 36 | } 37 | /** Merges two accums: the min of a is replaced by TSDataUtils.findMinData over both mins, and the accum built from a is returned (b only contributes its min). */ 38 | @Override 39 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 40 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 41 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 42 | aBuilder.setMin(TSDataUtils.findMinData(aBuilder.getMinOrNull(), bBuilder.getMinOrNull())); 43 | 44 | return aBuilder.build(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Sum.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 23 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils; 24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 25 | import org.apache.beam.sdk.options.PipelineOptions; 26 | /** Type one metric that keeps the running sum stored on a TSAccum up to date. */ 27 | public class Sum extends BTypeOne { 28 | /** Pipeline options for the Sum metric; it declares no options of its own. */ 29 | public interface SumOptions extends PipelineOptions {} 30 | /** Sets the accumulator's sum to CommonUtils.sumNumericDataNullAsZero(current sum, data point value) and returns the rebuilt accum. Per the helper's name, a null operand is presumably counted as zero. */ 31 | @Override 32 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 33 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 34 | coreNumeric.setSum( 35 | CommonUtils.sumNumericDataNullAsZero(coreNumeric.getSumOrNull(), dataPoint.getData())); 36 | return coreNumeric.build(); 37 | } 38 | /** Merges two accums: the sum of a is replaced by CommonUtils.sumNumericDataNullAsZero over both sums, and the accum built from a is returned (b only contributes its sum). */ 39 | @Override 40 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 41 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 42 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 43 | aBuilder.setSum( 44 | CommonUtils.sumNumericDataNullAsZero(aBuilder.getSumOrNull(), bBuilder.getSumOrNull())); 45 | 46 | return aBuilder.build(); 47 | } 48 | } 49
| -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/proto/TSFSITechKeys.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | syntax = "proto3"; 19 | 20 | package protos; 21 | 22 | option java_outer_classname = "FSITechnicalDerivedAggregations"; 23 | option java_multiple_files = false; 24 | option java_package = "com.google.dataflow.sample.timeseriesflow"; 25 | 26 | /** 27 | This proto stores all of the keys used in the accum map. 
28 | 29 | */ 30 | enum FsiTechnicalIndicators{ 31 | SUM_UP_MOVEMENT = 0; 32 | SUM_DOWN_MOVEMENT = 1; 33 | /* NOTE(review): numbers are not in declaration order (12 is used here, and 13 follows 11 further down). */ SUM_MOVEMENT_COUNT = 12; 34 | 35 | // Always ABS but here to keep consistent with LOSS naming 36 | ABS_MOVING_AVERAGE_GAIN = 2; 37 | ABS_MOVING_AVERAGE_LOSS = 3; 38 | RELATIVE_STRENGTH = 4; 39 | RELATIVE_STRENGTH_INDICATOR = 5; 40 | SUM_MOVEMENT=6; 41 | SIMPLE_MOVING_AVERAGE=7; 42 | EXPONENTIAL_MOVING_AVERAGE=8; 43 | WEIGHTED_MOVING_AVERAGE=9; 44 | BB_MIDDLE_BAND_SMA=10; 45 | BB_UPPER_BAND_SMA=11; 46 | BB_BOTTOM_BAND_SMA=13; 47 | BB_MIDDLE_BAND_EMA=14; 48 | BB_UPPER_BAND_EMA=15; 49 | BB_BOTTOM_BAND_EMA=16; 50 | STANDARD_DEVIATION=17; 51 | LOG_RTN=18; 52 | 53 | } 54 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/test/resources/TSAccumVWAPTest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | "minor_key_string": "PRICE" 6 | }, 7 | "data": {"double_val": 10.0 8 | }, 9 | "extendedData": { 10 | "Data::VOL": { 11 | "doubleVal": 2.0 12 | } 13 | }, 14 | "timestamp": "2000-01-01T00:00:00Z" 15 | }, 16 | 17 | { 18 | "key": { 19 | "major_key": "Key-B", 20 | "minor_key_string": "PRICE" 21 | }, 22 | "data": {"double_val": 20.0 23 | }, "extendedData": { 24 | "Data::VOL": { 25 | "doubleVal": 2.0 26 | } 27 | }, 28 | "timestamp": "2000-01-01T00:00:00Z" 29 | }, 30 | 31 | { 32 | "key": { 33 | "major_key": "Key-B", 34 | "minor_key_string": "PRICE" 35 | }, 36 | "data": {"double_val": 40.0 37 | }, 38 | "extendedData": { 39 | "Data::VOL": { 40 | "doubleVal": 8.0 41 | } 42 | }, 43 | "timestamp": "2000-01-01T00:00:01Z", 44 | "time": { 45 | "advance_watermark_seconds": 5 46 | } 47 | }, 48 | 49 | { 50 | "key": { 51 | "major_key": "Key-A", 52 | "minor_key_string": "PRICE" 53 | }, 54 | "data": {"double_val": 10.0 55 | }, 56 | "extendedData": { 57 | "Data::VOL": { 58 | "doubleVal": 2.0
59 | } 60 | }, 61 | 62 | "timestamp": "2000-01-01T00:00:05Z", 63 | "time": { 64 | "advance_watermark_expression": "INFINITY" 65 | } 66 | } 67 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/test/resources/VWAPTestGap.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | "minor_key_string": "PRICE" 6 | }, 7 | "data": {"double_val": 10.0 8 | }, 9 | "extendedData": { 10 | "Data::VOL": { 11 | "doubleVal": 2.0 12 | } 13 | }, 14 | "timestamp": "2000-01-01T00:00:00Z" 15 | }, 16 | { 17 | "key": { 18 | "major_key": "Key-B", 19 | "minor_key_string": "PRICE" 20 | }, 21 | "data": {"double_val": 20.0 22 | }, "extendedData": { 23 | "Data::VOL": { 24 | "doubleVal": 2.0 25 | } 26 | }, 27 | "timestamp": "2000-01-01T00:00:00Z", 28 | "time": { 29 | "advance_watermark_seconds": 5 30 | } 31 | }, 32 | { 33 | "key": { 34 | "major_key": "Key-B", 35 | "minor_key_string": "PRICE" 36 | }, 37 | "data": {"double_val": 40.0 38 | }, 39 | "extendedData": { 40 | "Data::VOL": { 41 | "doubleVal": 8.0 42 | } 43 | }, 44 | "timestamp": "2000-01-01T00:00:11Z", 45 | "time": { 46 | "advance_watermark_seconds": 5 47 | } 48 | }, 49 | 50 | { 51 | "key": { 52 | "major_key": "Key-A", 53 | "minor_key_string": "PRICE" 54 | }, 55 | "data": {"double_val": 10.0 56 | }, 57 | "extendedData": { 58 | "Data::VOL": { 59 | "doubleVal": 2.0 60 | } 61 | }, 62 | 63 | "timestamp": "2000-01-01T00:00:11Z", 64 | "time": { 65 | "advance_watermark_expression": "INFINITY" 66 | } 67 | } 68 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/test/resources/VWAPWithinBoundTest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "key": { 4 | "major_key": "Key-A", 5 | 
"minor_key_string": "PRICE" 6 | }, 7 | "data": {"double_val": 10.0 8 | }, 9 | "extendedData": { 10 | "Data::VOL": { 11 | "doubleVal": 2.0 12 | } 13 | }, 14 | "timestamp": "2000-01-01T00:00:00Z", 15 | "time": { 16 | "advance_watermark_seconds": 5 17 | } 18 | }, 19 | { 20 | "key": { 21 | "major_key": "Key-A", 22 | "minor_key_string": "ASK" 23 | }, 24 | "data": {"double_val": 8.0 25 | }, 26 | "timestamp": "2000-01-01T00:00:06Z" 27 | }, 28 | { 29 | "key": { 30 | "major_key": "Key-A", 31 | "minor_key_string": "BID" 32 | }, 33 | "data": {"double_val": 6.0 34 | }, 35 | 36 | "timestamp": "2000-01-01T00:00:06Z", 37 | "time": { 38 | "advance_watermark_seconds": 5 39 | } 40 | }, 41 | 42 | { 43 | "key": { 44 | "major_key": "Key-A", 45 | "minor_key_string": "PRICE" 46 | }, 47 | "data": {"double_val": 10.0 48 | }, 49 | "extendedData": { 50 | "Data::VOL": { 51 | "doubleVal": 2.0 52 | } 53 | }, 54 | 55 | "timestamp": "2000-01-01T00:00:11Z", 56 | "time": { 57 | "advance_watermark_expression": "INFINITY" 58 | } 59 | } 60 | ] -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/combiners/BTypeOne.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.combiners; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 22 | import java.io.Serializable; 23 | 24 | /** Extension point that allows type one metrics to be added to the system. Implementations are Serializable and supply the add-input and merge steps for one metric. */ 25 | public abstract class BTypeOne implements Serializable { 26 | 27 | /** Defines how a data point's values are added to a TSAccum for a type one computation; returns the resulting accum. */ 28 | public abstract TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint); 29 | 30 | /** Defines how two accums are merged for the type one computation; returns the merged accum. */ 31 | public abstract TSAccum mergeDataAccums(TSAccum a, TSAccum b); 32 | } 33 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/combiners/TSCombiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.combiners; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | 22 | /** Placeholder marker interface with no members; it will be used in the future as a way to optimise the DAG for type 2 computations. */ @Experimental 23 | 24 | public interface TSCombiner {} 25 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/combiners/typeone/TSCategoricalCombiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.combiners.typeone; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 22 | import com.google.dataflow.sample.timeseriesflow.combiners.TSCombiner; 23 | import org.apache.beam.sdk.annotations.Experimental; 24 | 25 | /** Not implemented. Placeholder combiner for categorical values: both overridden methods currently return null, so callers must not rely on their results. Instances are created through the static combine() factory because the constructor is private. */ 26 | @Experimental 27 | public class TSCategoricalCombiner extends TSBaseCombiner implements TSCombiner { 28 | 29 | // TODO implement in next cycle. 30 | private TSCategoricalCombiner() {} 31 | /** Factory method; returns a new combiner instance. */ 32 | public static TSCategoricalCombiner combine() { 33 | return new TSCategoricalCombiner(); 34 | } 35 | /** Not implemented yet: ignores both accums and always returns null. */ 36 | @Override 37 | public TSAccum mergeTypedDataAccum(TSAccum a, TSAccum b) { 38 | 39 | return null; 40 | } 41 | /** Not implemented yet: ignores its arguments and always returns null. The commented-out lines sketch the intended DOW / DOM / YY population. */ 42 | @Override 43 | public TSAccum addTypeSpecificInput(TSAccum accumulator, TSDataPoint dataPoint) { 44 | // AccumCategoricalBuilder accumStoreCoreCategorical = new AccumCategoricalBuilder(accum); 45 | // accumStoreCoreCategorical.setDOW(createNumData(time.toDateTime().dayOfWeek().get())); 46 | // accumStoreCoreCategorical.setDOM(createNumData(time.toDateTime().dayOfMonth().get())); 47 | // accumStoreCoreCategorical.setYY(createNumData(time.toDateTime().year().get())); 48 | 49 | return null; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/common/TupleTypes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.common; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 21 | import org.apache.beam.sdk.annotations.Experimental; 22 | import org.apache.beam.sdk.values.TupleTag; 23 | 24 | /** Tuple tags used for filter operations within aggregations, one per value type (string, int, double, long, float). NOTE(review): the tags are public, static and non-final, so any caller can reassign them; consider making them final. */ @Experimental 25 | 26 | public class TupleTypes { 27 | 28 | public static TupleTag t_str = new TupleTag() {}; 29 | public static TupleTag t_int = new TupleTag() {}; 30 | public static TupleTag t_double = new TupleTag() {}; 31 | public static TupleTag t_long = new TupleTag() {}; 32 | public static TupleTag t_float = new TupleTag() {}; 33 | } 34 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/datamap/AccumCategoricalBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.datamap; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.DerivedAggregations.Indicators; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.Data; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 23 | import org.apache.beam.sdk.annotations.Experimental; 24 | 25 | /** Builder for the as-yet unimplemented categorical values. Exposes getters and setters for the values stored under the DOW, DOM and YY indicator names; each accessor delegates to getValueOrNull / setValue. */ @Experimental 26 |
27 | public class AccumCategoricalBuilder extends AccumCoreMetadataBuilder { 28 | 29 | public AccumCategoricalBuilder(TSAccum tsAccum) { 30 | super(tsAccum); 31 | } 32 | 33 | public Data getDOWOrNull() { 34 | return getValueOrNull(Indicators.DOW.name()); 35 | } 36 | 37 | public Data getDOMOrNull() { 38 | return getValueOrNull(Indicators.DOM.name()); 39 | } 40 | 41 | public Data getYYOrNull() { 42 | return getValueOrNull(Indicators.YY.name()); 43 | } 44 | 45 | public void setDOW(Data data) { 46 | setValue(Indicators.DOW.name(), data); 47 | } 48 | 49 | public void setDOM(Data data) { 50 | setValue(Indicators.DOM.name(), data); 51 | } 52 | 53 | public void setYY(Data data) { 54 | setValue(Indicators.YY.name(), data); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/datamap/AccumCoreNumericBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.datamap; 19 | 20 | import com.google.common.base.Preconditions; 21 | import com.google.dataflow.sample.timeseriesflow.DerivedAggregations.Indicators; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.Data; 23 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 24 | import org.apache.beam.sdk.annotations.Experimental; 25 | /** Accum numeric builder for the common numeric aggregations stored under the SUM, MIN and MAX indicator names. The getters delegate to getValueOrNull; the setters reject null through Preconditions.checkNotNull before storing. NOTE(review): the original summary also listed First and Last, which are not declared in this class; presumably the superclass handles them, confirm. */ 26 | @Experimental 27 | 28 | public class AccumCoreNumericBuilder extends AccumCoreMetadataBuilder { 29 | 30 | public AccumCoreNumericBuilder(TSAccum tsAccum) { 31 | super(tsAccum); 32 | } 33 | 34 | public Data getSumOrNull() { 35 | return getValueOrNull(Indicators.SUM.name()); 36 | } 37 | 38 | public Data getMaxOrNull() { 39 | return getValueOrNull(Indicators.MAX.name()); 40 | } 41 | 42 | public Data getMinOrNull() { 43 | return getValueOrNull(Indicators.MIN.name()); 44 | } 45 | 46 | public void setSum(Data data) { 47 | Preconditions.checkNotNull(data); 48 | setValue(Indicators.SUM.name(), data); 49 | } 50 | 51 | public void setMax(Data data) { 52 | 53 | Preconditions.checkNotNull(data); 54 | setValue(Indicators.MAX.name(), data); 55 | } 56 | 57 | public void setMin(Data data) { 58 | Preconditions.checkNotNull(data); 59 | setValue(Indicators.MIN.name(), data); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/BTypeTwoFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 23 | import org.apache.beam.sdk.options.PipelineOptions; 24 | import org.apache.beam.sdk.transforms.Contextful; 25 | import org.apache.beam.sdk.transforms.Contextful.Fn; 26 | import org.apache.beam.sdk.transforms.SerializableFunction; 27 | import org.apache.beam.sdk.values.KV; 28 | /** Base class for type two metrics that can be expressed as a single serializable function; subclasses supply that function through getFunction. */ 29 | public abstract class BTypeTwoFn { 30 | /** Returns the metric's function, configured from the given pipeline options. */ 31 | public abstract SerializableFunction, KV> getFunction( 32 | PipelineOptions options); 33 | /** Convenience wrapper: returns getFunction(options) wrapped with Contextful.fn. */ 34 | public Contextful, KV>> getContextualFn( 35 | PipelineOptions options) { 36 | return Contextful.fn(getFunction(options)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/CTypeTwo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 24 | import java.util.List; 25 | import org.apache.beam.sdk.transforms.PTransform; 26 | import org.apache.beam.sdk.values.KV; 27 | import org.apache.beam.sdk.values.PCollection; 28 | 29 | /** 30 | * Complex Type 2 metrics require access to the TSFlow graph to create multi-stage computations. If 31 | * a computation is a simple f(KV) -> KV, make use of {@link 32 | * BTypeTwo} instead. 33 | */ 34 | public abstract class CTypeTwo 35 | extends PTransform>, PCollection>> { 36 | 37 | /** 38 | * Some type 2 computations may need specialized type 1 computations. The {@link 39 | * com.google.dataflow.sample.timeseriesflow.graph.GenerateComputations} class will attach any 40 | * provided by this list. If none are required, return an empty list.
41 | */ 42 | public abstract List> requiredTypeOne(); 43 | /** NOTE(review): undocumented in the original. Presumably lists the metrics that should be left out of the final output; the consumer is not visible in this file, so confirm against the caller. */ 44 | public abstract List excludeFromOutput(); 45 | } 46 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestMax.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.common.annotations.VisibleForTesting; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 24 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils; 25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 26 | import org.apache.beam.sdk.annotations.Experimental; 27 | /** Dummy max metric used as a test artifact only; it updates the accum's max through TSDataUtils.findMaxValue on add and on merge. */ 28 | @VisibleForTesting 29 | @Experimental 30 | 31 | public class TestMax extends BTypeOne { 32 | /** Sets the accumulator's max from the current max (possibly null, per getMaxOrNull) and the data point value, then returns the rebuilt accum. */ @Override 33 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 34 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 35 | coreNumeric.setMax(TSDataUtils.findMaxValue(coreNumeric.getMaxOrNull(), dataPoint.getData())); 36 | return coreNumeric.build(); 37 | } 38 | /** Replaces the max of a with the max over both accums and returns the accum built from a. */ 39 | @Override 40 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 41 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 42 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 43 | aBuilder.setMax(TSDataUtils.findMaxValue(aBuilder.getMaxOrNull(), bBuilder.getMaxOrNull())); 44 | 45 | return aBuilder.build(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestMin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.common.annotations.VisibleForTesting; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 24 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils; 25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 26 | import org.apache.beam.sdk.annotations.Experimental; 27 | /** Dummy min metric used as a test artifact only; it updates the accum's min through TSDataUtils.findMinData on add and on merge. */ 28 | @VisibleForTesting 29 | @Experimental 30 |
31 | public class TestMin extends BTypeOne { 32 | /** Sets the accumulator's min from the current min (possibly null, per getMinOrNull) and the data point value, then returns the rebuilt accum. */ @Override 33 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 34 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 35 | coreNumeric.setMin(TSDataUtils.findMinData(coreNumeric.getMinOrNull(), dataPoint.getData())); 36 | return coreNumeric.build(); 37 | } 38 | /** Replaces the min of a with the min over both accums and returns the accum built from a. */ 39 | @Override 40 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 41 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 42 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 43 | aBuilder.setMin(TSDataUtils.findMinData(aBuilder.getMinOrNull(), bBuilder.getMinOrNull())); 44 | 45 | return aBuilder.build(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestSum.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone; 19 | 20 | import com.google.common.annotations.VisibleForTesting; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne; 24 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils; 25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder; 26 | import org.apache.beam.sdk.annotations.Experimental; 27 | 28 | @VisibleForTesting 29 | @Experimental 30 | /** This is a dummy class used as a test artifact only. */ 31 | public class TestSum extends BTypeOne { 32 | @Override 33 | public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) { 34 | AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator); 35 | coreNumeric.setSum( 36 | CommonUtils.sumNumericDataNullAsZero(coreNumeric.getSumOrNull(), dataPoint.getData())); 37 | return coreNumeric.build(); 38 | } 39 | 40 | @Override 41 | public TSAccum mergeDataAccums(TSAccum a, TSAccum b) { 42 | AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a); 43 | AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b); 44 | aBuilder.setSum( 45 | CommonUtils.sumNumericDataNullAsZero(aBuilder.getSumOrNull(), bBuilder.getSumOrNull())); 46 | 47 | return aBuilder.build(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typetwo/Test1Fn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typetwo; 19 | 20 | import com.google.common.annotations.VisibleForTesting; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 23 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 24 | import com.google.dataflow.sample.timeseriesflow.metrics.BTypeTwoFn; 25 | import org.apache.beam.sdk.annotations.Experimental; 26 | import org.apache.beam.sdk.options.PipelineOptions; 27 | import org.apache.beam.sdk.transforms.SerializableFunction; 28 | import org.apache.beam.sdk.values.KV; 29 | 30 | @VisibleForTesting 31 | @Experimental 32 | /** This is a dummy class used as a test artifact only. */ 33 | public class Test1Fn extends BTypeTwoFn { 34 | 35 | public SerializableFunction, KV> getFunction( 36 | PipelineOptions options) { 37 | 38 | return (SerializableFunction, KV>) 39 | element -> { 40 | return KV.of(element.getKey(), TSAccum.newBuilder().build()); 41 | }; 42 | } 43 | 44 | /** Options for {@link Test1Fn} fn. 
*/ 45 | public static interface TestFnOptions extends PipelineOptions {} 46 | } 47 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typetwo/Test2Fn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typetwo; 19 | 20 | import com.google.common.annotations.VisibleForTesting; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 23 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 24 | import com.google.dataflow.sample.timeseriesflow.metrics.BTypeTwoFn; 25 | import org.apache.beam.sdk.annotations.Experimental; 26 | import org.apache.beam.sdk.options.PipelineOptions; 27 | import org.apache.beam.sdk.transforms.SerializableFunction; 28 | import org.apache.beam.sdk.values.KV; 29 | 30 | @VisibleForTesting 31 | @Experimental 32 | /** This is a dummy class used as a test artifact only. */ 33 | public class Test2Fn extends BTypeTwoFn { 34 | 35 | public SerializableFunction, KV> getFunction( 36 | PipelineOptions options) { 37 | 38 | return (SerializableFunction, KV>) 39 | element -> { 40 | return KV.of(element.getKey(), TSAccum.newBuilder().build()); 41 | }; 42 | } 43 | 44 | /** Options for {@link Test2Fn} fn. */ 45 | public static interface TestFnOptions extends PipelineOptions {} 46 | } 47 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/GapFillOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.options; 19 | 20 | import org.apache.beam.sdk.options.Default; 21 | import org.apache.beam.sdk.options.Description; 22 | import org.apache.beam.sdk.options.PipelineOptions; 23 | 24 | public interface GapFillOptions extends PipelineOptions { 25 | 26 | @Description("Is Gap filling enabled.") 27 | Boolean isGapFillEnabled(); 28 | 29 | void setGapFillEnabled(Boolean ttlDurationSecs); 30 | 31 | @Description( 32 | "The time to live value for how long after a key does not output values we continue to gap fill.") 33 | Integer getTTLDurationSecs(); 34 | 35 | void setTTLDurationSecs(Integer ttlDurationSecs); 36 | 37 | @Description( 38 | "The absolute time EpocMilli to stop gap filling, used in bootstrap pipelines when working with Bounded sources") 39 | Long getAbsoluteStopTimeMSTimestamp(); 40 | 41 | void setAbsoluteStopTimeMSTimestamp(Long absoluteStopTimeMSTimestamp); 42 | 43 | @Description( 44 | "Enable hold and propagate of last known value for key, within the TTLDurationSec to gaps.") 45 | @Default.Boolean(false) 46 | Boolean getEnableHoldAndPropogateLastValue(); 47 | 48 | void setEnableHoldAndPropogateLastValue(Boolean enableHoldAndPropogate); 49 | } 50 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/GenerateComputationsOptions.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.options; 19 | 20 | import java.util.List; 21 | import org.apache.beam.sdk.annotations.Experimental; 22 | import org.apache.beam.sdk.options.Description; 23 | import org.apache.beam.sdk.options.PipelineOptions; 24 | 25 | @Experimental 26 | /** 27 | * PipelineOptions to allow the Out of the box metrics to be called by name. For custom metrics use 28 | * {@link com.google.dataflow.sample.timeseriesflow.graph.GenerateComputations.Builder} . 
29 | */ 30 | public interface GenerateComputationsOptions extends PipelineOptions { 31 | 32 | @Description("Type one computations, for example typeone.Sum") 33 | List getTypeOneBasicMetrics(); 34 | 35 | @Description("Type one computations, for example typeone.Sum") 36 | void setTypeOneBasicMetrics(List value); 37 | 38 | @Description("Type two basic computations (order is preserved), for example typetwo.basic.BB") 39 | List getTypeTwoBasicMetrics(); 40 | 41 | @Description("Type two basic computations (order is preserved), for example typetwo.basic.BB") 42 | void setTypeTwoBasicMetrics(List typeTwoBasicMetrics); 43 | 44 | @Description( 45 | "Type two basic computations (order is preserved), for example typetwo.complex.fsi.RSIGFn") 46 | List getTypeTwoComplexMetrics(); 47 | 48 | @Description( 49 | "Type two basic computations (order is preserved), for example typetwo.complex.fsi.RSIGFn") 50 | void setTypeTwoComplexMetrics(List typeTwoBasicMetrics); 51 | } 52 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/TFXOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.options; 19 | 20 | import org.apache.beam.sdk.annotations.Experimental; 21 | import org.apache.beam.sdk.options.Description; 22 | 23 | @Experimental 24 | public interface TFXOptions extends TSFlowOptions { 25 | 26 | @Description("Metadata interchange location") 27 | public String getInterchangeLocation(); 28 | 29 | @Description("Metadata interchange location") 30 | public void setInterchangeLocation(String interchangeLocation); 31 | } 32 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/TSFlowOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
/**
 * Aggregate options interface for the time series pipeline.
 *
 * <p>Combines the Dataflow runner options with the gap fill, computation selection and output
 * options, and adds the window length settings declared below. All lengths are in seconds.
 */
@Experimental
public interface TSFlowOptions
    extends DataflowPipelineOptions,
        GapFillOptions,
        GenerateComputationsOptions,
        TSOutputPipelineOptions {

  /** Fixed window length, in seconds, used for type 1 computations. */
  @Description("Fixed window length for type 1 computations in seconds")
  Integer getTypeOneComputationsLengthInSecs();

  void setTypeOneComputationsLengthInSecs(Integer value);

  /** Sliding window length, in seconds, used for type 2 computations. */
  @Description("Sliding window length for type 2 computations in seconds")
  Integer getTypeTwoComputationsLengthInSecs();

  void setTypeTwoComputationsLengthInSecs(Integer value);

  /** Sliding window offset length, in seconds, used for type 2 computations. */
  @Description("Sliding window offset length for type 2 computations in seconds")
  Integer getTypeTwoComputationsOffsetLengthInSecs();

  void setTypeTwoComputationsOffsetLengthInSecs(Integer value);

  /**
   * Output timestep length in seconds; per the description, the number of output timesteps is
   * this value divided by the type 1 fixed window length.
   */
  @Description(
      "The number of timesteps that will be output is based on this value divided by the type 1 fixed window length.")
  Integer getOutputTimestepLengthInSecs();

  void setOutputTimestepLengthInSecs(Integer value);
}
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/TSOutputPipelineOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
/**
 * Pipeline options naming the output locations for TSAccum results.
 *
 * <p>NOTE(review): neither option carries a {@code @Description}; the expected formats (table
 * spec, topic path) are not visible here and should be confirmed against the sinks that read
 * them.
 */
@Experimental
public interface TSOutputPipelineOptions extends PipelineOptions {

  // Presumably a BigQuery table reference for TSAccum output; confirm the expected format.
  String getBigQueryTableForTSAccumOutputLocation();

  void setBigQueryTableForTSAccumOutputLocation(String bigQueryTableForTSAccumOutputLocation);

  // Presumably a Pub/Sub topic for TSAccum output; confirm the expected format.
  String getPubSubTopicForTSAccumOutputLocation();

  void setPubSubTopicForTSAccumOutputLocation(String pubSubTopicForTSAccumOutputLocation);
}
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.transforms; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 21 | import com.google.protobuf.InvalidProtocolBufferException; 22 | import com.google.protobuf.util.Timestamps; 23 | import org.apache.beam.sdk.annotations.Experimental; 24 | import org.apache.beam.sdk.transforms.DoFn; 25 | import org.joda.time.Instant; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | /** Return a {@link TSDataPoint} from bytes. */ 30 | @Experimental 31 | public class ParseTSDataPointFromBytes extends DoFn { 32 | 33 | private static final Logger LOG = LoggerFactory.getLogger(ParseTSDataPointFromBytes.class); 34 | 35 | public static ParseTSDataPointFromBytes create() { 36 | return new ParseTSDataPointFromBytes(); 37 | } 38 | 39 | @ProcessElement 40 | public void process(@Element byte[] input, OutputReceiver o) { 41 | try { 42 | TSDataPoint dataPoint = TSDataPoint.parseFrom(input); 43 | o.outputWithTimestamp( 44 | dataPoint, Instant.ofEpochMilli(Timestamps.toMillis(dataPoint.getTimestamp()))); 45 | } catch (InvalidProtocolBufferException e) { 46 | LOG.error(e.getMessage()); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/transforms/TSAccumSequenceToRow.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.transforms; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 21 | import org.apache.beam.sdk.annotations.Experimental; 22 | import org.apache.beam.sdk.extensions.protobuf.ProtoMessageSchema; 23 | import org.apache.beam.sdk.schemas.transforms.Convert; 24 | import org.apache.beam.sdk.transforms.PTransform; 25 | import org.apache.beam.sdk.values.PCollection; 26 | import org.apache.beam.sdk.values.Row; 27 | 28 | @Experimental 29 | /** Convert a {@link TSAccumSequence} to a {@link Row}. */ 30 | public class TSAccumSequenceToRow 31 | extends PTransform, PCollection> { 32 | @Override 33 | public PCollection expand(PCollection input) { 34 | 35 | input 36 | .getPipeline() 37 | .getSchemaRegistry() 38 | .registerSchemaProvider(TSAccumSequence.class, new ProtoMessageSchema()); 39 | 40 | return input.apply(Convert.toRows()); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/transforms/TSAccumToJson.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.transforms; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 22 | import com.google.protobuf.InvalidProtocolBufferException; 23 | import com.google.protobuf.util.JsonFormat; 24 | import org.apache.beam.sdk.annotations.Experimental; 25 | import org.apache.beam.sdk.transforms.MapElements; 26 | import org.apache.beam.sdk.transforms.PTransform; 27 | import org.apache.beam.sdk.values.KV; 28 | import org.apache.beam.sdk.values.PCollection; 29 | import org.apache.beam.sdk.values.TypeDescriptors; 30 | 31 | @Experimental 32 | /** Return a Json representation of the TSAccum object. 
*/ 33 | public class TSAccumToJson 34 | extends PTransform>, PCollection> { 35 | 36 | public static TSAccumToJson create() { 37 | return new TSAccumToJson(); 38 | } 39 | 40 | private TSAccumToJson() {}; 41 | 42 | @Override 43 | public PCollection expand(PCollection> input) { 44 | return input.apply( 45 | MapElements.into(TypeDescriptors.strings()) 46 | .via( 47 | x -> { 48 | String json = null; 49 | try { 50 | json = JsonFormat.printer().print(x.getValue().toBuilder().setKey(x.getKey())); 51 | } catch (InvalidProtocolBufferException e) { 52 | json = String.format("{error: %s}", e.getMessage()); 53 | } 54 | return json; 55 | })); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/transforms/TSDataPointToRow.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.transforms; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 21 | import org.apache.beam.sdk.annotations.Experimental; 22 | import org.apache.beam.sdk.extensions.protobuf.ProtoMessageSchema; 23 | import org.apache.beam.sdk.schemas.transforms.Convert; 24 | import org.apache.beam.sdk.transforms.PTransform; 25 | import org.apache.beam.sdk.values.PCollection; 26 | import org.apache.beam.sdk.values.Row; 27 | 28 | @Experimental 29 | /** Convert {@link TSDataPoint} to {@link Row} */ 30 | public class TSDataPointToRow extends PTransform, PCollection> { 31 | @Override 32 | public PCollection expand(PCollection input) { 33 | 34 | input 35 | .getPipeline() 36 | .getSchemaRegistry() 37 | .registerSchemaProvider(TSDataPoint.class, new ProtoMessageSchema()); 38 | 39 | return input.apply(Convert.toRows()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/transforms/TypeTwoComputation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Marker annotation declaring the {@link ComputeType} of a type two computation.
 *
 * <p>Retained at runtime, so it can be read reflectively. No {@code @Target} is declared, so it
 * may be placed on any declaration.
 */
@Retention(RetentionPolicy.RUNTIME)
public @interface TypeTwoComputation {
  /** The kind of computation; required, there is no default. */
  ComputeType computeType();

  // NOTE(review): presumably distinguishes computations over one key from those spanning
  // several keys; confirm against the code that reads this annotation.
  enum ComputeType {
    SINGLE_KEY,
    COMPOSITE_KEY
  }
}
syntax = "proto3";

package protos;

option java_outer_classname = "TimeSeriesTFExampleKeys";
option java_multiple_files = false;
option java_package = "com.google.dataflow.sample.timeseriesflow";

// Metadata keys for generated examples.
// NOTE(review): tag 2 is unused; if it belonged to a removed value, consider marking it
// `reserved 2;` so it is never reused.
enum ExampleMetadata {
  METADATA_SPAN_START_TS = 0;
  METADATA_SPAN_END_TS = 1;
  METADATA_MAJOR_KEY = 3;
  METADATA_MINOR_KEY = 4;
}

// Value types for generated examples.
enum ExampleTypes{
  BYTE = 0;
  INT64 = 1;
  FLOAT = 2;
}
enum Indicators{

  // Number of data points seen.
  DATA_POINT_COUNT = 0;

  SUM = 1;
  MIN = 2;
  MAX = 3;
  FIRST = 4;
  LAST = 5;

  // First timestamp within an aggregation
  FIRST_TIMESTAMP = 6;

  // Last timestamp within an aggregation
  LAST_TIMESTAMP = 7;

  // Day of Week. In TSAccum this will be from the First Value seen. UTC
  DOW = 8;
  // Day of Month. In TSAccum this will be from the First Value seen. UTC
  DOM = 9;
  // Year. In TSAccum this will be from the First Value seen. UTC
  YY = 10;
  // Indicates whether a value was generated by the GapFill routine
  HB = 11;

}
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import org.apache.beam.runners.dataflow.DataflowRunner; 21 | import org.apache.beam.sdk.Pipeline; 22 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 23 | 24 | public class PerfectRectanglesScalability_5Days_5Keys_EvenGaps { 25 | 26 | public static void main(String args[]) { 27 | System.out.println("Running 1 Day 86400 with 1 Key"); 28 | 29 | ScaleTestingOptions options = 30 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 31 | 32 | options.setAppName("TestPerfectRectangles_86400S_NoGaps"); 33 | options.setTypeOneComputationsLengthInSecs(1); 34 | options.setTypeTwoComputationsLengthInSecs(60); 35 | options.setOutputTimestepLengthInSecs(60); 36 | options.setTTLDurationSecs(2); 37 | options.setNumKeys(5); 38 | options.setPerfectRecNumberDataSecs(86400 * 5); 39 | options.setRunner(DataflowRunner.class); 40 | options.setSkipEvens(true); 41 | 42 | Pipeline p = Pipeline.create(options); 43 | 44 | PerfectRectangleUtils.testPerfectRecScalability(p); 45 | 46 | long time = System.currentTimeMillis(); 47 | p.run(); 48 | System.out.println(System.currentTimeMillis() - time); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/PerfectRectanglesScalability_86400S_AllGaps.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import org.apache.beam.sdk.Pipeline; 21 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 22 | 23 | public class PerfectRectanglesScalability_86400S_AllGaps { 24 | 25 | public static void main(String args[]) { 26 | System.out.println("Running 1 Day 86400 with 1 Key"); 27 | 28 | ScaleTestingOptions options = 29 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 30 | 31 | options.setAppName("TestPerfectRectangles_86400S_NoGaps"); 32 | options.setTypeOneComputationsLengthInSecs(1); 33 | options.setTypeTwoComputationsLengthInSecs(60); 34 | options.setOutputTimestepLengthInSecs(60); 35 | options.setTTLDurationSecs(86400); 36 | options.setNumKeys(1); 37 | options.setPerfectRecNumberDataSecs(1); 38 | options.setSkipEvens(false); 39 | 40 | Pipeline p = Pipeline.create(options); 41 | 42 | PerfectRectangleUtils.testPerfectRecScalability(p); 43 | 44 | long time = System.currentTimeMillis(); 45 | p.run(); 46 | System.out.println(System.currentTimeMillis() - time); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/PerfectRectanglesScalability_86400S_EvenGaps.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license 
agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import org.apache.beam.runners.dataflow.DataflowRunner; 21 | import org.apache.beam.sdk.Pipeline; 22 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 23 | 24 | public class PerfectRectanglesScalability_86400S_EvenGaps { 25 | 26 | public static void main(String args[]) { 27 | System.out.println("Running 1 Day 86400 with 1 Key"); 28 | 29 | ScaleTestingOptions options = 30 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 31 | 32 | options.setAppName("TestPerfectRectangles_86400S_NoGaps"); 33 | options.setTypeOneComputationsLengthInSecs(1); 34 | options.setTypeTwoComputationsLengthInSecs(60); 35 | options.setOutputTimestepLengthInSecs(60); 36 | options.setTTLDurationSecs(2); 37 | options.setNumKeys(1); 38 | options.setPerfectRecNumberDataSecs(86400); 39 | options.setRunner(DataflowRunner.class); 40 | options.setNumFeatures(1); 41 | options.setSkipEvens(true); 42 | 43 | Pipeline p = Pipeline.create(options); 44 | 45 | PerfectRectangleUtils.testPerfectRecScalability(p); 46 | 47 | long time = System.currentTimeMillis(); 48 | p.run(); 49 | System.out.println(System.currentTimeMillis() - time); 50 | } 51 | } 52 | 
-------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/PerfectRectanglesScalability_86400S_NoGaps.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey; 22 | import org.apache.beam.sdk.Pipeline; 23 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 24 | import org.apache.beam.sdk.values.KV; 25 | import org.apache.beam.sdk.values.PCollection; 26 | 27 | public class PerfectRectanglesScalability_86400S_NoGaps { 28 | 29 | public static void main(String args[]) { 30 | System.out.println("Running 1 Day 86400 with 1 Key"); 31 | 32 | ScaleTestingOptions options = 33 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 34 | 35 | options.setAppName("TestPerfectRectangles_86400S_NoGaps"); 36 | options.setTypeOneComputationsLengthInSecs(1); 37 | options.setTypeTwoComputationsLengthInSecs(60); 38 | options.setOutputTimestepLengthInSecs(60); 39 | options.setTTLDurationSecs(0); 40 | options.setNumKeys(1); 41 | options.setPerfectRecNumberDataSecs(86400); 42 | options.setSkipEvens(false); 43 | 44 | Pipeline p = Pipeline.create(options); 45 | 46 | PCollection> results = 47 | PerfectRectangleUtils.testPerfectRecScalability(p); 48 | 49 | long time = System.currentTimeMillis(); 50 | p.run(); 51 | System.out.println(System.currentTimeMillis() - time); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/ScaleTestingOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.options.TSFlowOptions; 21 | 22 | public interface ScaleTestingOptions extends TSFlowOptions { 23 | 24 | public Integer getNumSecs(); 25 | 26 | public void setNumSecs(Integer typeOneComputationsLengthInSecs); 27 | 28 | public Integer getNumFeatures(); 29 | 30 | public void setNumFeatures(Integer typeOneComputationsLengthInSecs); 31 | 32 | public Integer getNumKeys(); 33 | 34 | public void setNumKeys(Integer numKeys); 35 | 36 | public Boolean getWithFileOutput(); 37 | 38 | public void setWithFileOutput(Boolean withFileOutput); 39 | 40 | public Integer getPerfectRecNumberDataSecs(); 41 | 42 | public void setPerfectRecNumberDataSecs(Integer endTimestamp); 43 | 44 | public Boolean getSkipEvens(); 45 | 46 | public void setSkipEvens(Boolean skipEvens); 47 | } 48 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/SnapShotScalability_1D_100M_100K_1FW_60SW_withTFExampleSerlization.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 21 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils; 22 | import com.google.dataflow.sample.timeseriesflow.io.tfexample.FeaturesFromIterableAccumSequence; 23 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions; 24 | import org.apache.beam.sdk.Pipeline; 25 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 26 | import org.apache.beam.sdk.values.PCollection; 27 | 28 | public class SnapShotScalability_1D_100M_100K_1FW_60SW_withTFExampleSerlization { 29 | 30 | public static void main(String args[]) { 31 | System.out.println("Running 1 Day with 1 Key and 100 features @ Type 1 1 sec Type 2 60 sec"); 32 | 33 | ScaleTestingOptions options = 34 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 35 | 36 | options.setAppName("SimpleDataStreamTSDataPoints"); 37 | options.setTypeOneComputationsLengthInSecs(1); 38 | options.setTypeTwoComputationsLengthInSecs(60); 39 | options.setOutputTimestepLengthInSecs(60); 40 | options.setNumKeys(100); 41 | options.setNumSecs(86400); 42 | options.setNumFeatures(100); 43 | 44 | Pipeline p = 
Pipeline.create(options); 45 | 46 | PCollection> examples = SnapShotUtils.testSnapShotScalability(p); 47 | 48 | Integer timesteps = CommonUtils.getNumOfSequenceTimesteps(p.getOptions().as(TFXOptions.class)); 49 | 50 | examples.apply(new FeaturesFromIterableAccumSequence(timesteps, true)); 51 | 52 | p.run(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/SnapShotScalability_1D_100M_1K_1FW_60SW.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import org.apache.beam.sdk.Pipeline; 21 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 22 | 23 | public class SnapShotScalability_1D_100M_1K_1FW_60SW { 24 | 25 | public static void main(String args[]) { 26 | System.out.println("Running 1 Day with 1 Key and 100 features @ Type 1 1 sec Type 2 60 sec"); 27 | 28 | ScaleTestingOptions options = 29 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 30 | 31 | options.setAppName("SimpleDataStreamTSDataPoints"); 32 | options.setTypeOneComputationsLengthInSecs(1); 33 | options.setTypeTwoComputationsLengthInSecs(60); 34 | options.setOutputTimestepLengthInSecs(60); 35 | options.setNumKeys(1); 36 | options.setNumSecs(86400); 37 | options.setNumFeatures(1); 38 | 39 | Pipeline p = Pipeline.create(options); 40 | 41 | SnapShotUtils.testSnapShotScalability(p); 42 | 43 | p.run(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/SnapShotScalability_1D_100M_1K_1FW_60SW_withTFExampleSerlization.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence; 21 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils; 22 | import com.google.dataflow.sample.timeseriesflow.io.tfexample.FeaturesFromIterableAccumSequence; 23 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions; 24 | import org.apache.beam.sdk.Pipeline; 25 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 26 | import org.apache.beam.sdk.values.PCollection; 27 | 28 | public class SnapShotScalability_1D_100M_1K_1FW_60SW_withTFExampleSerlization { 29 | 30 | public static void main(String args[]) { 31 | System.out.println("Running 1 Day with 1 Key and 100 features @ Type 1 1 sec Type 2 60 sec"); 32 | 33 | ScaleTestingOptions options = 34 | PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class); 35 | 36 | options.setAppName("SimpleDataStreamTSDataPoints"); 37 | options.setTypeOneComputationsLengthInSecs(1); 38 | options.setTypeTwoComputationsLengthInSecs(60); 39 | options.setOutputTimestepLengthInSecs(60); 40 | options.setNumKeys(1); 41 | options.setNumSecs(86400); 42 | options.setNumFeatures(100); 43 | 44 | Pipeline p = Pipeline.create(options); 45 | 46 | PCollection> examples = SnapShotUtils.testSnapShotScalability(p); 47 | 48 | Integer timesteps = CommonUtils.getNumOfSequenceTimesteps(p.getOptions().as(TFXOptions.class)); 49 | 50 | examples.apply(new FeaturesFromIterableAccumSequence(timesteps, true)); 51 | 52 | 
p.run(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/TestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.google.dataflow.sample.timeseriesflow.test; 19 | 20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint; 21 | import com.google.protobuf.util.Timestamps; 22 | import org.apache.beam.sdk.values.TimestampedValue; 23 | import org.joda.time.Instant; 24 | 25 | public class TestUtils { 26 | 27 | public static TimestampedValue timestampedValueFromTSDataPoint( 28 | TSDataPoint tsDataPoint) { 29 | 30 | return TimestampedValue.of( 31 | tsDataPoint, Instant.ofEpochMilli(Timestamps.toMillis(tsDataPoint.getTimestamp()))); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/proto/TSTest.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more contributor license 3 | * agreements. See the NOTICE file distributed with this work for additional information regarding 4 | * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the 5 | * "License"); you may not use this file except in compliance with the License. You may obtain a 6 | * copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License 11 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | * or implied. See the License for the specific language governing permissions and limitations under 13 | * the License. 
// A point in time for a test event. Exactly one member of the oneof can be set.
// NOTE(review): presumably consumed by a test stream builder to advance the watermark or the
// processing time; confirm against the test code that parses TSTimePointTest.
message Time {
  oneof time_point {

    // Advance the watermark using a named expression.
    AdvanceWatermarkExpression advance_watermark_expression = 1;

    // NOTE(review): presumably the number of seconds to advance the watermark by.
    int32 advance_watermark_seconds = 2;

    // NOTE(review): presumably the number of seconds to advance processing time by. The field
    // name mixes snake_case and camelCase; renaming it would change the JSON field name, so it
    // is left as is.
    int32 advance_processing_timeSeconds = 3;
  }
}

// Wrapper message holding only the time part of a test event.
message TSTimePointTest {
  Time time = 1;

}

// Named watermark positions.
// NOTE(review): the exact meaning of each value is defined by the test code that reads it.
enum AdvanceWatermarkExpression {
  LATE_ALLOWED = 0;
  TOO_LATE = 1;
  INFINITY = 2;
}
// Needed for spotless apply
buildscript { repositories { mavenCentral() }}

// All common configuration, applied to the root project and every sub-project
allprojects {

    // Dependency versions shared by the builds through project extra properties
    ext {
        beamVersion = '2.41.0'
        protobuf_version = '3.19.4'
        autoValueVersion = '1.6.3'
    }

    repositories {
        mavenCentral()
        // Confluent's Maven repository, in addition to Maven Central
        maven {
            url "https://packages.confluent.io/maven"
        }
    }
    wrapper {
        // Gradle version the wrapper is pinned to; the same version appears in
        // gradle/wrapper/gradle-wrapper.properties (gradle-6.8-bin.zip)
        gradleVersion = '6.8' //version required
    }
}
// Name of the root project of this multi-project build
rootProject.name = 'timeseries-java-applications'

// Sub-projects included in the build
include 'TimeSeriesPipeline'
include 'TimeSeriesMetricsLibrary'
include 'SyntheticExamples'
include 'Examples'

include 'Adapters'
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/encoder_decoder/encoder_decoder_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import Dict, Text, Any 17 | import tensorflow as tf 18 | import tensorflow_transform as tft 19 | from ml_pipeline.timeseries.utils import timeseries_transform_utils 20 | 21 | 22 | def preprocessing_fn(inputs: Dict[Text, Any], 23 | custom_config: Dict[Text, Any]) -> Dict[Text, Any]: 24 | """tf.transform's callback function for preprocessing inputs. 25 | 26 | Args: 27 | inputs: map from feature keys to raw not-yet-transformed features. 28 | custom_config: 29 | timesteps: The number of timesteps in the look back window 30 | features: Which of the features from the TF.Example to use in the model. 31 | 32 | Returns: 33 | Dict with keys 'Float32' and 'LABEL'; both map to the same scaled tensor reshaped to [-1, timesteps, number of features].
34 | """ 35 | timesteps = custom_config['timesteps'] 36 | 37 | outputs = inputs.copy() 38 | 39 | # Generate features to be used in the model 40 | train_x_tensors = timeseries_transform_utils.create_feature_list_from_dict( 41 | outputs, custom_config) 42 | 43 | # Scale every feature to a z-score. NOTE: TIMESTAMP features are scaled too, because the exclusion check below is commented out (see TODO). 44 | 45 | for key in train_x_tensors: 46 | # TODO provide option for user to enable / disable -Timestamp scale 47 | # if not str(key).endswith('-TIMESTAMP'): 48 | train_x_tensors[key] = tft.scale_to_z_score(train_x_tensors[key]) 49 | # Order the feature tensors by sorted key 50 | train_x_values = [train_x_tensors[k] for k in sorted(train_x_tensors)] 51 | # Stack on the last axis, then reshape to [-1, timesteps, number of features] 52 | float32 = tf.reshape( 53 | tf.stack(train_x_values, axis=-1), 54 | [-1, timesteps, len(train_x_values)]) 55 | 56 | # Auto Encoder / Decoder requires label == data 57 | outputs = {'Float32': float32, 'LABEL': float32} 58 | return outputs 59 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/encoder_decoder/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/encoder_decoder/transforms/__init__.py -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/pipeline_templates/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/utils/__init__.py -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/inference/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/saved_model.pb -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/saved_model_example/serving_model_dir/variables/variables.index -------------------------------------------------------------------------------- 
/timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/tf_transform_graph_dir/transform_fn/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/tf_transform_graph_dir/transform_fn/saved_model.pb -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/tf_transform_graph_dir/transformed_metadata/schema.pbtxt: -------------------------------------------------------------------------------- 1 | feature { 2 | name: "Float32" 3 | type: FLOAT 4 | presence { 5 | min_fraction: 1.0 6 | } 7 | shape { 8 | dim { 9 | size: 5 10 | } 11 | dim { 12 | size: 3 13 | } 14 | } 15 | } 16 | feature { 17 | name: "LABEL" 18 | type: FLOAT 19 | presence { 20 | min_fraction: 1.0 21 | } 22 | shape { 23 | dim { 24 | size: 5 25 | } 26 | dim { 27 | size: 3 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/ml_pipeline_examples/sin_wave_example/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/notebooks/img/FILLINGBQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/notebooks/img/FILLINGBQ.png -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/notebooks/img/FILLINGPANDAS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/notebooks/img/FILLINGPANDAS.png -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/notebooks/img/MA60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/notebooks/img/MA60.png -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/notebooks/img/STDDEV.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/dataflow-sample-applications/beb6e20133939d7f7024c338ec2dc02e84e17475/timeseries-streaming/timeseries-python-applications/notebooks/img/STDDEV.png -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/setup.cfg: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | [tox:tox] 18 | envlist = yapf-check, yapf, lint 19 | toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox} 20 | 21 | # Configuration applicable to all tox environments. 22 | [testenv] 23 | commands_pre = 24 | python --version 25 | pip --version 26 | # TODO: Reenable pip check after dependency conflicts are resolved. 27 | # pip check 28 | 29 | # Don't warn that these commands aren't installed. 
30 | whitelist_externals = 31 | echo 32 | 33 | # Autoformatter (yet-another-python-formatter) configuration 34 | [yapf] 35 | indent_width = 4 36 | continuation_indent_width = 8 37 | column_limit = 80 38 | allow_split_before_dict_value = False 39 | blank_line_before_module_docstring = True 40 | coalesce_brackets = True 41 | each_dict_entry_on_separate_line = True 42 | split_all_top_level_comma_separated_values = True 43 | split_arguments_when_comma_terminated = True 44 | split_before_expression_after_opening_paren = True 45 | split_before_first_argument = True 46 | split_before_logical_operator = False 47 | 48 | # Run autoformatter in dry-run mode. 49 | [testenv:yapf-check] 50 | deps = 51 | yapf 52 | commands = 53 | yapf --diff --recursive ml_pipeline/timeseries ml_pipeline_examples/sin_wave_example 54 | skip_install = True 55 | 56 | [testenv:yapf] 57 | deps = 58 | yapf 59 | commands = 60 | yapf --parallel --recursive --in-place ml_pipeline/timeseries ml_pipeline_examples/sin_wave_example 61 | skip_install = True 62 | 63 | [testenv:lint] 64 | deps = 65 | flake8 66 | pylint 67 | 68 | commands = 69 | echo "Running pylint..." 70 | pylint ml_pipeline/timeseries 71 | echo "Running flake8..." 72 | flake8 ml_pipeline/timeseries --show-source --statistics 73 | -------------------------------------------------------------------------------- /timeseries-streaming/timeseries-python-applications/setup.py: -------------------------------------------------------------------------------- 1 | # Lint as: python2, python3 2 | # Copyright 2020 Google LLC. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import setuptools 17 | # Package definition for the 'tsflow' sample; tfx and kfp are pinned to exact versions. 18 | setuptools.setup( 19 | name='tsflow', 20 | version='0.3.2-sample', 21 | python_requires='>=3.7', 22 | install_requires=[ 23 | 'tfx==0.24.0', 'kfp==1.0.3' 24 | ], 25 | packages=setuptools.find_packages() 26 | ) 27 | --------------------------------------------------------------------------------