excludeFromOutput();
45 | }
46 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/TSFlowOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.options;
19 |
20 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
21 | import org.apache.beam.sdk.annotations.Experimental;
22 | import org.apache.beam.sdk.options.Description;
23 |
24 | /**
25 |  * Pipeline options that declare the window lengths for type 1 and type 2 computations and the
26 |  * output timestep length. Every length declared here is expressed in seconds.
27 |  */
28 | @Experimental
29 | public interface TSFlowOptions
30 |     extends DataflowPipelineOptions,
31 |         GapFillOptions,
32 |         GenerateComputationsOptions,
33 |         TSOutputPipelineOptions {
34 |
35 |   /** Returns the fixed window length for type 1 computations, in seconds. */
36 |   @Description("Fixed window length for type 1 computations in seconds")
37 |   Integer getTypeOneComputationsLengthInSecs();
38 |
39 |   /** Sets the fixed window length for type 1 computations, in seconds. */
40 |   void setTypeOneComputationsLengthInSecs(Integer lengthInSecs);
41 |
42 |   /** Returns the sliding window length for type 2 computations, in seconds. */
43 |   @Description("Sliding window length for type 2 computations in seconds")
44 |   Integer getTypeTwoComputationsLengthInSecs();
45 |
46 |   /** Sets the sliding window length for type 2 computations, in seconds. */
47 |   void setTypeTwoComputationsLengthInSecs(Integer lengthInSecs);
48 |
49 |   /** Returns the sliding window offset length for type 2 computations, in seconds. */
50 |   @Description("Sliding window offset length for type 2 computations in seconds")
51 |   Integer getTypeTwoComputationsOffsetLengthInSecs();
52 |
53 |   /** Sets the sliding window offset length for type 2 computations, in seconds. */
54 |   void setTypeTwoComputationsOffsetLengthInSecs(Integer offsetInSecs);
55 |
56 |   /**
57 |    * Returns the output timestep length in seconds. Per the option description, the number of
58 |    * timesteps output is based on this value divided by the type 1 fixed window length.
59 |    */
60 |   @Description(
61 |       "The number of timesteps that will be output is based on this value divided by the type 1 fixed window length.")
62 |   Integer getOutputTimestepLengthInSecs();
63 |
64 |   /** Sets the output timestep length, in seconds. */
65 |   void setOutputTimestepLengthInSecs(Integer lengthInSecs);
66 | }
67 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/SyntheticExamples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/simpledata/transforms/SinWaveExampleOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.examples.simpledata.transforms;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.metrics.core.TSMetricsOptions;
21 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions;
22 | import com.google.dataflow.sample.timeseriesflow.options.TSOutputPipelineOptions;
23 | import org.apache.beam.sdk.options.Default;
24 | import org.apache.beam.sdk.options.Description;
25 |
26 | /**
27 |  * Options for the sin wave example: switches for printing metrics and TF.Examples to the logs and
28 |  * for sending outliers with the synthetic data stream. Every switch defaults to {@code false}.
29 |  */
30 | public interface SinWaveExampleOptions
31 |     extends TSOutputPipelineOptions, TSMetricsOptions, TFXOptions {
32 |
33 |   /** Returns whether metric values should be printed to the logs. */
34 |   @Description(
35 |       "In order to see easy output of metrics for demos set this to true. This will result in all values being 'printed' to logs.")
36 |   @Default.Boolean(false)
37 |   Boolean getEnablePrintMetricsToLogs();
38 |
39 |   /** Sets whether metric values should be printed to the logs. */
40 |   void setEnablePrintMetricsToLogs(Boolean value);
41 |
42 |   /** Returns whether TF.Examples should be printed to the logs. */
43 |   @Description(
44 |       "In order to see easy output of TF.Examples for demos set this to true. This will result in all values being 'printed' to logs.")
45 |   @Default.Boolean(false)
46 |   Boolean getEnablePrintTFExamplesToLogs();
47 |
48 |   /** Sets whether TF.Examples should be printed to the logs. */
49 |   void setEnablePrintTFExamplesToLogs(Boolean value);
50 |
51 |   /**
52 |    * Returns whether outliers are sent with the stream of synthetic data. Defaulted like the other
53 |    * switches in this interface so that an unset flag reads as {@code false} instead of {@code null}.
54 |    */
55 |   @Description("Enable sending outliers with the stream of synthetic data.")
56 |   @Default.Boolean(false)
57 |   Boolean getWithOutliers();
58 |
59 |   /** Sets whether outliers are sent with the stream of synthetic data. */
60 |   void setWithOutliers(Boolean value);
61 | }
62 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Max.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
23 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils;
24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
25 | import org.apache.beam.sdk.options.PipelineOptions;
26 |
27 | /** Type 1 metric that maintains the max value held in a {@link TSAccum}. */
28 | public class Max extends BTypeOne {
29 |
30 |   /** Options interface associated with this metric; it declares no options of its own. */
31 |   public interface MaxOptions extends PipelineOptions {}
32 |
33 |   /**
34 |    * Updates the accumulator's max with the value that {@code TSDataUtils.findMaxValue} selects
35 |    * from the current max (possibly null) and the data of {@code dataPoint}.
36 |    *
37 |    * @param accumulator the accumulator to update
38 |    * @param dataPoint the data point whose data is compared against the current max
39 |    * @return the accumulator rebuilt with the updated max
40 |    */
41 |   @Override
42 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
43 |     AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator);
44 |     coreNumeric.setMax(TSDataUtils.findMaxValue(coreNumeric.getMaxOrNull(), dataPoint.getData()));
45 |     return coreNumeric.build();
46 |   }
47 |
48 |   /**
49 |    * Merges two accumulators by storing into {@code a} the value that {@code
50 |    * TSDataUtils.findMaxValue} selects from the max of {@code a} and the max of {@code b}.
51 |    *
52 |    * @param a the accumulator that receives the merged max
53 |    * @param b the accumulator that is only read
54 |    * @return {@code a} rebuilt with the merged max
55 |    */
56 |   @Override
57 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
58 |     AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a);
59 |     AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b);
60 |     aBuilder.setMax(TSDataUtils.findMaxValue(aBuilder.getMaxOrNull(), bBuilder.getMaxOrNull()));
61 |
62 |     return aBuilder.build();
63 |   }
64 | }
65 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Min.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
23 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils;
24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
25 | import org.apache.beam.sdk.options.PipelineOptions;
26 |
27 | /** Type 1 metric that maintains the min value held in a {@link TSAccum}. */
28 | public class Min extends BTypeOne {
29 |
30 |   /** Options interface associated with this metric; it declares no options of its own. */
31 |   public interface MinOptions extends PipelineOptions {}
32 |
33 |   /**
34 |    * Replaces the accumulator's min with whatever {@code TSDataUtils.findMinData} picks between
35 |    * the current min (possibly null) and the incoming data point's data.
36 |    */
37 |   @Override
38 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
39 |     AccumCoreNumericBuilder builder = new AccumCoreNumericBuilder(accumulator);
40 |     builder.setMin(
41 |         TSDataUtils.findMinData(
42 |             builder.getMinOrNull(),
43 |             dataPoint.getData()));
44 |     return builder.build();
45 |   }
46 |
47 |   /**
48 |    * Combines two accumulators: the min that {@code TSDataUtils.findMinData} picks between both
49 |    * sides is written into {@code a}, which is then rebuilt and returned.
50 |    */
51 |   @Override
52 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
53 |     AccumCoreNumericBuilder target = new AccumCoreNumericBuilder(a);
54 |     AccumCoreNumericBuilder other = new AccumCoreNumericBuilder(b);
55 |     target.setMin(
56 |         TSDataUtils.findMinData(
57 |             target.getMinOrNull(),
58 |             other.getMinOrNull()));
59 |     return target.build();
60 |   }
61 | }
62 |
--------------------------------------------------------------------------------
/retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/core/options/RetailPipelineReportingOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.retail.businesslogic.core.options;
19 |
20 | import org.apache.beam.sdk.annotations.Experimental;
21 | import org.apache.beam.sdk.options.Default;
22 | import org.apache.beam.sdk.options.Description;
23 | import org.apache.beam.sdk.options.PipelineOptions;
24 |
25 | /**
26 |  * Reporting-related pipeline options: the dead letter table, the data warehouse project, the
27 |  * reporting and aggregation datasets, and the aggregate stock output topic.
28 |  */
29 | @Experimental
30 | public interface RetailPipelineReportingOptions extends PipelineOptions {
31 |
32 |   /** Returns the dead letter table; defaults to {@code Retail_Store.deadletter_data}. */
33 |   @Description("Deadletter Table for pipeline.")
34 |   @Default.String("Retail_Store.deadletter_data")
35 |   String getDeadLetterTable();
36 |
37 |   void setDeadLetterTable(String deadletterBigQueryTable);
38 |
39 |   /** Returns the project used for data warehousing; no default is declared. */
40 |   @Description("Project used for data warehousing.")
41 |   String getDataWarehouseOutputProject();
42 |
43 |   void setDataWarehouseOutputProject(String dataWarehouseOutputProject);
44 |
45 |   /** Returns the main reporting dataset; defaults to {@code Retail_Store}. */
46 |   @Description("Main reporting dataset.")
47 |   @Default.String("Retail_Store")
48 |   String getMainReportingDataset();
49 |
50 |   void setMainReportingDataset(String mainReportingDataset);
51 |
52 |   /** Returns the aggregate BigQuery dataset; defaults to {@code Retail_Store_Aggregations}. */
53 |   @Description("BigQuery dataset for aggregates.")
54 |   @Default.String("Retail_Store_Aggregations")
55 |   String getAggregateBigQueryDataset();
56 |
57 |   void setAggregateBigQueryDataset(String aggregateBigQueryDataset);
58 |
59 |   /**
60 |    * Returns the aggregate stock Pub/Sub output topic; defaults to {@code
61 |    * /topics/global-stock-level-topic}.
62 |    */
63 |   @Description("Pub/Sub output topic for aggregate stock.")
64 |   @Default.String("/topics/global-stock-level-topic")
65 |   String getAggregateStockPubSubOutputTopic();
66 |
67 |   void setAggregateStockPubSubOutputTopic(String aggregateStockPubSubOutputTopic);
68 | }
69 |
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/combiners/typeone/TSCategoricalCombiner.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.combiners.typeone;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
22 | import com.google.dataflow.sample.timeseriesflow.combiners.TSCombiner;
23 | import org.apache.beam.sdk.annotations.Experimental;
24 |
25 | /** Not implemented. */
26 | @Experimental
27 | public class TSCategoricalCombiner extends TSBaseCombiner implements TSCombiner {
28 |
29 | // TODO implement in next cycle.
30 | private TSCategoricalCombiner() {}
31 |
32 | public static TSCategoricalCombiner combine() {
33 | return new TSCategoricalCombiner();
34 | }
35 |
36 | @Override
37 | public TSAccum mergeTypedDataAccum(TSAccum a, TSAccum b) {
38 |
39 | return null;
40 | }
41 |
42 | @Override
43 | public TSAccum addTypeSpecificInput(TSAccum accumulator, TSDataPoint dataPoint) {
44 | // AccumCategoricalBuilder accumStoreCoreCategorical = new AccumCategoricalBuilder(accum);
45 | // accumStoreCoreCategorical.setDOW(createNumData(time.toDateTime().dayOfWeek().get()));
46 | // accumStoreCoreCategorical.setDOM(createNumData(time.toDateTime().dayOfMonth().get()));
47 | // accumStoreCoreCategorical.setYY(createNumData(time.toDateTime().year().get()));
48 |
49 | return null;
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/retail/retail-java-applications/data-engineering-dept/business-logic/src/main/java/com/google/dataflow/sample/retail/businesslogic/externalservices/RetailCompanyServices.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.retail.businesslogic.externalservices;
19 |
20 | import com.google.dataflow.sample.retail.dataobjects.Item;
21 | import java.util.HashMap;
22 | import java.util.List;
23 | import java.util.Map;
24 | import org.apache.beam.sdk.annotations.Experimental;
25 | import org.apache.beam.sdk.schemas.Schema;
26 |
27 | /**
28 |  * This class represents a mock client to a microservice implemented by the Demo Retail company.
29 |  *
30 |  * <p>The class emulates communication between the Dataflow pipeline, and a hypothetical internal
31 |  * microservice.
32 |  *
33 |  * <p>Real services will often take 10-100's of ms to respond, which causes back pressure within a
34 |  * pipeline. This version of this mock does not cause push back.
35 |  *
36 |  * <p>TODO convert to a service which requires a few hundred ms to respond.
37 |  */
38 | @Experimental
39 | public class RetailCompanyServices {
40 |
41 |   /**
42 |    * Maps every id in {@code itemIds} to the same fixed mock {@link Item}. {@code itemSchema} is
43 |    * not read by this mock.
44 |    *
45 |    * <p>The method is generic in the id type so that it is type-safe without constraining which id
46 |    * type existing callers pass.
47 |    *
48 |    * @param itemIds the ids to resolve; must not be null
49 |    * @param itemSchema unused by this mock
50 |    * @return a new mutable map holding one entry per distinct id, all sharing one {@link Item}
51 |    */
52 |   public <K> Map<K, Item> convertItemIdsToFullText(List<K> itemIds, Schema itemSchema) {
53 |
54 |     Map<K, Item> map = new HashMap<>();
55 |
56 |     Item item =
57 |         Item.builder()
58 |             .setItemBrand("item_brand")
59 |             .setItemCat01("foo_category")
60 |             .setItemName("foo_name")
61 |             .build();
62 |
63 |     itemIds.forEach(x -> map.put(x, item));
64 |     return map;
65 |   }
66 | }
67 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/Examples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/fsi/forex/ExampleForexPipelineOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.examples.fsi.forex;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions;
21 | import com.google.dataflow.sample.timeseriesflow.options.TSFlowOptions;
22 | import org.apache.beam.sdk.annotations.Experimental;
23 | import org.apache.beam.sdk.options.Description;
24 |
25 | /**
26 |  * Options for the example forex pipeline: the BigQuery output table, the input path, the end
27 |  * timestamp for gap filling, the sampling period and the rolling window length.
28 |  */
29 | @Experimental
30 | public interface ExampleForexPipelineOptions extends TSFlowOptions, TFXOptions {
31 |
32 |   @Description("Option to specify BigQuery target table to push metrics")
33 |   String getBigQueryTableForTSAccumOutputLocation();
34 |
35 |   void setBigQueryTableForTSAccumOutputLocation(String bigQueryTableForTSAccumOutputLocation);
36 |
37 |   @Description("Option to specify absolute path for input dataset")
38 |   String getInputPath();
39 |
40 |   void setInputPath(String inputPath);
41 |
42 |   @Description(
43 |       "Option to specify the end timestamp to stop filling gaps, e.g., end of time series dataset")
44 |   String getEndTimestamp();
45 |
46 |   void setEndTimestamp(String endTimestamp);
47 |
48 |   @Description("Option to specify sampling period in seconds")
49 |   Integer getResampleSec();
50 |
51 |   void setResampleSec(Integer resampleSec);
52 |
53 |   @Description("Option to specify rolling window to calculate metrics in seconds")
54 |   Integer getWindowSec();
55 |
56 |   // The parameter was previously named resampleSec, a copy-paste slip from the setter above.
57 |   void setWindowSec(Integer windowSec);
58 | }
59 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesMetricsLibrary/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/Sum.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
22 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
23 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils;
24 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
25 | import org.apache.beam.sdk.options.PipelineOptions;
26 |
27 | /** Type 1 metric that maintains the sum value held in a {@link TSAccum}. */
28 | public class Sum extends BTypeOne {
29 |
30 |   /** Options interface associated with this metric; it declares no options of its own. */
31 |   public interface SumOptions extends PipelineOptions {}
32 |
33 |   /**
34 |    * Replaces the accumulator's sum with the result of {@code
35 |    * CommonUtils.sumNumericDataNullAsZero} applied to the current sum (possibly null) and the
36 |    * incoming data point's data.
37 |    */
38 |   @Override
39 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
40 |     AccumCoreNumericBuilder builder = new AccumCoreNumericBuilder(accumulator);
41 |     builder.setSum(
42 |         CommonUtils.sumNumericDataNullAsZero(
43 |             builder.getSumOrNull(),
44 |             dataPoint.getData()));
45 |     return builder.build();
46 |   }
47 |
48 |   /**
49 |    * Combines two accumulators: the result of {@code CommonUtils.sumNumericDataNullAsZero} over
50 |    * both sums is written into {@code a}, which is then rebuilt and returned.
51 |    */
52 |   @Override
53 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
54 |     AccumCoreNumericBuilder target = new AccumCoreNumericBuilder(a);
55 |     AccumCoreNumericBuilder other = new AccumCoreNumericBuilder(b);
56 |     target.setSum(
57 |         CommonUtils.sumNumericDataNullAsZero(
58 |             target.getSumOrNull(),
59 |             other.getSumOrNull()));
60 |     return target.build();
61 |   }
62 | }
63 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestMax.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.common.annotations.VisibleForTesting;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
24 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils;
25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
26 | import org.apache.beam.sdk.annotations.Experimental;
27 |
28 | /** This is a dummy class used as a test artifact only. */
29 | @VisibleForTesting
30 | @Experimental
31 | public class TestMax extends BTypeOne {
32 |
33 |   /**
34 |    * Updates the accumulator's max with the value that {@code TSDataUtils.findMaxValue} selects
35 |    * from the current max (possibly null) and the data of {@code dataPoint}.
36 |    */
37 |   @Override
38 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
39 |     AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator);
40 |     coreNumeric.setMax(TSDataUtils.findMaxValue(coreNumeric.getMaxOrNull(), dataPoint.getData()));
41 |     return coreNumeric.build();
42 |   }
43 |
44 |   /**
45 |    * Merges two accumulators by storing into {@code a} the value that {@code
46 |    * TSDataUtils.findMaxValue} selects from the max of {@code a} and the max of {@code b}.
47 |    */
48 |   @Override
49 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
50 |     AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a);
51 |     AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b);
52 |     aBuilder.setMax(TSDataUtils.findMaxValue(aBuilder.getMaxOrNull(), bBuilder.getMaxOrNull()));
53 |
54 |     return aBuilder.build();
55 |   }
56 | }
57 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestMin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.common.annotations.VisibleForTesting;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
24 | import com.google.dataflow.sample.timeseriesflow.common.TSDataUtils;
25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
26 | import org.apache.beam.sdk.annotations.Experimental;
27 |
28 | /** This is a dummy class used as a test artifact only. */
29 | @VisibleForTesting
30 | @Experimental
31 | public class TestMin extends BTypeOne {
32 |
33 |   /**
34 |    * Updates the accumulator's min with the value that {@code TSDataUtils.findMinData} selects
35 |    * from the current min (possibly null) and the data of {@code dataPoint}.
36 |    */
37 |   @Override
38 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
39 |     AccumCoreNumericBuilder coreNumeric = new AccumCoreNumericBuilder(accumulator);
40 |     coreNumeric.setMin(TSDataUtils.findMinData(coreNumeric.getMinOrNull(), dataPoint.getData()));
41 |     return coreNumeric.build();
42 |   }
43 |
44 |   /**
45 |    * Merges two accumulators by storing into {@code a} the value that {@code
46 |    * TSDataUtils.findMinData} selects from the min of {@code a} and the min of {@code b}.
47 |    */
48 |   @Override
49 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
50 |     AccumCoreNumericBuilder aBuilder = new AccumCoreNumericBuilder(a);
51 |     AccumCoreNumericBuilder bBuilder = new AccumCoreNumericBuilder(b);
52 |     aBuilder.setMin(TSDataUtils.findMinData(aBuilder.getMinOrNull(), bBuilder.getMinOrNull()));
53 |
54 |     return aBuilder.build();
55 |   }
56 | }
57 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/datamap/AccumCoreNumericBuilder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.datamap;
19 |
20 | import com.google.common.base.Preconditions;
21 | import com.google.dataflow.sample.timeseriesflow.DerivedAggregations.Indicators;
22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.Data;
23 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
24 | import org.apache.beam.sdk.annotations.Experimental;
25 |
26 | /**
27 |  * Accum Numeric Builder, dealing with the common numeric aggregations Sum, Min and Max. Each value
28 |  * is read and written under the name of the matching {@link Indicators} constant.
29 |  */
30 | @Experimental
31 | public class AccumCoreNumericBuilder extends AccumCoreMetadataBuilder {
32 |
33 |   /** Creates a builder over the given accumulator by delegating to the superclass constructor. */
34 |   public AccumCoreNumericBuilder(TSAccum tsAccum) {
35 |     super(tsAccum);
36 |   }
37 |
38 |   /** Returns the result of {@code getValueOrNull} for the {@code SUM} indicator name. */
39 |   public Data getSumOrNull() {
40 |     return getValueOrNull(Indicators.SUM.name());
41 |   }
42 |
43 |   /** Returns the result of {@code getValueOrNull} for the {@code MAX} indicator name. */
44 |   public Data getMaxOrNull() {
45 |     return getValueOrNull(Indicators.MAX.name());
46 |   }
47 |
48 |   /** Returns the result of {@code getValueOrNull} for the {@code MIN} indicator name. */
49 |   public Data getMinOrNull() {
50 |     return getValueOrNull(Indicators.MIN.name());
51 |   }
52 |
53 |   /** Sets the value for the {@code SUM} indicator name; {@code data} must not be null. */
54 |   public void setSum(Data data) {
55 |     Preconditions.checkNotNull(data);
56 |     setValue(Indicators.SUM.name(), data);
57 |   }
58 |
59 |   /** Sets the value for the {@code MAX} indicator name; {@code data} must not be null. */
60 |   public void setMax(Data data) {
61 |     Preconditions.checkNotNull(data);
62 |     setValue(Indicators.MAX.name(), data);
63 |   }
64 |
65 |   /** Sets the value for the {@code MIN} indicator name; {@code data} must not be null. */
66 |   public void setMin(Data data) {
67 |     Preconditions.checkNotNull(data);
68 |     setValue(Indicators.MIN.name(), data);
69 |   }
70 | }
71 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/PerfectRectanglesScalability_86400S_NoGaps.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.test;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey;
22 | import org.apache.beam.sdk.Pipeline;
23 | import org.apache.beam.sdk.options.PipelineOptionsFactory;
24 | import org.apache.beam.sdk.values.KV;
25 | import org.apache.beam.sdk.values.PCollection;
26 |
27 | /**
28 |  * Manual scale-test driver for the pipeline assembled by {@code
29 |  * PerfectRectangleUtils.testPerfectRecScalability}.
30 |  *
31 |  * <p>All options are fixed in code: numKeys 1, perfectRecNumberDataSecs 86400, skipEvens false,
32 |  * typeOneComputationsLengthInSecs 1, typeTwoComputationsLengthInSecs 60,
33 |  * outputTimestepLengthInSecs 60 and TTLDurationSecs 0.
34 |  */
35 | public class PerfectRectanglesScalability_86400S_NoGaps {
36 |
37 |   /**
38 |    * Builds the pipeline, runs it, and prints the milliseconds spent inside {@code Pipeline.run()}.
39 |    *
40 |    * @param args pipeline options in the form accepted by {@code PipelineOptionsFactory.fromArgs}
41 |    */
42 |   public static void main(String[] args) {
43 |     System.out.println("Running 1 Day 86400 with 1 Key");
44 |
45 |     ScaleTestingOptions options =
46 |         PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class);
47 |
48 |     options.setAppName("TestPerfectRectangles_86400S_NoGaps");
49 |     options.setTypeOneComputationsLengthInSecs(1);
50 |     options.setTypeTwoComputationsLengthInSecs(60);
51 |     options.setOutputTimestepLengthInSecs(60);
52 |     options.setTTLDurationSecs(0);
53 |     options.setNumKeys(1);
54 |     options.setPerfectRecNumberDataSecs(86400);
55 |     options.setSkipEvens(false);
56 |
57 |     Pipeline p = Pipeline.create(options);
58 |
59 |     // The call is made for its effect on p; the returned collection was never read, so it is
60 |     // no longer bound to a local.
61 |     PerfectRectangleUtils.testPerfectRecScalability(p);
62 |
63 |     // NOTE(review): only the time spent in run() is measured; confirm the configured runner
64 |     // blocks until the job finishes, otherwise this is just the submission time.
65 |     long startMillis = System.currentTimeMillis();
66 |     p.run();
67 |     System.out.println(System.currentTimeMillis() - startMillis);
68 |   }
69 | }
70 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/Adapters/src/main/java/com/google/dataflow/sample/timeseriesflow/adaptors/fsi/data/cme/LogElements.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.adaptors.fsi.data.cme;
19 |
20 | import org.apache.beam.sdk.annotations.Experimental;
21 | import org.apache.beam.sdk.transforms.DoFn;
22 | import org.apache.beam.sdk.transforms.PTransform;
23 | import org.apache.beam.sdk.transforms.ParDo;
24 | import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
25 | import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
26 | import org.apache.beam.sdk.values.PCollection;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 |
30 | /**
31 |  * Pass-through transform that logs every element at INFO level and re-emits it unchanged.
32 |  *
33 |  * <p>The logged text is {@code element.toString()}; when the element's window is not a {@link
34 |  * GlobalWindow} the window's {@code toString()} is appended.
35 |  *
36 |  * @param <T> element type of both the input and the output collection
37 |  */
38 | @Experimental
39 | class LogElements<T> extends PTransform<PCollection<T>, PCollection<T>> {
40 |
41 |   private static final Logger LOG = LoggerFactory.getLogger(LogElements.class);
42 |
43 |   /**
44 |    * Applies the logging step.
45 |    *
46 |    * @param input collection whose elements are logged
47 |    * @return a collection carrying the same elements as {@code input}
48 |    */
49 |   @Override
50 |   public PCollection<T> expand(PCollection<T> input) {
51 |
52 |     return input.apply(
53 |         "Logging Elements",
54 |         ParDo.of(
55 |             new DoFn<T, T>() {
56 |
57 |               @ProcessElement
58 |               public void processElement(
59 |                   @Element T element, OutputReceiver<T> out, BoundedWindow window) {
60 |
61 |                 String message = element.toString();
62 |
63 |                 // Window details are only appended for non-global windows.
64 |                 if (!(window instanceof GlobalWindow)) {
65 |                   message = message + " Window: " + window.toString();
66 |                 }
67 |
68 |                 LOG.info(message);
69 |
70 |                 out.output(element);
71 |               }
72 |             }));
73 |   }
74 | }
75 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-python-applications/ml_pipeline/timeseries/encoder_decoder/encoder_decoder_preprocessing.py:
--------------------------------------------------------------------------------
1 | # Lint as: python2, python3
2 | # Copyright 2020 Google LLC. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from typing import Dict, Text, Any
17 | import tensorflow as tf
18 | import tensorflow_transform as tft
19 | from ml_pipeline.timeseries.utils import timeseries_transform_utils
20 |
21 |
22 | def preprocessing_fn(inputs: Dict[Text, Any],
23 | custom_config: Dict[Text, Any]) -> Dict[Text, Any]:
24 | """tf.transform's callback function for preprocessing inputs.
25 |
26 | Args:
27 | inputs: map from feature keys to raw not-yet-transformed features.
28 | custom_config:
29 | timesteps: The number of timesteps in the look back window
30 | features: Which of the features from the TF.Example to use in the model.
31 |
32 | Returns:
33 | Map from string feature key to transformed feature operations.
34 | """
35 | timesteps = custom_config['timesteps']
36 |
37 | outputs = inputs.copy()
38 |
39 | # Generate features to be used in the model
40 | train_x_tensors = timeseries_transform_utils.create_feature_list_from_dict(
41 | outputs, custom_config)
42 |
43 | # Scale the inputs with the exception of TIMESTAMPS
44 |
45 | for key in train_x_tensors:
46 | # TODO provide option for user to enable / disable -Timestamp scale
47 | # if not str(key).endswith('-TIMESTAMP'):
48 | train_x_tensors[key] = tft.scale_to_z_score(train_x_tensors[key])
49 |
50 | train_x_values = [train_x_tensors[k] for k in sorted(train_x_tensors)]
51 |
52 | float32 = tf.reshape(
53 | tf.stack(train_x_values, axis=-1),
54 | [-1, timesteps, len(train_x_values)])
55 |
56 | # Auto Encoder / Decoder requires label == data
57 | outputs = {'Float32': float32, 'LABEL': float32}
58 | return outputs
59 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/metrics/core/typeone/TestSum.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.metrics.core.typeone;
19 |
20 | import com.google.common.annotations.VisibleForTesting;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
22 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSDataPoint;
23 | import com.google.dataflow.sample.timeseriesflow.combiners.BTypeOne;
24 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils;
25 | import com.google.dataflow.sample.timeseriesflow.datamap.AccumCoreNumericBuilder;
26 | import org.apache.beam.sdk.annotations.Experimental;
27 |
28 | /**
29 |  * This is a dummy class used as a test artifact only.
30 |  *
31 |  * <p>It maintains the sum entry of a {@link TSAccum} through {@link AccumCoreNumericBuilder}.
32 |  */
33 | @VisibleForTesting
34 | @Experimental
35 | public class TestSum extends BTypeOne {
36 |
37 |   /**
38 |    * Replaces the accumulator's sum with the result of {@code
39 |    * CommonUtils.sumNumericDataNullAsZero(currentSum, dataPoint.getData())}.
40 |    */
41 |   @Override
42 |   public TSAccum addInput(TSAccum accumulator, TSDataPoint dataPoint) {
43 |     AccumCoreNumericBuilder sumBuilder = new AccumCoreNumericBuilder(accumulator);
44 |     sumBuilder.setSum(
45 |         CommonUtils.sumNumericDataNullAsZero(sumBuilder.getSumOrNull(), dataPoint.getData()));
46 |     return sumBuilder.build();
47 |   }
48 |
49 |   /**
50 |    * Combines the sums of both accumulators and returns the result built on top of {@code a}.
51 |    */
52 |   @Override
53 |   public TSAccum mergeDataAccums(TSAccum a, TSAccum b) {
54 |     AccumCoreNumericBuilder target = new AccumCoreNumericBuilder(a);
55 |     AccumCoreNumericBuilder other = new AccumCoreNumericBuilder(b);
56 |     target.setSum(
57 |         CommonUtils.sumNumericDataNullAsZero(target.getSumOrNull(), other.getSumOrNull()));
58 |
59 |     return target.build();
60 |   }
61 | }
62 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/options/GenerateComputationsOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.options;
19 |
20 | import java.util.List;
21 | import org.apache.beam.sdk.annotations.Experimental;
22 | import org.apache.beam.sdk.options.Description;
23 | import org.apache.beam.sdk.options.PipelineOptions;
24 |
25 | /**
26 |  * PipelineOptions to allow the Out of the box metrics to be called by name. For custom metrics use
27 |  * {@link com.google.dataflow.sample.timeseriesflow.graph.GenerateComputations.Builder} .
28 |  *
29 |  * <p>Each option is a list of metric names, for example {@code typeone.Sum}.
30 |  */
31 | @Experimental
32 | public interface GenerateComputationsOptions extends PipelineOptions {
33 |
34 |   @Description("Type one computations, for example typeone.Sum")
35 |   List<String> getTypeOneBasicMetrics();
36 |
37 |   @Description("Type one computations, for example typeone.Sum")
38 |   void setTypeOneBasicMetrics(List<String> value);
39 |
40 |   @Description("Type two basic computations (order is preserved), for example typetwo.basic.BB")
41 |   List<String> getTypeTwoBasicMetrics();
42 |
43 |   @Description("Type two basic computations (order is preserved), for example typetwo.basic.BB")
44 |   void setTypeTwoBasicMetrics(List<String> typeTwoBasicMetrics);
45 |
46 |   // The help text used to say "basic" here, copied from the option above.
47 |   @Description(
48 |       "Type two complex computations (order is preserved), for example typetwo.complex.fsi.RSIGFn")
49 |   List<String> getTypeTwoComplexMetrics();
50 |
51 |   @Description(
52 |       "Type two complex computations (order is preserved), for example typetwo.complex.fsi.RSIGFn")
53 |   void setTypeTwoComplexMetrics(List<String> typeTwoBasicMetrics);
54 | }
55 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-python-applications/setup.cfg:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | [tox:tox]
18 | envlist = yapf-check, yapf, lint
19 | toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox}
20 |
21 | # Configuration applicable to all tox environments.
22 | [testenv]
23 | commands_pre =
24 |   python --version
25 |   pip --version
26 |   # TODO: Reenable pip check after dependency conflicts are resolved.
27 |   # pip check
28 |
29 | # Don't warn that these commands aren't installed.
30 | # NOTE(review): newer tox releases name this key allowlist_externals; confirm the tox version.
31 | whitelist_externals =
32 |   echo
33 |
34 | # Autoformatter (yet-another-python-formatter) configuration
35 | [yapf]
36 | indent_width = 4
37 | continuation_indent_width = 8
38 | column_limit = 80
39 | allow_split_before_dict_value = False
40 | blank_line_before_module_docstring = True
41 | coalesce_brackets = True
42 | each_dict_entry_on_separate_line = True
43 | split_all_top_level_comma_separated_values = True
44 | split_arguments_when_comma_terminated = True
45 | split_before_expression_after_opening_paren = True
46 | split_before_first_argument = True
47 | split_before_logical_operator = False
48 |
49 | # Run autoformatter in dry-run mode.
50 | # Paths are separate space-delimited arguments; a trailing comma would become part of the path.
51 | [testenv:yapf-check]
52 | deps =
53 |   yapf
54 | commands =
55 |   yapf --diff --recursive ml_pipeline/timeseries ml_pipeline_examples/sin_wave_example
56 | skip_install = True
57 |
58 | # Run autoformatter in place on the same two source trees.
59 | [testenv:yapf]
60 | deps =
61 |   yapf
62 | commands =
63 |   yapf --parallel --recursive --in-place ml_pipeline/timeseries ml_pipeline_examples/sin_wave_example
64 | skip_install = True
65 |
66 | # Run pylint, then flake8, over ml_pipeline/timeseries.
67 | [testenv:lint]
68 | deps =
69 |   flake8
70 |   pylint
71 |
72 | commands =
73 |   echo "Running pylint..."
74 |   pylint ml_pipeline/timeseries
75 |   echo "Running flake8..."
76 |   flake8 ml_pipeline/timeseries --show-source --statistics
77 |
--------------------------------------------------------------------------------
/retail/retail-java-applications/data-engineering-dept/data-objects/src/main/java/com/google/dataflow/sample/retail/dataobjects/StockAggregation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.retail.dataobjects;
19 |
20 | import com.google.auto.value.AutoValue;
21 | import javax.annotation.Nullable;
22 | import org.apache.beam.sdk.annotations.Experimental;
23 | import org.apache.beam.sdk.schemas.AutoValueSchema;
24 | import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
25 | import org.apache.beam.sdk.schemas.annotations.SchemaFieldName;
26 |
27 | /**
28 |  * AutoValue data object describing a stock aggregation. Its Beam schema is taken from the
29 |  * accessors below via {@link AutoValueSchema}; every property is {@code @Nullable}.
30 |  */
31 | @AutoValue
32 | @DefaultSchema(AutoValueSchema.class)
33 | @Experimental
34 | public abstract class StockAggregation {
35 |
36 | // Presumably the aggregation window length in milliseconds (inferred from the name) - TODO confirm.
37 | @Nullable
38 | public abstract Long getDurationMS();
39 |
40 | // Presumably the start of the aggregation window; units are not visible here - TODO confirm.
41 | @Nullable
42 | public abstract Long getStartTime();
43 |
44 | // Exposed in the schema under the field name "product_id".
45 | @Nullable
46 | @SchemaFieldName("product_id")
47 | public abstract Integer getProductId();
48 |
49 | // Exposed in the schema under the field name "store_id".
50 | @Nullable
51 | @SchemaFieldName("store_id")
52 | public abstract Integer getStoreId();
53 |
54 | // The aggregated count; what is being counted is decided by the caller.
55 | @Nullable
56 | public abstract Long getCount();
57 |
58 | /** Returns a builder; implemented by the AutoValue-generated subclass. */
59 | public abstract StockAggregation.Builder toBuilder();
60 |
61 | /** Returns a new builder backed by the generated {@code AutoValue_StockAggregation}. */
62 | public static StockAggregation.Builder builder() {
63 | return new AutoValue_StockAggregation.Builder();
64 | }
65 |
66 | /** Builder with one setter per property of {@link StockAggregation}. */
67 | @AutoValue.Builder
68 | public abstract static class Builder {
69 |
70 | public abstract Builder setDurationMS(Long value);
71 |
72 | public abstract Builder setStartTime(Long value);
73 |
74 | public abstract Builder setProductId(Integer value);
75 |
76 | public abstract Builder setStoreId(Integer value);
77 |
78 | public abstract Builder setCount(Long value);
79 |
80 | public abstract StockAggregation build();
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # NOTE: if you modify this file, you probably need to modify the file set that
2 | # is an input to 'maven-assembly-plugin' that generates source distribution.
3 | # This is typically in files named 'src.xml' throughout this repository.
4 |
5 | # Ignore any offline repositories the user may have created.
6 | **/offline-repository/**/*
7 |
8 | # Ignore files generated by the Gradle build process.
9 | **/.gradle/**/*
10 | **/.gogradle/**/*
11 | **/.nb-gradle/**/*
12 | **/gogradle.lock
13 | **/build/**/*
14 | .test-infra/**/vendor/**/*
15 | sdks/**/vendor/**/*
16 | runners/**/vendor/**/*
17 | **/.gradletasknamecache
18 |
19 | # Ignore files generated by the Maven build process.
20 | **/bin/**/*
21 | **/dependency-reduced-pom.xml
22 | **/target/**/*
23 |
24 | # Ignore generated archetypes
25 | sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/
26 | sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/
27 |
28 | # Ignore files generated by the Python build process.
29 | **/*.pyc
30 | **/*.pyo
31 | **/*.pyd
32 | **/*.egg-info/
33 | **/.eggs/
34 | **/nose-*.egg/
35 | **/.tox/**/*
36 | **/dist/**/*
37 | **/distribute-*/**/*
38 | **/env/**/*
39 | **/.mypy_cache
40 | **/.dmypy.json
41 | sdks/python/**/*.c
42 | sdks/python/**/*.so
43 | sdks/python/**/*.egg
44 | sdks/python/LICENSE
45 | sdks/python/NOTICE
46 | sdks/python/README.md
47 | sdks/python/apache_beam/portability/api/*pb2*.*
48 | sdks/python/apache_beam/portability/api/*.yaml
49 | sdks/python/nosetests*.xml
50 | sdks/python/pytest*.xml
51 | sdks/python/postcommit_requirements.txt
52 |
53 | # Ignore IntelliJ files.
54 | **/.idea/**/*
55 | **/*.iml
56 | **/*.ipr
57 | **/*.iws
58 | **/out/**/*
59 |
60 | # Ignore Eclipse files.
61 | **/.classpath
62 | **/.project
63 | **/.factorypath
64 | **/.checkstyle
65 | **/.fbExcludeFilterFile
66 | **/.apt_generated/**/*
67 | **/.settings/**/*
68 | **/.gitignore
69 |
70 | # Ignore Visual Studio Code files.
71 | **/.vscode/**/*
72 |
73 | # Hotspot VM leaves this log in a non-target directory when java crashes
74 | **/hs_err_pid*.log
75 |
76 | # Ignore files that end with '~', since they are most likely auto-save files
77 | # produced by a text editor.
78 | **/*~
79 |
80 | # Ignore MacOSX files.
81 | **/.DS_Store/**/*
82 | **/.DS_Store
83 |
84 | # Ignore Jupyter notebook checkpoints.
85 | **/.ipynb_checkpoints/**/*
86 |
87 | # JetBrains Education files
88 | !**/study_project.xml
89 | **/.coursecreator/**/*
90 |
91 | .pytest_cache
92 | .pytest_cache/**/*
93 |
94 | # Terraform intermediate artifacts
95 | .terraform/
96 | .terraform.lock.hcl
97 | *.tfstate
98 | *.tfstate.*
99 |
--------------------------------------------------------------------------------
/retail/retail-java-applications/data-engineering-dept/business-logic/README.MD:
--------------------------------------------------------------------------------
1 |
19 | # Clickstream Processing
20 |
21 | All business transformations for the processing and analysis of clickstream data.
22 |
23 | # Main Process Clickstream:
24 | * Read Click Stream Topic
25 | * Parse Messages to Beam SCHEMAS
26 | * Branch 1:
27 | * Write RAW JSON String Clickstream for storage
28 | * Branch 2:
29 | * Clean the data
30 | * Write Cleaned Data to BigQuery
31 | * Branch 2.1:
32 | * Filter out events of type ERROR
33 | * Count Page Views per product in 5 sec windows
34 | * Export page view aggregates to Bigtable
35 | * Export page view aggregates to BigQuery
36 |
37 | # Main Transactions Processing
38 |
39 | All business transformations for the processing and analysis of transaction data.
40 |
41 | Process Transaction:
42 | * Read transaction Stream Topic
43 | * Branch 1:
44 | * Write RAW JSON String Transaction for storage
45 | * Branch 2:
46 | * Parse Messages to Beam SCHEMAS
47 | * Convert to transaction object
48 | * Enrich the data
49 | * Write enriched Data to BigQuery
50 |
51 | The next section covers all business transformations for the processing and analysis of stock data.
52 |
53 | # Main Stock Processing
54 | * Read stock Stream Topic
55 | * Branch 1:
56 | * Write RAW JSON String stock for storage
57 | * Branch 2:
58 | * Parse Messages to Beam SCHEMAS
59 | * Convert to Stock Object
60 | * Write Data to BigQuery
61 |
62 | # Utility Transaction
63 | * Count sales per item per store within a fixed window
64 | * Count sales per item within a fixed window
65 |
66 | # Utility Stock
67 | * Count incoming stock per item per store within a fixed window
68 | * Count incoming stock per item within a fixed window
69 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/Examples/src/main/java/com/google/dataflow/sample/timeseriesflow/examples/fsi/forex/HistoryForexReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.examples.fsi.forex;
19 |
20 | import com.google.auto.value.AutoValue;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData;
22 | import java.util.Set;
23 | import org.apache.beam.sdk.io.TextIO;
24 | import org.apache.beam.sdk.transforms.PTransform;
25 | import org.apache.beam.sdk.values.PBegin;
26 | import org.apache.beam.sdk.values.PCollectionTuple;
27 | import org.apache.beam.sdk.values.TupleTag;
28 |
29 | /**
30 |  * Root transform that reads text lines from {@link #getSourceFilesURI()} with {@link TextIO} and
31 |  * hands them to {@code ForexCSVAdaptor.ConvertCSVForex}, constructed with {@link #getTickers()}.
32 |  *
33 |  * <p>NOTE(review): the generic arguments were reconstructed; the element types of the tickers
34 |  * set and of the two tags are inferred from the imports and usage. Confirm against
35 |  * ForexCSVAdaptor.
36 |  */
37 | @AutoValue
38 | public abstract class HistoryForexReader extends PTransform<PBegin, PCollectionTuple> {
39 |
40 |   /** File pattern passed to {@code TextIO.read().from(...)}. */
41 |   public abstract String getSourceFilesURI();
42 |
43 |   /** Tickers forwarded to the CSV converter. */
44 |   public abstract Set<String> getTickers();
45 |
46 |   public static Builder builder() {
47 |     return new AutoValue_HistoryForexReader.Builder();
48 |   }
49 |
50 |   /** Builder for {@link HistoryForexReader}. */
51 |   @AutoValue.Builder
52 |   public abstract static class Builder {
53 |
54 |     public abstract Builder setSourceFilesURI(String newSourceFilesURI);
55 |
56 |     public abstract Builder setTickers(Set<String> newTickers);
57 |
58 |     public abstract HistoryForexReader build();
59 |   }
60 |
61 |   // Tags to implement basic example of deadletter queue pattern
62 |   static final TupleTag<TimeSeriesData.TSDataPoint> successfulParse =
63 |       new TupleTag<TimeSeriesData.TSDataPoint>();
64 |   static final TupleTag<String> deadLetterTag = new TupleTag<String>();
65 |
66 |   /**
67 |    * Reads the source files and converts each line.
68 |    *
69 |    * @param input the pipeline start
70 |    * @return the tuple produced by {@code ForexCSVAdaptor.ConvertCSVForex}
71 |    */
72 |   @Override
73 |   public PCollectionTuple expand(PBegin input) {
74 |     return input
75 |         .apply(TextIO.read().from(getSourceFilesURI()))
76 |         .apply(new ForexCSVAdaptor.ConvertCSVForex(getTickers()));
77 |   }
78 | }
79 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/main/java/com/google/dataflow/sample/timeseriesflow/transforms/TSAccumToJson.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.transforms;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccum;
21 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSKey;
22 | import com.google.protobuf.InvalidProtocolBufferException;
23 | import com.google.protobuf.util.JsonFormat;
24 | import org.apache.beam.sdk.annotations.Experimental;
25 | import org.apache.beam.sdk.transforms.MapElements;
26 | import org.apache.beam.sdk.transforms.PTransform;
27 | import org.apache.beam.sdk.values.KV;
28 | import org.apache.beam.sdk.values.PCollection;
29 | import org.apache.beam.sdk.values.TypeDescriptors;
30 |
31 | @Experimental
32 | /** Return a Json representation of the TSAccum object. */
33 | public class TSAccumToJson
34 | extends PTransform>, PCollection> {
35 |
36 | public static TSAccumToJson create() {
37 | return new TSAccumToJson();
38 | }
39 |
40 | private TSAccumToJson() {};
41 |
42 | @Override
43 | public PCollection expand(PCollection> input) {
44 | return input.apply(
45 | MapElements.into(TypeDescriptors.strings())
46 | .via(
47 | x -> {
48 | String json = null;
49 | try {
50 | json = JsonFormat.printer().print(x.getValue().toBuilder().setKey(x.getKey()));
51 | } catch (InvalidProtocolBufferException e) {
52 | json = String.format("{error: %s}", e.getMessage());
53 | }
54 | return json;
55 | }));
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/SnapShotScalability_1D_100M_1K_1FW_60SW_withTFExampleSerlization.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.test;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence;
21 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils;
22 | import com.google.dataflow.sample.timeseriesflow.io.tfexample.FeaturesFromIterableAccumSequence;
23 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions;
24 | import org.apache.beam.sdk.Pipeline;
25 | import org.apache.beam.sdk.options.PipelineOptionsFactory;
26 | import org.apache.beam.sdk.values.PCollection;
27 |
28 | /**
29 |  * Manual scale-test driver: builds the pipeline from {@code
30 |  * SnapShotUtils.testSnapShotScalability} and appends {@link FeaturesFromIterableAccumSequence}.
31 |  *
32 |  * <p>Options fixed in code: numKeys 1, numSecs 86400, numFeatures 100,
33 |  * typeOneComputationsLengthInSecs 1, typeTwoComputationsLengthInSecs 60,
34 |  * outputTimestepLengthInSecs 60.
35 |  */
36 | public class SnapShotScalability_1D_100M_1K_1FW_60SW_withTFExampleSerlization {
37 |
38 |   /**
39 |    * Configures the options, assembles the pipeline and runs it.
40 |    *
41 |    * @param args pipeline options in the form accepted by {@code PipelineOptionsFactory.fromArgs}
42 |    */
43 |   public static void main(String[] args) {
44 |     System.out.println("Running 1 Day with 1 Key and 100 features @ Type 1 1 sec Type 2 60 sec");
45 |
46 |     ScaleTestingOptions options =
47 |         PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class);
48 |
49 |     options.setAppName("SimpleDataStreamTSDataPoints");
50 |     options.setTypeOneComputationsLengthInSecs(1);
51 |     options.setTypeTwoComputationsLengthInSecs(60);
52 |     options.setOutputTimestepLengthInSecs(60);
53 |     options.setNumKeys(1);
54 |     options.setNumSecs(86400);
55 |     options.setNumFeatures(100);
56 |
57 |     Pipeline p = Pipeline.create(options);
58 |
59 |     // NOTE(review): element type reconstructed from the imports and the transform applied
60 |     // below; confirm against SnapShotUtils.
61 |     PCollection<Iterable<TSAccumSequence>> examples = SnapShotUtils.testSnapShotScalability(p);
62 |
63 |     Integer timesteps = CommonUtils.getNumOfSequenceTimesteps(p.getOptions().as(TFXOptions.class));
64 |
65 |     examples.apply(new FeaturesFromIterableAccumSequence(timesteps, true));
66 |
67 |     p.run();
68 |   }
69 | }
70 |
--------------------------------------------------------------------------------
/timeseries-streaming/timeseries-java-applications/TimeSeriesPipeline/src/test/java/com/google/dataflow/sample/timeseriesflow/test/SnapShotScalability_1D_100M_100K_1FW_60SW_withTFExampleSerlization.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.google.dataflow.sample.timeseriesflow.test;
19 |
20 | import com.google.dataflow.sample.timeseriesflow.TimeSeriesData.TSAccumSequence;
21 | import com.google.dataflow.sample.timeseriesflow.common.CommonUtils;
22 | import com.google.dataflow.sample.timeseriesflow.io.tfexample.FeaturesFromIterableAccumSequence;
23 | import com.google.dataflow.sample.timeseriesflow.options.TFXOptions;
24 | import org.apache.beam.sdk.Pipeline;
25 | import org.apache.beam.sdk.options.PipelineOptionsFactory;
26 | import org.apache.beam.sdk.values.PCollection;
27 |
28 | /**
29 |  * Manual scale-test driver: builds the pipeline from {@code
30 |  * SnapShotUtils.testSnapShotScalability} and appends {@link FeaturesFromIterableAccumSequence}.
31 |  *
32 |  * <p>Options fixed in code: numKeys 100, numSecs 86400, numFeatures 100,
33 |  * typeOneComputationsLengthInSecs 1, typeTwoComputationsLengthInSecs 60,
34 |  * outputTimestepLengthInSecs 60.
35 |  */
36 | public class SnapShotScalability_1D_100M_100K_1FW_60SW_withTFExampleSerlization {
37 |
38 |   /**
39 |    * Configures the options, assembles the pipeline and runs it.
40 |    *
41 |    * @param args pipeline options in the form accepted by {@code PipelineOptionsFactory.fromArgs}
42 |    */
43 |   public static void main(String[] args) {
44 |     // The banner now matches setNumKeys(100); it used to say "1 Key".
45 |     System.out.println(
46 |         "Running 1 Day with 100 Keys and 100 features @ Type 1 1 sec Type 2 60 sec");
47 |
48 |     ScaleTestingOptions options =
49 |         PipelineOptionsFactory.fromArgs(args).as(ScaleTestingOptions.class);
50 |
51 |     options.setAppName("SimpleDataStreamTSDataPoints");
52 |     options.setTypeOneComputationsLengthInSecs(1);
53 |     options.setTypeTwoComputationsLengthInSecs(60);
54 |     options.setOutputTimestepLengthInSecs(60);
55 |     options.setNumKeys(100);
56 |     options.setNumSecs(86400);
57 |     options.setNumFeatures(100);
58 |
59 |     Pipeline p = Pipeline.create(options);
60 |
61 |     // NOTE(review): element type reconstructed from the imports and the transform applied
62 |     // below; confirm against SnapShotUtils.
63 |     PCollection<Iterable<TSAccumSequence>> examples = SnapShotUtils.testSnapShotScalability(p);
64 |
65 |     Integer timesteps = CommonUtils.getNumOfSequenceTimesteps(p.getOptions().as(TFXOptions.class));
66 |
67 |     examples.apply(new FeaturesFromIterableAccumSequence(timesteps, true));
68 |
69 |     p.run();
70 |   }
71 | }
72 |
--------------------------------------------------------------------------------