valueEntry : getValueToFeatureName().entrySet()) {
52 | // Get all the count if valueToFeatureName's key and factName are equal and there is only 1
53 | // value.
54 | if (valueEntry.getKey().equals(getFactName()) && getValueToFeatureName().size() == 1) {
55 | features.put(valueEntry.getValue(), valueBag.size());
56 | } else {
57 | features.put(valueEntry.getValue(), valueBag.getCount(valueEntry.getKey()));
58 | }
59 | }
60 | return features.build();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/FeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Map;
19 |
/**
 * An object that accumulates values to generate a feature or features for an ML-ready dataset.
 *
 * <p>Example: {@code factName = "browser"} with {@code valueToFeatureName = [("Chrome",
 * "PROPORTION_browser_Chrome"), ("Safari", "PROPORTION_browser_Safari")]}.
 */
public abstract class FeatureAccumulator {

  // Number of decimal places kept by roundedFeatureValue.
  private static final int FEATURE_VALUE_SCALE = 3;

  // Map of value and its corresponding feature name.
  private final Map<String, String> valueToFeatureName;

  // Fact that the feature is based on.
  private final String factName;

  // Value to accumulate if value is not in the valueToFeatureName map.
  private final String defaultValue;

  /**
   * Stores the configuration shared by all accumulators.
   *
   * @param factName name of the fact the feature is based on
   * @param valueToFeatureName map from a fact value to the name of the feature generated for it
   * @param defaultValue value to accumulate when a value is not a key of {@code
   *     valueToFeatureName}
   */
  protected FeatureAccumulator(
      String factName, Map<String, String> valueToFeatureName, String defaultValue) {
    this.factName = factName;
    this.valueToFeatureName = valueToFeatureName;
    this.defaultValue = defaultValue;
  }

  /** Accumulates value of a fact. */
  public abstract void accumulate(String value);

  /**
   * Returns a map of features and their corresponding accumulated value. The value type depends on
   * the accumulator (text or numeric), hence {@code Object}.
   */
  public abstract Map<String, Object> getFeatures();

  /** Returns the name of the fact the feature is based on. */
  protected String getFactName() {
    return factName;
  }

  /** Returns the map from fact value to feature name, exactly as passed to the constructor. */
  protected Map<String, String> getValueToFeatureName() {
    return valueToFeatureName;
  }

  /** Returns the value to accumulate for values missing from the value-to-feature-name map. */
  protected String getDefaultValue() {
    return defaultValue;
  }

  /**
   * Rounds {@code number} to three decimal places, rounding halves away from zero.
   *
   * <p>The rounding is applied to the shortest decimal representation of the double (via {@link
   * BigDecimal#valueOf(double)}) rather than to its exact binary expansion, so that a value such
   * as {@code 1.0005} rounds to {@code 1.001} instead of {@code 1.0}.
   *
   * @param number the value to round
   * @return the rounded value; NaN and infinite inputs are returned unchanged because they have no
   *     decimal representation
   */
  protected static double roundedFeatureValue(double number) {
    if (Double.isNaN(number) || Double.isInfinite(number)) {
      return number;
    }
    return BigDecimal.valueOf(number)
        .setScale(FEATURE_VALUE_SCALE, RoundingMode.HALF_UP)
        .doubleValue();
  }
}
67 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/MostFrequentValueFeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
17 | import static java.util.Comparator.reverseOrder;
18 |
19 | import com.google.common.base.Strings;
20 | import com.google.common.collect.ImmutableMap;
21 | import java.util.Comparator;
22 | import java.util.Map;
23 | import java.util.Optional;
24 | import org.apache.commons.collections4.Bag;
25 | import org.apache.commons.collections4.bag.HashBag;
26 |
27 | /**
28 | * Feature accumulator to extract most frequent value among the values provided of a fact. Example:
29 | * Extract the most frequent value of the fact 'city' as a feature.
30 | */
31 | public class MostFrequentValueFeatureAccumulator extends FeatureAccumulator {
32 |
33 | private final Bag valueBag;
34 |
35 | public MostFrequentValueFeatureAccumulator(
36 | String column, ImmutableMap valueToFeatureName, String defaultValue) {
37 | super(column, valueToFeatureName, defaultValue);
38 | this.valueBag = new HashBag<>();
39 | }
40 |
41 | @Override
42 | public void accumulate(String value) {
43 | if (getValueToFeatureName().containsKey(getFactName())
44 | || getValueToFeatureName().containsKey(value)) {
45 | valueBag.add(value);
46 | } else if (!Strings.isNullOrEmpty(getDefaultValue())) {
47 | valueBag.add(getDefaultValue());
48 | }
49 | }
50 |
51 | @Override
52 | public Map getFeatures() {
53 |
54 | if (getValueToFeatureName().isEmpty()) {
55 | return ImmutableMap.of();
56 | }
57 |
58 | Optional valueWithMaxCount =
59 | valueBag.stream()
60 | .max(
61 | Comparator.comparingInt(valueBag::getCount)
62 | .thenComparing(s -> s, reverseOrder()));
63 | String featureName = getValueToFeatureName().values().iterator().next();
64 | return ImmutableMap.of(featureName, valueWithMaxCount.orElse(""));
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/ProportionValueFeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
17 | import com.google.common.base.Strings;
18 | import com.google.common.collect.ImmutableMap;
19 | import java.util.Map;
20 | import org.apache.commons.collections4.Bag;
21 | import org.apache.commons.collections4.bag.HashBag;
22 |
23 | /**
24 | * Feature accumulator to extract the proportion of values of a fact. Example: Extract the
25 | * proportion value of New York from all the values of the fact 'city' as a feature.
26 | */
27 | public class ProportionValueFeatureAccumulator extends FeatureAccumulator {
28 |
29 | private final Bag valueBag;
30 |
31 | public ProportionValueFeatureAccumulator(
32 | String column, ImmutableMap valueToFeatureName, String defaultValue) {
33 | super(column, valueToFeatureName, defaultValue);
34 | this.valueBag = new HashBag<>();
35 | }
36 |
37 | @Override
38 | public void accumulate(String value) {
39 | if (!Strings.isNullOrEmpty(getDefaultValue()) && !getValueToFeatureName().containsKey(value)) {
40 | valueBag.add(getDefaultValue());
41 | } else {
42 | valueBag.add(value);
43 | }
44 | }
45 |
46 | @Override
47 | public Map getFeatures() {
48 |
49 | ImmutableMap.Builder features = ImmutableMap.builder();
50 | int totalValueCount = valueBag.size();
51 |
52 | for (Map.Entry valueEntry : getValueToFeatureName().entrySet()) {
53 | int valueCount = valueBag.getCount(valueEntry.getKey());
54 | features.put(
55 | valueEntry.getValue(),
56 | totalValueCount > 0 ? roundedFeatureValue((double) valueCount / totalValueCount) : 0);
57 | }
58 | return features.build();
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/RecentValueFeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
17 | import static com.google.common.base.Strings.nullToEmpty;
18 |
19 | import com.google.common.base.Strings;
20 | import com.google.common.collect.ImmutableMap;
21 | import java.util.Map;
22 |
23 | /** Feature accumulator to extract the most recent value of a fact. */
24 | public class RecentValueFeatureAccumulator extends FeatureAccumulator {
25 |
26 | private String value;
27 |
28 | public RecentValueFeatureAccumulator(
29 | String column, ImmutableMap values, String defaultValue) {
30 | super(column, values, defaultValue);
31 | }
32 |
33 | @Override
34 | public void accumulate(String value) {
35 | if (getValueToFeatureName().containsKey(getFactName())
36 | || getValueToFeatureName().containsKey(value)) {
37 | this.value = value;
38 | } else if (!Strings.isNullOrEmpty(getDefaultValue())) {
39 | this.value = getDefaultValue();
40 | }
41 | }
42 |
43 | @Override
44 | public Map getFeatures() {
45 | String featureName = getValueToFeatureName().values().iterator().next();
46 | return ImmutableMap.of(featureName, nullToEmpty(value));
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/SumValueFeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
17 | import com.google.common.collect.ImmutableMap;
18 | import java.util.Map;
19 |
20 | /**
21 | * Feature accumulator to extract the sum of values of a fact. Example: Extract the sum of all the
22 | * values of the fact 'page_views' as a feature.
23 | */
24 | public class SumValueFeatureAccumulator extends FeatureAccumulator {
25 |
26 | private double sum;
27 |
28 | public SumValueFeatureAccumulator(
29 | String column, ImmutableMap valueToFeatureName, String defaultValue) {
30 | super(column, valueToFeatureName, defaultValue);
31 | }
32 |
33 | @Override
34 | public void accumulate(String value) {
35 | try {
36 | sum += Double.parseDouble(value);
37 | } catch (NumberFormatException e) {
38 | // Ignore invalid value.
39 | }
40 | }
41 |
42 | @Override
43 | public Map getFeatures() {
44 | String columnName = getValueToFeatureName().values().iterator().next();
45 | return ImmutableMap.of(columnName, sum);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/accumulator/WindowBasedFeatureAccumulator.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.LookbackWindow;
18 | import java.util.Map;
19 |
20 | /**
21 | * An object that accumulates values to generate feature or features based on {@link LookbackWindow}
22 | * for a ML-ready dataset.
23 | */
24 | public abstract class WindowBasedFeatureAccumulator extends FeatureAccumulator {
25 |
26 | private LookbackWindow window;
27 |
28 | public WindowBasedFeatureAccumulator(
29 | String column, Map values, String defaultValue) {
30 | super(column, values, defaultValue);
31 | }
32 |
33 | @Override
34 | public void accumulate(String value) {
35 | if (window == null) {
36 | throw new IllegalStateException("LookbackWindow is not initialized.");
37 | }
38 | }
39 |
40 | public LookbackWindow getWindow() {
41 | return window;
42 | }
43 |
44 | public void setWindow(LookbackWindow window) {
45 | this.window = window;
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/feature/transform/CreateAccumulatorOptionsFn.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.transform;
16 |
17 | import com.google.common.collect.Multimap;
18 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator.AccumulatorOptions;
19 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator.AccumulatorType;
20 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.feature.accumulator.FeatureAccumulatorFactory;
21 | import java.util.Map;
22 | import org.apache.beam.sdk.transforms.DoFn;
23 | import org.apache.beam.sdk.values.KV;
24 |
25 | /** Function to create @{link {@link AccumulatorOptions}} based string parameter. */
26 | public class CreateAccumulatorOptionsFn extends DoFn> {
27 | private final FeatureAccumulatorFactory factory;
28 | private final AccumulatorType accumulatorType;
29 |
30 | public CreateAccumulatorOptionsFn(
31 | FeatureAccumulatorFactory factory, AccumulatorType accumulatorType) {
32 | this.factory = factory;
33 | this.accumulatorType = accumulatorType;
34 | }
35 |
36 | @ProcessElement
37 | public void processElement(ProcessContext c) {
38 | Multimap accumulators =
39 | factory.createAccumulatorOptions(accumulatorType, c.element());
40 | for (Map.Entry accumulatorMapEntry : accumulators.entries()) {
41 | c.output(KV.of(accumulatorMapEntry.getKey(), accumulatorMapEntry.getValue()));
42 | }
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/model/Field.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model;
16 |
17 | import com.google.common.base.MoreObjects;
18 | import java.io.Serializable;
19 | import java.util.Arrays;
20 | import java.util.Objects;
21 |
22 | /** A Field records a feature column name, description and type. */
23 | public class Field implements Serializable {
24 |
25 | private final String name;
26 | private final String description;
27 | private final String type;
28 |
29 | public Field(String name, String description, String type) {
30 | this.name = name;
31 | this.description = description;
32 | this.type = type;
33 | }
34 |
35 | public String getName() {
36 | return name;
37 | }
38 |
39 | public String getDescription() {
40 | return description;
41 | }
42 |
43 | public String getType() {
44 | return type;
45 | }
46 |
47 | @Override
48 | public boolean equals(Object other) {
49 | if (this == other) {
50 | return true;
51 | }
52 | if (!(other instanceof Field)) {
53 | return false;
54 | }
55 | Field otherField = (Field) other;
56 | return Objects.equals(this.name, otherField.name)
57 | && Objects.equals(this.type, otherField.type)
58 | && Objects.equals(this.description, otherField.description);
59 | }
60 |
61 | @Override
62 | public int hashCode() {
63 | return Arrays.hashCode(new Object[] {name, description, type});
64 | }
65 |
66 | @Override
67 | public String toString() {
68 | return MoreObjects.toStringHelper(this)
69 | .addValue(name)
70 | .addValue(description)
71 | .addValue(type)
72 | .toString();
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/DateUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import java.time.Duration;
18 | import java.time.LocalDate;
19 | import java.time.format.DateTimeFormatter;
20 | import org.joda.time.Instant;
21 |
22 | /**
23 | * Utility class for parsing command line date strings into Instants.
24 | */
25 | final class DateUtil {
26 | private DateUtil() {
27 | }
28 |
29 | // Returns the given dateString in dd/MM/yyyy format as an Instant.
30 | private static Instant parseDateStringToInstantOrDie(String dateString) {
31 | try {
32 | return new Instant(
33 | 1000 * LocalDate.parse(dateString, DateTimeFormatter.ofPattern("dd/MM/yyyy")).toEpochDay()
34 | * Duration.ofDays(1).getSeconds());
35 | } catch (Exception e) {
36 | throw new RuntimeException(e);
37 | }
38 | }
39 |
40 | // Returns the given stateDateString in dd/MM/yyyy format as an Instant. If the input is null or
41 | // empty, returns the Epoch Instant. Dies if the input is invalid.
42 | public static Instant parseStartDateStringToInstant(String stateDateString) {
43 | Instant startInstant = new Instant(0);
44 | if (stateDateString != null && !stateDateString.isEmpty()) {
45 | startInstant = parseDateStringToInstantOrDie(stateDateString);
46 | }
47 | return startInstant;
48 | }
49 |
50 | // Returns the given endDateString in dd/MM/yyyy format as an Instant. If the input is null or
51 | // empty, returns the maximum possible Instant. Dies if the input is invalid.
52 | public static Instant parseEndDateStringToInstant(String endDateString) {
53 | Instant endInstant = new Instant(Long.MAX_VALUE);
54 | if (endDateString != null && !endDateString.isEmpty()) {
55 | endInstant = parseDateStringToInstantOrDie(endDateString);
56 | }
57 | return endInstant;
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/MapFactToTableRow.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.api.services.bigquery.model.TableFieldSchema;
18 | import com.google.api.services.bigquery.model.TableRow;
19 | import com.google.api.services.bigquery.model.TableSchema;
20 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Fact;
21 | import java.util.Arrays;
22 | import org.apache.beam.sdk.transforms.DoFn;
23 |
24 | /**
25 | * Converts a Fact to a BigQuery TableRow.
26 | */
27 | public class MapFactToTableRow extends DoFn {
28 |
29 | // Returns the table schema for Fact TableRows.
30 | public static TableSchema getTableSchema() {
31 | TableSchema schema = new TableSchema();
32 | schema.setFields(Arrays.asList(
33 | new TableFieldSchema().setName("sessionId").setType("STRING"),
34 | new TableFieldSchema().setName("userId").setType("STRING"),
35 | new TableFieldSchema().setName("timeInMillis").setType("INTEGER"),
36 | new TableFieldSchema().setName("name").setType("STRING"),
37 | new TableFieldSchema().setName("value").setType("STRING"),
38 | new TableFieldSchema().setName("hasPositiveLabel").setType("BOOLEAN")));
39 | return schema;
40 | }
41 |
42 | @ProcessElement
43 | public void processElement(ProcessContext context) {
44 | Fact fact = context.element();
45 | TableRow tablerow = new TableRow();
46 | tablerow.set("sessionId", fact.getSessionId());
47 | tablerow.set("userId", fact.getUserId());
48 | tablerow.set("timeInMillis", fact.getTime().getMillis());
49 | tablerow.set("name", fact.getName());
50 | tablerow.set("value", fact.getValue());
51 | tablerow.set("hasPositiveLabel", fact.getHasPositiveLabel());
52 | context.output(tablerow);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/MapSessionToFacts.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Fact;
18 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Session;
19 | import org.apache.beam.sdk.transforms.DoFn;
20 |
21 | /**
22 | * Maps a Session into its Facts.
23 | */
24 | public class MapSessionToFacts extends DoFn {
25 | @ProcessElement
26 | public void processElement(ProcessContext context) {
27 | Session session = context.element();
28 | for (Fact fact : session.getFacts()) {
29 | context.output(fact);
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/MapSortedSessionsIntoLookbackWindows.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.LookbackWindow;
18 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Session;
19 | import java.util.List;
20 | import org.apache.beam.sdk.options.ValueProvider;
21 | import org.apache.beam.sdk.transforms.DoFn;
22 | import org.apache.beam.sdk.values.KV;
23 |
24 | /**
25 | * Parent class for mapping a user's sessions time into LookbackWindows.
26 | */
27 | public class MapSortedSessionsIntoLookbackWindows extends DoFn<
28 | KV>, LookbackWindow> {
29 | protected ValueProvider snapshotStartDateProvider;
30 | protected ValueProvider snapshotEndDateProvider;
31 | protected ValueProvider lookbackGapInSecondsProvider;
32 | protected ValueProvider windowTimeInSecondsProvider;
33 | protected ValueProvider minimumLookaheadTimeInSecondsProvider;
34 | protected ValueProvider maximumLookaheadTimeInSecondsProvider;
35 | protected ValueProvider stopOnFirstPositiveLabelProvider;
36 |
37 | public MapSortedSessionsIntoLookbackWindows(
38 | ValueProvider snapshotStartDate,
39 | ValueProvider snapshotEndDate,
40 | ValueProvider lookbackGapInSeconds,
41 | ValueProvider windowTimeInSeconds,
42 | ValueProvider minimumLookaheadTimeInSeconds,
43 | ValueProvider maximumLookaheadTimeInSeconds,
44 | ValueProvider stopOnFirstPositiveLabel) {
45 | snapshotStartDateProvider = snapshotStartDate;
46 | snapshotEndDateProvider = snapshotEndDate;
47 | lookbackGapInSecondsProvider = lookbackGapInSeconds;
48 | windowTimeInSecondsProvider = windowTimeInSeconds;
49 | minimumLookaheadTimeInSecondsProvider = minimumLookaheadTimeInSeconds;
50 | maximumLookaheadTimeInSecondsProvider = maximumLookaheadTimeInSeconds;
51 | stopOnFirstPositiveLabelProvider = stopOnFirstPositiveLabel;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/MapUserActivityToTableRow.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.api.services.bigquery.model.TableFieldSchema;
18 | import com.google.api.services.bigquery.model.TableRow;
19 | import com.google.api.services.bigquery.model.TableSchema;
20 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.UserActivity;
21 | import java.util.Arrays;
22 | import org.apache.beam.sdk.transforms.DoFn;
23 |
24 | /**
25 | * Converts a UserActivity to a BigQuery TableRow if the user has had some activity as of the
26 | * UserActivity's snapshotTime.
27 | */
28 | public class MapUserActivityToTableRow extends DoFn {
29 |
30 | // Returns the BigQuery table schema whose field names match the TableRows set in processElement.
31 | public static TableSchema getTableSchema() {
32 | TableSchema schema = new TableSchema();
33 | schema.setFields(Arrays.asList(
34 | new TableFieldSchema().setName("userId").setType("STRING"),
35 | new TableFieldSchema().setName("hasPositiveLabel").setType("BOOLEAN"),
36 | new TableFieldSchema().setName("daysSinceStartDate").setType("INTEGER"),
37 | new TableFieldSchema().setName("daysSinceFirstActivity").setType("INTEGER"),
38 | new TableFieldSchema().setName("daysSinceLatestActivity").setType("INTEGER"),
39 | new TableFieldSchema().setName("snapshotTimeInMillis").setType("INTEGER")));
40 | return schema;
41 | }
42 |
43 | @ProcessElement
44 | public void processElement(ProcessContext context) {
45 | UserActivity userActivity = context.element();
46 | if (userActivity.getDurationSinceFirstActivity() == null) {
47 | return;
48 | }
49 | TableRow tablerow = new TableRow();
50 | tablerow.set("userId", userActivity.getUserId());
51 | tablerow.set("hasPositiveLabel", userActivity.getHasPositiveLabel());
52 | tablerow.set("daysSinceStartDate", userActivity.getDurationSinceStartDate().getStandardDays());
53 | tablerow.set(
54 | "daysSinceFirstActivity", userActivity.getDurationSinceFirstActivity().getStandardDays());
55 | tablerow.set(
56 | "daysSinceLatestActivity", userActivity.getDurationSinceLatestActivity().getStandardDays());
57 | tablerow.set("snapshotTimeInMillis", userActivity.getSnapshotTime().getMillis());
58 | context.output(tablerow);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/MapUserIdToSession.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Session;
18 | import org.apache.beam.sdk.transforms.DoFn;
19 | import org.apache.beam.sdk.values.KV;
20 |
21 | /**
22 | * Emits a (userId, Session) pair for each input Session, so Sessions can be grouped by userId.
23 | */
24 | public class MapUserIdToSession extends DoFn> {
25 | public MapUserIdToSession() {
26 | }
27 |
28 | @ProcessElement
29 | public void processElement(ProcessContext context) {
30 | Session session = context.element();
31 | context.output(KV.of(session.getUserId(), session));
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/SortSessionsByTime.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Session;
18 | import java.util.ArrayList;
19 | import java.util.Collections;
20 | import java.util.Comparator;
21 | import java.util.List;
22 | import org.apache.beam.sdk.transforms.DoFn;
23 | import org.apache.beam.sdk.values.KV;
24 |
25 | /**
26 | * Outputs a user's Sessions sorted by lastHitTime, then visitStartTime; emits nothing if empty.
27 | */
28 | public class SortSessionsByTime extends DoFn<
29 | KV>, KV>> {
30 |
31 | public SortSessionsByTime() {
32 | }
33 |
34 | @ProcessElement
35 | public void processElement(ProcessContext context) {
36 | KV> kv = context.element();
37 | ArrayList sessions = new ArrayList<>();
38 | for (Session session : kv.getValue()) {
39 | sessions.add(session);
40 | }
41 | if (sessions.isEmpty()) {
42 | return;
43 | }
44 | Collections.sort(sessions, new Comparator() {
45 | @Override
46 | public int compare(Session lhs, Session rhs) {
47 | int result = lhs.getLastHitTime().compareTo(rhs.getLastHitTime());
48 | if (result == 0) {
49 | return lhs.getVisitStartTime().compareTo(rhs.getVisitStartTime());
50 | }
51 | return result;
52 | }
53 | });
54 | context.output(KV.of(kv.getKey(), sessions));
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/SortedSessionsUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.model.Session;
18 | import java.util.ArrayList;
19 | import org.joda.time.Duration;
20 | import org.joda.time.Instant;
21 |
22 | /**
23 | * Static utility functions for processing Sessions sorted by time; not instantiable.
24 | */
25 | public class SortedSessionsUtil {
26 |
27 | private SortedSessionsUtil() {
28 | }
29 |
30 | // Returns the visitStartTime of every positive-label Session whose visitStartTime is not before
31 | // startTime (inclusive) and whose lastHitTime is strictly before endTime (exclusive).
32 | public static ArrayList getPositiveLabelTimes(
33 | ArrayList sessions, Instant startTime, Instant endTime) {
34 | ArrayList positiveLabelTimes = new ArrayList<>();
35 | for (Session session : sessions) {
36 | if (!session.hasPositiveLabel()) {
37 | continue;
38 | }
39 | if (session.getVisitStartTime().isBefore(startTime)
40 | || !session.getLastHitTime().isBefore(endTime)) {
41 | continue;
42 | }
43 | positiveLabelTimes.add(session.getVisitStartTime());
44 | }
45 | return positiveLabelTimes;
46 | }
47 |
48 | // Returns the first Instant within [start, finish] (both inclusive), or null if there is none.
49 | // Assumes the given instants collection is in sorted order.
50 | public static Instant getFirstInstantInInterval(
51 | ArrayList instants, Instant start, Instant finish) {
52 | for (Instant instant : instants) {
53 | if (instant.isAfter(finish)) {
54 | break;
55 | }
56 | if (!instant.isBefore(start)) {
57 | return instant;
58 | }
59 | }
60 | return null;
61 | }
62 |
63 | // Returns the number of whole standard (24-hour) days from Epoch to the given instant.
64 | public static long getEpochDay(Instant instant) {
65 | return Duration.millis(instant.getMillis()).getStandardDays();
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/ml-data-windowing-pipeline/transform/ValidateGATableRow.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.corp.gtech.ads.datacatalyst.components.mldatawindowingpipeline.transform;
16 |
17 | import com.google.api.services.bigquery.model.TableRow;
18 | import org.apache.beam.sdk.transforms.DoFn;
19 |
20 | /**
21 | * Validates that BigQuery TableRows contain the required field names.
22 | */
23 | public class ValidateGATableRow extends DoFn {
24 | public ValidateGATableRow() {
25 | }
26 |
27 | // Outputs the TableRow unchanged if it has every required field, else throws IllegalArgumentException.
28 | @ProcessElement
29 | public void processElement(ProcessContext context) {
30 | TableRow tablerow = context.element();
31 | if (!tablerow.containsKey("fullVisitorId")
32 | || !tablerow.containsKey("visitId")
33 | || !tablerow.containsKey("visitStartTime")) {
34 | throw new IllegalArgumentException(String.format(
35 | "Input tablerow [%s] is missing at least one of the following required fields: %s",
36 | tablerow, "fullVisitorId, visitId or visitStartTime"));
37 | }
38 | context.output(tablerow);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/marketing-analytics/predicting/tensorflow-lifetime-value/README.md:
--------------------------------------------------------------------------------
1 | This folder refers to a solution published in another GitHub repository about predicting Customer Lifetime Value using two main approaches:
2 |
3 | 1. RFM statistical models
4 | 2. Neural network models
5 |
6 | You can find the code at [https://github.com/GoogleCloudPlatform/tensorflow-lifetime-value](https://github.com/GoogleCloudPlatform/tensorflow-lifetime-value)
7 |
8 | The GitHub repository contains companion code for a four-part series that discusses how you can predict customer lifetime value (CLV) by using AI Platform on Google Cloud. The articles in this series include the following:
9 |
10 | - [Part 1: Introduction](https://cloud.google.com/solutions/machine-learning/clv-prediction-with-offline-training-intro). Introduces customer lifetime value (CLV) and two modeling techniques for predicting CLV.
11 | - [Part 2: Training the model](https://cloud.google.com/solutions/machine-learning/clv-prediction-with-offline-training-train). Discusses how to prepare the data and train the models.
12 | - [Part 3: Deploying to production](https://cloud.google.com/solutions/machine-learning/clv-prediction-with-offline-training-deploy). Describes how to deploy the models discussed in Part 2 to a production system.
13 | - [Part 4: Using AutoML Tables](https://cloud.google.com/solutions/machine-learning/clv-prediction-with-automl-tables). Shows how to use AutoML Tables to build and deploy a model.
14 |
15 | Disclaimer: This is not an official Google product.
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/README.md:
--------------------------------------------------------------------------------
1 | This repository exists to facilitate the access to BigQuery queries on various data sources.
2 |
3 | Several of those data sources are imported into BigQuery using [BigQuery Data Transfer Service](https://cloud.google.com/bigquery/transfer/), but they can also be loaded using various [BigQuery loading tools](https://cloud.google.com/bigquery/docs/loading-data).
4 |
5 | Note: This is not an officially supported Google product.
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/click_through_rate.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- This script computes the impression and click count as well as the
16 | -- click-through rate per ad. It uses the impression ID field to join the
17 | -- impression and click table.
18 | WITH
19 | ads AS (
20 | SELECT
21 | Ad_ID,
22 | ANY_VALUE(Ad) As Ad
23 | FROM
24 | `project.dataset.match_table_ads`
25 | GROUP BY
26 | Ad_ID)
27 | SELECT
28 | impressions.Ad_ID,
29 | ANY_VALUE(ads.Ad) AS Ad,
30 | COUNT(*) AS impression_count,
31 | COUNTIF(clicks.Impression_ID IS NOT NULL) AS click_count,
32 | COUNTIF(clicks.Impression_ID IS NOT NULL) / COUNT(*) AS click_through_rate
33 | FROM
34 | `project.dataset.impression` AS impressions
35 | LEFT JOIN
36 | `project.dataset.click` AS clicks
37 | ON
38 | impressions.Impression_ID = clicks.Impression_ID
39 | JOIN
40 | ads
41 | ON
42 | impressions.Ad_ID = ads.Ad_ID
43 | WHERE
44 | impressions._DATA_DATE BETWEEN DATE(2018, 5, 10)
45 | AND DATE(2018, 6, 10)
46 | GROUP BY
47 | impressions.Ad_ID
48 | ORDER BY
49 | click_through_rate DESC
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/date_range.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- This script obtains the partition and event date ranges for each of the view,
16 | -- click, and activity tables. It is meant to give you a range of dates that you
17 | -- can use in the other queries to process only limited data.
18 | SELECT
19 | "VIEW" AS Event_Type,
20 | FORMAT_DATE("%F", MIN(_DATA_DATE)) AS Start_Date_Partition,
21 | FORMAT_DATE("%F", MAX(_DATA_DATE)) AS End_Date_Partition,
22 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MIN(Event_Time))) AS Start_Date_Event,
23 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MAX(Event_Time))) AS End_Date_Event
24 | FROM
25 | `project.dataset.impression`
26 | UNION ALL
27 | SELECT
28 | "CLICK" AS Event_Type,
29 | FORMAT_DATE("%F", MIN(_DATA_DATE)) AS Start_Date_Partition,
30 | FORMAT_DATE("%F", MAX(_DATA_DATE)) AS End_Date_Partition,
31 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MIN(Event_Time))) AS Start_Date_Event,
32 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MAX(Event_Time))) AS End_Date_Event
33 | FROM
34 | `project.dataset.click`
35 | UNION ALL
36 | SELECT
37 | "CONVERSION" AS Event_Type,
38 | FORMAT_DATE("%F", MIN(_DATA_DATE)) AS Start_Date_Partition,
39 | FORMAT_DATE("%F", MAX(_DATA_DATE)) AS End_Date_Partition,
40 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MIN(Event_Time))) AS Start_Date_Event,
41 | FORMAT_TIMESTAMP("%F %T", TIMESTAMP_MICROS(MAX(Event_Time))) AS End_Date_Event
42 | FROM
43 | `project.dataset.activity`
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/dbm_data_dbm_device_type.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- I want to learn...
16 | -- How do I determine whether an event happened on mobile vs desktop for DBM impressions?
17 | --
18 | -- What can I do?
19 | -- Add mobile vs desktop breakouts in your analyses.
20 | --
21 | -- Tips
22 | -- * Include this with other queries to get performance, reach, and more by DBM Device Type.
23 |
24 | SELECT
25 | DBM_Device_Type,
26 | CASE
27 | WHEN DBM_Device_Type=0 THEN "Computer"
28 | WHEN DBM_Device_Type=1 THEN "Other"
29 | WHEN DBM_Device_Type=2 THEN "Smartphone"
30 | WHEN DBM_Device_Type=3 THEN "Tablet"
31 | WHEN DBM_Device_Type=4 THEN "Smart TV"
32 | END DBM_Device_Type_Name,
33 | COUNT(*) AS Impressions
34 | FROM `project.dataset.impression`
35 | WHERE DBM_Advertiser_ID IS NOT NULL
36 | GROUP BY DBM_Device_Type, DBM_Device_Type_Name
37 | ORDER BY Impressions DESC
38 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/effective_cpa.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- This script can be used to compute the effective CPA for any activity and any
16 | -- DBM line item. Currently in the DBM UI it is only possible to report on the
17 | -- eCPA for line items and activities that have been explicitly linked for bid
18 | -- optimization. With this script you can report on all line items and activities.
19 | -- SAFE_DIVIDE returns NULL instead of raising a division-by-zero error for line
20 | -- items that have no matching activity, so such line items get a NULL eCPA.
21 | SELECT
22 | DBM_Line_Item_ID,
23 | ANY_VALUE(DBM_Insertion_Order_ID) AS DBM_Insertion_Order_ID,
24 | SUM(DBM_Billable_Cost_USD) / 1000000000 AS Total_Cost_USD,
25 | COUNTIF(activities.DBM_Auction_ID IS NOT NULL) AS activity_count,
26 | SAFE_DIVIDE(SUM(DBM_Billable_Cost_USD), COUNTIF(activities.DBM_Auction_ID IS NOT NULL)) / 1000000000 AS eCPA
27 | FROM
28 | `project.dataset.impression` AS impressions
29 | LEFT JOIN
30 | `project.dataset.activity` AS activities
31 | ON
32 | impressions.DBM_Auction_ID = activities.DBM_Auction_ID
33 | AND
34 | _DATA_DATE BETWEEN DATE(2018, 5, 10) AND DATE(2018, 6, 10)
35 | WHERE
36 | DBM_Billable_Cost_USD != 0
37 | GROUP BY
38 | DBM_Line_Item_ID
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/extract_variables_values_in_activity_file.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- I want to learn...
16 | -- How do I extract additional values that I pass through u-variables in conversion data?
17 | --
18 | -- What can I do?
19 | -- Analyze conversion data at a more granular level based on client-provided data. For example, analyze the most common conversion paths for a specific product.
20 | --
21 | -- Tips
22 | -- Use REGEXP_EXTRACT to extract u-variable data from the “Other_Data” field. Create new columns for each extracted u-variable.
23 |
24 | SELECT
25 | TRIM(REGEXP_EXTRACT(Other_Data, r"u4=(.+?);")) AS Product_Purchased,
26 | COUNT(*) AS Conversions
27 | FROM `project.dataset.activity`
28 | WHERE Activity_ID IN ("12345","56789")
29 | GROUP BY 1
30 | ORDER BY 2 DESC
31 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/dv360/performance_aggregated_click_n_conversion_rates.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
7 | -- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- I want to learn...
16 | -- How do I join impression, click, and conversion data at an aggregated level (e.g. campaign level)?
17 | --
18 | -- What can I do?
19 | -- Automate custom reporting tables that power business insights dashboards.
20 | --
21 | -- Tips
22 | -- * Try replicating this method at other aggregation levels such as Advertiser, Site, Placement, Ad, Creative, etc.
23 | -- * Matching to the activity table using Advertiser ID, Campaign ID, Site ID, etc. will assume your default attribution methodology and lookback windows like in DCM UI Reporting.
24 | -- * This method is the simplest method to join impression, click, and activity tables, but is less flexible. Other join methods include joining with Impression ID or User ID (see following slides).
25 |
26 | WITH impression_data AS (
27 | SELECT
28 | Campaign_ID,
29 | COUNT(*) AS Impressions
30 | FROM `project.dataset.impression`
31 | GROUP BY Campaign_ID
32 | ),
33 |
34 | click_data AS (
35 | SELECT
36 | Campaign_ID,
37 | COUNT(*) AS Clicks
38 | FROM `project.dataset.click`
39 | GROUP BY Campaign_ID
40 | ),
41 |
42 | conversion_data AS (
43 | SELECT
44 | Campaign_ID,
45 | COUNT(*) AS Conversions
46 | FROM `project.dataset.activity`
47 | WHERE Activity_ID IN ("12345","67890")
48 | GROUP BY Campaign_ID
49 | )
50 |
51 | SELECT
52 | *,
53 | Clicks/Impressions AS Click_Rate,
54 | Conversions/Impressions AS Conversion_Rate
55 | FROM impression_data
56 | LEFT JOIN click_data USING(Campaign_ID)
57 | LEFT JOIN conversion_data USING(Campaign_ID)
58 | ORDER BY Impressions DESC
59 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/3_days_using_union.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This is using the Google analytics sample data set
18 | -- This script aggregates 3 days worth of data (visits, pageviews, transactions, revenue) into one table using a UNION ALL
19 |
20 | WITH ga_tables AS (
21 | SELECT
22 | date,
23 | SUM(totals.visits) AS visits,
24 | SUM(totals.pageviews) AS pageviews,
25 | SUM(totals.transactions) AS transactions,
26 | SUM(totals.transactionRevenue)/1000000 AS revenue
27 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20160801`
28 | GROUP BY date
29 |
30 | UNION ALL
31 |
32 | SELECT
33 | date,
34 | SUM(totals.visits) AS visits,
35 | SUM(totals.pageviews) AS pageviews,
36 | SUM(totals.transactions) AS transactions,
37 | SUM(totals.transactionRevenue)/1000000 AS revenue
38 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20160802`
39 | GROUP BY date
40 |
41 | UNION ALL
42 |
43 | SELECT
44 | date,
45 | SUM(totals.visits) AS visits,
46 | SUM(totals.pageviews) AS pageviews,
47 | SUM(totals.transactions) AS transactions,
48 | SUM(totals.transactionRevenue)/1000000 AS revenue
49 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20160803`
50 | GROUP BY date
51 |
52 | )
53 | SELECT
54 | date,
55 | visits,
56 | pageviews,
57 | transactions,
58 | revenue,
59 | FROM ga_tables
60 | ORDER BY date ASC
61 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/avg_amount_of_money_per_session.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the average amount of money spent per session
19 |
20 | WITH Sessions AS (
21 | SELECT
22 | fullVisitorId,
23 | SUM(totals.visits) AS total_visits_per_user,
24 | SUM(totals.transactionRevenue) AS total_transactionrevenue_per_user
25 | FROM
26 | `bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 | WHERE
28 | _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
29 | AND totals.visits > 0
30 | AND totals.transactions >= 1
31 | AND totals.transactionRevenue IS NOT NULL
32 | GROUP BY fullVisitorId
33 | )
34 | SELECT
35 | (SUM(total_transactionrevenue_per_user/1e6) /
36 | SUM(total_visits_per_user)) AS avg_revenue_by_user_per_visit
37 | FROM Sessions
38 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/avg_bounce_rate_per_traffic_source.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the average bounce rate per traffic source
19 |
20 | SELECT
21 | source,
22 | total_visits,
23 | total_no_of_bounces,
24 | ( ( total_no_of_bounces / total_visits ) * 100 ) AS bounce_rate
25 | FROM (
26 | SELECT
27 | trafficSource.source AS source,
28 | COUNT ( trafficSource.source ) AS total_visits,
29 | SUM ( totals.bounces ) AS total_no_of_bounces
30 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
31 | WHERE
32 | _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
33 | GROUP BY
34 | source )
35 | ORDER BY
36 | total_visits DESC
37 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/avg_product_pageviews_by_non_purchasers.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the average number of product page views for users who did not make a purchase
19 |
20 | SELECT
21 | ( SUM(total_pagesviews_per_user) / COUNT(users) ) AS avg_pageviews_per_user
22 | FROM (
23 | SELECT
24 | fullVisitorId AS users,
25 | SUM(totals.pageviews) AS total_pagesviews_per_user
26 | FROM`bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 | WHERE
28 | _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
29 | AND
30 | totals.transactions IS NULL
31 | GROUP BY
32 | users )
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/avg_product_pageviews_by_purchaser_type.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google Analytics sample data set
18 | -- This script calculates the average number of product page views for users who made a purchase
19 |
20 |
21 | SELECT
22 | ( SUM(total_pagesviews_per_user) / COUNT(users) ) AS avg_pageviews_per_user
23 | FROM (
24 | SELECT
25 | fullVisitorId AS users,
26 | SUM(totals.pageviews) AS total_pagesviews_per_user
27 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
28 | WHERE
29 | _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
30 | AND
31 | totals.transactions >=1
32 | GROUP BY
33 | users )
34 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/avg_transactions_per_purchaser.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the average number of transactions per purchaser
19 |
20 | WITH per_user AS (  -- one row per visitor: transactions summed over July 2017
21 |   SELECT
22 |     fullVisitorId,
23 |     SUM(totals.transactions) AS total_transactions_per_user
24 |   FROM
25 |     `bigquery-public-data.google_analytics_sample.ga_sessions_*`
26 |   WHERE
27 |     totals.transactions IS NOT NULL  -- drop sessions that carry no transaction count
28 |     AND _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
29 |   GROUP BY
30 |     fullVisitorId )
31 | SELECT SUM(total_transactions_per_user) / COUNT(fullVisitorId) AS avg_total_transactions_per_user
32 | FROM per_user
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/last_1095_days_using_table_suffix.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script aggregates 1095 days worth of data (visits, pageviews, transactions, revenue) into one table using _TABLE_SUFFIX
19 |
20 | SELECT
21 |   date,
22 |   SUM(totals.visits) AS visits,
23 |   SUM(totals.pageviews) AS pageviews,
24 |   SUM(totals.transactions) AS transactions,
25 |   SUM(totals.transactionRevenue) / 1000000 AS revenue
26 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 | WHERE
28 |   -- Daily tables from 1095 days ago through yesterday, both bounds inclusive.
29 |   _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1095 DAY))
30 |   AND _TABLE_SUFFIX <= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
31 | GROUP BY date
32 | ORDER BY date
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/last_36_months_using_table_suffix.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script aggregates the last 36 months worth of data (visits, pageviews, transactions, revenue) into one table using _TABLE_SUFFIX
19 |
20 | SELECT
21 |   date,
22 |   SUM(totals.visits) AS visits,
23 |   SUM(totals.pageviews) AS pageviews,
24 |   SUM(totals.transactions) AS transactions,
25 |   SUM(totals.transactionRevenue)/1000000 AS revenue
26 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 | WHERE
28 |   -- Standard SQL form of the legacy TABLE_DATE_RANGE(): 36 months ago through yesterday.
29 |   _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 36 MONTH))
30 |   AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
31 | GROUP BY date
32 | ORDER BY date ASC
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/last_3_years_plus_today_using_union_all_and_table_suffix:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script aggregates the last 3 years worth of data including today (visits, pageviews, transactions, revenue) into one table using UNION ALL and _TABLE_SUFFIX
19 |
20 | WITH ga_tables AS ( SELECT
21 |     date,
22 |     SUM(totals.visits) AS visits,
23 |     SUM(totals.pageviews) AS pageviews,
24 |     SUM(totals.transactions) AS transactions,
25 |     SUM(totals.transactionRevenue)/1000000 AS revenue
26 |   FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 |   WHERE
28 |     _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(), INTERVAL 3 YEAR))
29 |     AND
30 |     FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
31 |   GROUP BY date
32 |   UNION ALL
33 |   -- Today's rows. NOTE(review): a live GA360 export may keep today in ga_sessions_intraday_* - confirm.
34 |   SELECT
35 |     date,
36 |     SUM(totals.visits) AS visits,
37 |     SUM(totals.pageviews) AS pageviews,
38 |     SUM(totals.transactions) AS transactions,
39 |     SUM(totals.transactionRevenue)/1000000 AS revenue
40 |   FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
41 |   WHERE
42 |     -- BETWEEN is inclusive: starting this branch at "1 DAY" ago would return
43 |     -- yesterday a second time, since the first branch already ends on yesterday.
44 |     _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE())
45 |   GROUP BY date
46 | )
47 | SELECT
48 |   date,
49 |   visits,
50 |   pageviews,
51 |   transactions,
52 |   revenue
53 | FROM ga_tables
54 | ORDER BY date ASC
55 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/last_3_years_using_table_suffix.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script aggregates the last 3 years worth of data (visits, pageviews, transactions, revenue) into one table using _TABLE_SUFFIX
19 |
20 | SELECT
21 |   date,
22 |   SUM(totals.visits) AS visits,
23 |   SUM(totals.pageviews) AS pageviews,
24 |   SUM(totals.transactions) AS transactions,
25 |   SUM(totals.transactionRevenue) / 1000000 AS revenue
26 | FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
27 | WHERE _TABLE_SUFFIX BETWEEN
28 |   FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 YEAR))     -- first table: 3 years ago
29 |   AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))  -- last table: yesterday
30 | GROUP BY
31 |   date
32 | ORDER BY date
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/sequence_of_hits.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the sequence of hits
19 |
20 | SELECT
21 |   fullVisitorId,
22 |   visitId,
23 |   visitNumber,
24 |   hit.hitNumber AS hitNumber,
25 |   hit.page.pagePath AS pagePath
26 | FROM
27 |   `bigquery-public-data.google_analytics_sample.ga_sessions_*`
28 |   CROSS JOIN UNNEST(hits) AS hit  -- one output row per element of the hits array
29 | -- Only hits of type PAGE from the July 2017 daily tables are kept.
30 | WHERE
31 |   hit.type = "PAGE"
32 |   AND _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
33 | ORDER BY
34 |   fullVisitorId,
35 |   visitId,
36 |   visitNumber,
37 |   hitNumber
38 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/specific_date_range_using_table_suffix.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script aggregates data for a specific date range (visits, pageviews, transactions, revenue) into one table using _TABLE_SUFFIX
19 |
20 | SELECT
21 |   date,
22 |   SUM(totals.visits) AS visits,
23 |   SUM(totals.pageviews) AS pageviews,
24 |   SUM(totals.transactions) AS transactions,
25 |   SUM(totals.transactionRevenue) / 1000000 AS revenue
26 | FROM
27 |   `bigquery-public-data.google_analytics_sample.ga_sessions_*`
28 | WHERE _TABLE_SUFFIX >= '20160801' AND _TABLE_SUFFIX <= '20170731'  -- both bounds inclusive
29 | GROUP BY date
30 | ORDER BY date
31 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/bigquery-exports-queries/google/ga360/total_transactions_per_device.sql:
--------------------------------------------------------------------------------
1 | #standardSQL
2 |
3 | -- Copyright 2018 Google LLC
4 | --
5 | -- Licensed under the Apache License, Version 2.0 (the "License");
6 | -- you may not use this file except in compliance with the License.
7 | -- You may obtain a copy of the License at
8 | --
9 | -- https://www.apache.org/licenses/LICENSE-2.0
10 | --
11 | -- Unless required by applicable law or agreed to in writing, software
12 | -- distributed under the License is distributed on an "AS IS" BASIS,
13 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | -- See the License for the specific language governing permissions and
15 | -- limitations under the License.
16 |
17 | -- This example is using the Google analytics sample data set
18 | -- This script calculates the total transactions per device for a given date range
19 |
20 | SELECT
21 |   device.deviceCategory AS device_category,
22 |   SUM(totals.transactions) AS total_transactions
23 | FROM
24 |   `bigquery-public-data.google_analytics_sample.ga_sessions_*`
25 | WHERE
26 |   _TABLE_SUFFIX BETWEEN '20170701' AND '20170731'
27 |   -- Sessions without a transaction count contribute nothing to the totals.
28 |   AND totals.transactions IS NOT NULL
29 | GROUP BY
30 |   device_category
31 | ORDER BY
32 |   total_transactions DESC
33 |
--------------------------------------------------------------------------------
/marketing-analytics/understanding/oculi/README.md:
--------------------------------------------------------------------------------
1 | **Note:** This is not an officially supported Google product. It is a reference
2 | implementation.
3 |
4 | # Oculi
5 |
6 | Oculi is a Google Cloud-based pipeline for tagging large sets of images or
7 | videos with labels based on their content, generating a BigQuery dataset for
8 | further analysis. Content tagging is done through Cloud's pre-trained computer
9 | vision models (Vision API and Video Intelligence API).
10 |
11 | The primary use case is for analyzing creatives (images and videos) in digital
12 | advertising. Combined with creative performance data, the output from this
13 | pipeline can be used to explore correlations between advertising content and
14 | performance (e.g. creatives with a human model tend to perform better).
15 |
16 | **Oculi is available in its own repository at [github.com/google/oculi](https://github.com/google/oculi).**
17 |
--------------------------------------------------------------------------------
/marketing-technology/README.md:
--------------------------------------------------------------------------------
1 | Folder for Marketing Technology sample code.
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/00_prework/README.md:
--------------------------------------------------------------------------------
1 | # Prework (optional)
2 |
3 | ## Files
4 | ### load_no_create.sh:
5 | This file
6 | 1. Downloads the file from gs://[YOUR_PREFIX]-gcs-pixel-tracking
7 | 2. Uses that file to attack through Vegeta
8 |
9 | Note: Targets.txt is about 12MB but the download should not take more than 1 sec from GCS to Pods. This is faster than creating a file every time we create a pod
10 |
11 | It will be called using ENTRYPOINT in the Docker file and its parameters will be passed using args from the Deployments. Passed parameters are:
12 | - V_DURATION: This is the time that the attack will last for
13 | - V_RATE: This is the number of requests sent per second
14 | - TARGETS_FILE: This is the location of the file containing the URLs to attack, each similar to "GET http://LB_IP_OR_DOMAIN/pixel.png?params"
15 |
16 | ### Dockerfile
17 | The Dockerfile will help us create the container that we need. It will use golang base image and we will:
18 | 1. Install Vegeta
19 | 2. Launch the load script through ENTRYPOINT so we will be able to send it some parameters through the replication controller
20 |
21 | To make it available through your gcr.io:
22 | ```
23 | docker build --no-cache -t USERNAME/vegeta docker
24 | docker tag USERNAME/vegeta gcr.io/CLOUD_REPOSITORY/vegeta
25 | gcloud docker -- push gcr.io/CLOUD_REPOSITORY/vegeta
26 | ```
27 |
28 | The one that we make available for this tutorial is available at gcr.io/cloud-solutions-images/vegeta
29 |
30 | If you want to test it locally:
31 | ```
32 | docker build --no-cache -t vegeta docker
33 | docker run vegeta 1s 1000 gcs-pixel-tracking/targets.txt
34 | ```
35 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/00_prework/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | FROM golang
15 | # NOTE(review): MAINTAINER is deprecated in favour of LABEL - confirm before changing.
16 | MAINTAINER Matthieu Mayran "mayran@google.com"
17 | # Install Vegeta. NOTE(review): the base image is unpinned and recent Go releases no longer install binaries via `go get` - verify the build.
18 | RUN go get github.com/tsenart/vegeta
19 | # Copy the load script into the image.
20 | ADD load_no_create.sh /usr/local/bin
21 | # Run the script through sh; container args become its positional parameters. -C enables noclobber, which the script overrides with ">|".
22 | ENTRYPOINT ["sh", "-C", "/usr/local/bin/load_no_create.sh"]
23 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/00_prework/docker/load_no_create.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Usage: load_no_create.sh V_DURATION V_RATE TARGETS_FILE
17 | #
18 | # Downloads a targets file from storage.googleapis.com, replays it with
19 | # `vegeta attack`, prints the report and writes the plot report to /tmp.
20 |
21 | V_DURATION="$1"   #5s
22 | V_RATE="$2"       #1000
23 | TARGETS_FILE="$3" #gcs-pixel-tracking
24 |
25 | echo "$V_DURATION"
26 | echo "$V_RATE"
27 | echo "$TARGETS_FILE"
28 |
29 | curl "https://storage.googleapis.com/$TARGETS_FILE" > /tmp/targets.txt
30 |
31 | # tee keeps the raw results on disk so the plot below can be built from the same run.
32 | vegeta attack -targets=/tmp/targets.txt -duration="$V_DURATION" -rate="$V_RATE" | tee /tmp/report_targets.bin | vegeta report
33 | # ">|" overwrites the file even when the shell runs with noclobber (sh -C).
34 | vegeta report -reporter=plot < /tmp/report_targets.bin >| /tmp/plot_targets.html
35 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/create_targets.sh:
--------------------------------------------------------------------------------
1 | #!bin/bash
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # bash create_targets.sh LB_IP NB_URLS OUTPUT_FILE
17 |
18 | PIXEL_DOMAIN_OR_IP="$1"
19 | NB_URLS=$2
20 | OUTPUT="$3"
21 |
22 | DATA_PATH=data
23 | rm $3
24 | u=1
25 | while [[ $u -le $NB_URLS ]]
26 | do
27 | # Create the simple params
28 | PAGE_NAME=$(gshuf -n 1 "$DATA_PATH/page_names.txt")
29 | EVENT=$(gshuf -n 1 "$DATA_PATH/events.txt")
30 |
31 | # Create the url param http%3A%2F%2Fexample.com%2Fcart
32 | RANDOM_DOMAIN=$(gshuf -n 1 "$DATA_PATH/domains.txt")
33 | PAGE_URL="http%3A%2F%2F$RANDOM_DOMAIN%2F$PAGE_NAME"
34 |
35 | # Create the product string which has at least one product for the product
36 | # page and an extra possibl3 if in page related to cart
37 | #declare -a PAGES_NEED_PRODUCT=(cart shopping_cart checkout products)
38 | PAGES_NEED_PRODUCT[0]="cart"
39 | PAGES_NEED_PRODUCT[1]="shopping_cart"
40 | PAGES_NEED_PRODUCT[2]="checkout"
41 | PAGES_NEED_PRODUCT[3]="products"
42 |
43 | if [[ " ${PAGES_NEED_PRODUCT[*]} " == *"$PAGE_NAME"* ]]; then
44 | RANDOM_PRODUCT=$(gshuf -n 1 "$DATA_PATH/products.txt")
45 | PRODUCT="&pr=$RANDOM_PRODUCT;"
46 | if [ "$PAGE_NAME" != "products" ]; then
47 | # Add random amount of product (between 1 and 3)
48 | ADD_X_PRODUCTS=$(( ( RANDOM % 3 ) + 1 ))
49 | RANDOM_X_PRODUCTS=$(gshuf -n $ADD_X_PRODUCTS "$DATA_PATH/products.txt")
50 | for i in ${RANDOM_X_PRODUCTS[@]};
51 | do
52 | PRODUCT+="${i};"
53 | done
54 | fi
55 | # Remove the last ";"
56 | PRODUCT=${PRODUCT%?}
57 | fi
58 |
59 | # Create a random user id between 10000 and 100000
60 | USER_ID=$(( ( RANDOM%90000 ) + 10000 ))
61 | echo "GET http://$PIXEL_DOMAIN_OR_IP/pixel.png?uid=$USER_ID&pn=$PAGE_NAME&purl=$PAGE_URL&e=${EVENT}${PRODUCT}" >> $3
62 | ((u = u + 1))
63 | done
64 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/data/domains.txt:
--------------------------------------------------------------------------------
1 | hinneng.com
2 | berpoe.net
3 | watifart.org
4 | prishis.fr
5 | ansignt.com
6 | bidept.com
7 | stinalrat.net
8 | butince.biz
9 | volings.com
10 | wardern.org
11 | grodark.com
12 | sibibus.biz
13 | chumbing.net
14 | recapok.fr
15 | wollearl.ca
16 | coatchis.ie
17 | parectier.ie
18 | muludifte.fr
19 | linewite.com
20 | nateark.net
21 | jecult.com
22 | figive.ie
23 | distioun.ca
24 | niethas.fr
25 | ocitand.net
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/data/events.txt:
--------------------------------------------------------------------------------
1 | pv
2 | pl
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/data/page_names.txt:
--------------------------------------------------------------------------------
1 | cart
2 | welcome
3 | home
4 | shopping_cart
5 | checkout
6 | product
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/data/products.txt:
--------------------------------------------------------------------------------
1 | ozerwarm
2 | zaamcom
3 | stringplus
4 | roundin
5 | strongdanstock
6 | zertraxfan
7 | sandom
8 | movehotlux
9 | stimflex
10 | solzamin
11 | zontone
12 | ranfix
13 | meding
14 | ontocore
15 | kan-tom
16 | funlex
17 | holdbam
18 | quotop
19 | blackron
20 | hotfresh
21 | vivatonit
22 | conron
23 | geokix
24 | silvertam
25 | tanfresh
26 | groovesoft
27 | zerlotzap
28 | ecodonhold
29 | toplux
30 | lightflex
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/01_prep/targets_sample.txt:
--------------------------------------------------------------------------------
1 | GET http://IP_OF_LB/pixel.png?uid=19679&pn=checkout&purl=http%3A%2F%2Fnateark.net%2Fcheckout&e=pl&pr=stringplus;hotfresh
2 | GET http://IP_OF_LB/pixel.png?uid=29281&pn=welcome&purl=http%3A%2F%2Fnateark.net%2Fwelcome&e=pl&pr=stringplus;hotfresh
3 | GET http://IP_OF_LB/pixel.png?uid=12772&pn=home&purl=http%3A%2F%2Fmuludifte.fr%2Fhome&e=pv&pr=stringplus;hotfresh
4 | GET http://IP_OF_LB/pixel.png?uid=29313&pn=home&purl=http%3A%2F%2Fjecult.com%2Fhome&e=pl&pr=stringplus;hotfresh
5 | GET http://IP_OF_LB/pixel.png?uid=38594&pn=checkout&purl=http%3A%2F%2Fwollearl.ca%2Fcheckout&e=pl&pr=vivatonit;zontone;tanfresh
6 | GET http://IP_OF_LB/pixel.png?uid=12292&pn=cart&purl=http%3A%2F%2Fjecult.com%2Fcart&e=pv&pr=tanfresh;zontone;sandom
7 | GET http://IP_OF_LB/pixel.png?uid=32965&pn=welcome&purl=http%3A%2F%2Fbidept.com%2Fwelcome&e=pl&pr=tanfresh;zontone;sandom
8 | GET http://IP_OF_LB/pixel.png?uid=28108&pn=checkout&purl=http%3A%2F%2Fberpoe.net%2Fcheckout&e=pv&pr=movehotlux;ozerwarm
9 | GET http://IP_OF_LB/pixel.png?uid=37628&pn=cart&purl=http%3A%2F%2Fprishis.fr%2Fcart&e=pl&pr=groovesoft;ozerwarm;zaamcom;conron
10 | GET http://IP_OF_LB/pixel.png?uid=34312&pn=checkout&purl=http%3A%2F%2Focitand.net%2Fcheckout&e=pl&pr=stringplus;lightflex;quotop
11 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/02_load/load_scaleup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Scales the "vegeta" deployment from 1 pod up to NB_PODS, doubling the replica
17 | # count at each step (1000 urls/s per pod, so 100 pods reach about 100000/s).
18 | NB_PODS=100
19 | COUNTER=1
20 | SLEEP=15 # Base wait in seconds (it takes a while to create the target.txt file); each step sleeps COUNTER * SLEEP
21 | while [ "$COUNTER" -le "$NB_PODS" ]
22 | do
23 |   kubectl scale deployment vegeta --replicas="$COUNTER"
24 |   echo "$COUNTER"
25 |   # Target reached: stop without sleeping again.
26 |   if [ "$COUNTER" -eq "$NB_PODS" ]; then
27 |     echo 'break'
28 |     break
29 |   fi
30 |   sleep $((COUNTER * SLEEP))
31 |   COUNTER=$((2 * COUNTER))
32 |   # Clamp the last step so we never scale past NB_PODS.
33 |   if [ "$COUNTER" -gt "$NB_PODS" ]
34 |   then
35 |     COUNTER=$NB_PODS
36 |   fi
37 |
38 | done
39 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/02_load/vegeta-deployment.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | kind: Deployment
16 | # apps/v1 replaces extensions/v1beta1, which Kubernetes 1.16+ no longer serves for Deployments.
17 | apiVersion: apps/v1
18 | metadata:
19 |   name: vegeta
20 | spec:
21 |   # apps/v1 requires an explicit selector that matches the pod template labels.
22 |   selector:
23 |     matchLabels:
24 |       run: vegeta
25 |   template:
26 |     metadata:
27 |       labels:
28 |         run: vegeta
29 |     spec:
30 |       containers:
31 |       - name: vegeta
32 |         image: gcr.io/cloud-solutions-images/vegeta:latest
33 |         resources:
34 |           limits:
35 |             cpu: 400m
36 |         # Positional arguments handed to the image entrypoint: duration, rate, targets file.
37 |         args:
38 |         - "0" # The load will last forever
39 |         - "1000" # We attack 1000 urls per second per pod
40 |         - "mam-ext/cdn_load.txt"
41 |         #- "[YOUR_PREFIX]-gcs-pixel-tracking/targets.txt" # Targets
42 |
--------------------------------------------------------------------------------
/marketing-technology/gcs-pixel-tracking/CONTRIBUTING:
--------------------------------------------------------------------------------
1 | Want to contribute? Great! First, read this page (including the small print at the end).
2 |
3 | ### Before you contribute
4 | Before we can use your code, you must sign the
5 | [Google Individual Contributor License
6 | Agreement](https://cla.developers.google.com/about/google-individual)
7 | (CLA), which you can do online. The CLA is necessary mainly because you own the
8 | copyright to your changes, even after your contribution becomes part of our
9 | codebase, so we need your permission to use and distribute your code. We also
10 | need to be sure of various other things—for instance that you'll tell us if you
11 | know that your code infringes on other people's patents. You don't have to sign
12 | the CLA until after you've submitted your code for review and a member has
13 | approved it, but you must do it before we can put your code into our codebase.
14 | Before you start working on a larger contribution, you should get in touch with
15 | us first through the issue tracker with your idea so that we can help out and
16 | possibly guide you. Coordinating up front makes it much easier to avoid
17 | frustration later on.
18 |
19 | ### Code reviews
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose.
22 |
23 | ### The small print
24 | Contributions made by corporations are covered by a different agreement than
25 | the one above, the
26 | [Software Grant and Corporate Contributor License
27 | Agreement](https://cla.developers.google.com/about/google-corporate).
28 |
--------------------------------------------------------------------------------