├── .gitattributes ├── .gitignore ├── 03-ingestion-gcs-to-bq ├── README.md ├── cloudbuild.yaml ├── config.yaml ├── crm_permissions.csv ├── crm_permissions_20210708.csv ├── crm_users_20210708.csv ├── main.py └── requirements.txt ├── 04-storage ├── airflow-gar-agg.py ├── dataflow-udf-ga4.js ├── diagrams.R ├── ga4-agg.sql └── ga4-bigquery-buildtrigger.yml ├── 06-activation ├── bigquery-clientid.R ├── gtm-ss-http-to-pubsub.js ├── http-to-pubsub.py ├── send_email.R └── user-activity-ga4.sql ├── 08-segmentation ├── bq-to-firestore │ ├── bigquery-parsing-workflow.yaml │ ├── bq-gcs-workflow.yaml │ ├── bq-pagination-workflows.yaml │ ├── bq-row-to-fs.py │ ├── bq-to-firestore.yaml │ ├── bq-to-fs-main-workflow.yaml │ └── connector_write_firestore_workflow.yaml └── crm_imports │ ├── crm_fake_data.R │ └── fake_crm.csv ├── 09-realtime-forecasting └── importing-tidy-ga4-data.R ├── admin └── image-figs.R ├── code-examples.Rproj ├── figure-log.csv └── gar_email └── Dockerfile /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | 08-segmentation/crm_imports/fake_crm.csv 6 | 08-segmentation/crm_imports/ga4-demo-cids.csv 7 | 08-segmentation/crm_imports/fake_crm.csv 8 | -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/README.md: -------------------------------------------------------------------------------- 1 | # Cloud Function - Google Cloud Storage to BigQuery 2 | 3 | This is an example of a cloud function that can be used to trigger BigQuery imports when specified files hit Cloud Storage. 4 | 5 | It uses Cloud Build to deploy to Cloud Functions upon each git commit. 
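Once deployed, a quick manual test is to copy one of the sample CSVs into the trigger bucket and then tail the function's logs. The bucket, region, and function name below are the ones used in `cloudbuild.yaml`; substitute your own if you changed them:

```sh
# Upload a sample file - the google.storage.object.finalize event fires the function
gsutil cp crm_permissions_20210708.csv gs://marks-crm-imports-2021/

# Watch the function logs to confirm the BigQuery load job was created
gcloud functions logs read gcs_to_bq --region=europe-west1 --limit=20
```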
6 | 7 | -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: gcr.io/cloud-builders/gcloud 3 | args: ['functions', 4 | 'deploy', 5 | 'gcs_to_bq', 6 | '--source=03-ingestion-gcs-to-bq', 7 | '--runtime=python39', 8 | '--region=europe-west1', 9 | '--trigger-resource=marks-crm-imports-2021', 10 | '--trigger-event=google.storage.object.finalize'] 11 | -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/config.yaml: -------------------------------------------------------------------------------- 1 | project: learning-ga4 2 | datasetid: crm_imports 3 | schema: 4 | crm_bookings: 5 | fields: 6 | - name: BOOK_ID 7 | type: STRING 8 | - name: BOOKING_ACTIVE 9 | type: STRING 10 | - name: BOOKING_DEPOSIT 11 | type: STRING 12 | - name: DATE 13 | type: STRING 14 | - name: DEPARTURE_DATE 15 | type: STRING 16 | crm_permissions: 17 | fields: 18 | - name: USER_ID 19 | type: STRING 20 | - name: PERMISSION 21 | type: STRING 22 | - name: STATUS 23 | type: STRING 24 | - name: SOURCE 25 | type: STRING 26 | - name: PERMISSION_DATE 27 | type: STRING 28 | crm_sales: 29 | fields: 30 | - name: SALES_ID 31 | type: STRING 32 | - name: SALES_EMAIL 33 | type: STRING 34 | - name: SALES_FIRST_NAME 35 | type: STRING 36 | - name: SALES_LAST_NAME 37 | type: STRING -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/crm_permissions.csv: -------------------------------------------------------------------------------- 1 | USER_ID,PERMISSION,STATUS,SOURCE,PERMISSION_DATE 2 | AB12345,Marketing1,True,Email,2021-01-21 3 | AB34252,Marketing3,True,Website,2020-12-02 4 | RF45343,-,False,-,- -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/crm_permissions_20210708.csv: -------------------------------------------------------------------------------- 1 | USER_ID,PERMISSION,STATUS,SOURCE,PERMISSION_DATE 2 | AB12345,Marketing1,True,Email,2021-01-21 3 | AB34252,Marketing3,True,Website,2020-12-02 4 | RF45343,-,False,-,- -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/crm_users_20210708.csv: -------------------------------------------------------------------------------- 1 | USER_ID,EMAIL,TOTAL_LIFETIME_REVENUE 2 | AB12345,david@email.com,56789 3 | AB34252,sanne@freeemail.com,34234 4 | RF45343,rose@medson.com,23123 -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import logging 4 | import re 5 | import datetime 6 | from google.cloud import bigquery 7 | from google.cloud.bigquery import LoadJobConfig 8 | from google.cloud.bigquery import SchemaField 9 | import google.cloud.logging 10 | 11 | # set up logging https://cloud.google.com/logging/docs/setup/python 12 | client = google.cloud.logging.Client() 13 | client.get_default_handler() 14 | client.setup_logging() 15 | 16 | # load config.yaml into config 17 | config_file = "config.yaml" 18 | 19 | if os.path.isfile(config_file): 20 | with open("config.yaml", "r") as stream: 21 | try: 22 | config = yaml.safe_load(stream) 23 | except yaml.YAMLError as exc: 24 | logging.error(exc) 25 | else: 26 | logging.error("config.yaml needs to be added") 
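# For reference, the config.yaml shipped alongside this function (see
# 03-ingestion-gcs-to-bq/config.yaml) has this shape - a project, a target
# dataset, and one optional schema entry per table name:
#
#   project: learning-ga4
#   datasetid: crm_imports
#   schema:
#     crm_permissions:
#       fields:
#         - name: USER_ID
#           type: STRING
#
# Files whose base name has no matching entry under `schema:` fall back to
# BigQuery schema auto-detection (see query_schema() below).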
27 | 28 | # create a list of SchemaField objects from a schema config.yaml file 29 | def create_schema(schema_config): 30 | 31 | SCHEMA = [] 32 | for scheme in schema_config: 33 | 34 | if 'description' in scheme: 35 | description = scheme['description'] 36 | else: 37 | description = '' 38 | 39 | if 'mode' in scheme: 40 | mode = scheme['mode'] 41 | else: 42 | mode = 'NULLABLE' 43 | 44 | try: 45 | assert isinstance(scheme['name'], str) 46 | assert isinstance(scheme['type'], str) 47 | assert isinstance(mode, str) 48 | assert isinstance(description, str) 49 | except AssertionError as e: 50 | logging.info( 51 | 'Error in schema: name {} - type {} - mode - {} description {}'.format(scheme['name'], scheme['type'], mode, description)) 52 | break 53 | 54 | entry = SchemaField(name=scheme['name'], 55 | field_type=scheme['type'], 56 | mode=mode, 57 | description=description) 58 | SCHEMA.append(entry) 59 | 60 | logging.debug('SCHEMA created {}'.format(SCHEMA)) 61 | 62 | return SCHEMA 63 | 64 | 65 | 66 | def make_tbl_name(table_id, schema=False): 67 | 68 | t_split = table_id.split('_20') 69 | 70 | name = t_split[0] 71 | 72 | if schema: return name 73 | 74 | suffix = ''.join(re.findall('\d\d', table_id)[0:4]) 75 | 76 | return name + '$' + suffix 77 | 78 | 79 | def query_schema(table_id, job_config): 80 | 81 | schema_name = make_tbl_name(table_id, schema=True) 82 | 83 | logging.info('Looking for schema_name: {} for import: {}'.format(schema_name, table_id)) 84 | # if we have no configuration attempt auto-detection 85 | # recommended only for development tables 86 | if schema_name not in config['schema']: 87 | logging.info('No config found. Using auto detection of schema') 88 | job_config.autodetect = True 89 | return job_config 90 | 91 | logging.info('Found schema for ' + schema_name) 92 | 93 | schema_config = config['schema'][schema_name]['fields'] 94 | 95 | job_config.schema = create_schema(schema_config) 96 | 97 | # standard csv load behaviour can be defined here 98 | job_config.quote_character = '"' 99 | job_config.skip_leading_rows = 1 100 | job_config.field_delimiter = ',' 101 | job_config.allow_quoted_newlines = True 102 | 103 | return job_config 104 | 105 | def load_gcs_bq(uri, table_id, project, dataset_id): 106 | 107 | client = bigquery.Client(project=project) 108 | dataset_ref = client.dataset(dataset_id) 109 | 110 | # Change the below configuration according to your import needs 111 | job_config = LoadJobConfig() 112 | job_config.source_format = bigquery.SourceFormat.CSV 113 | job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE 114 | job_config.encoding = bigquery.Encoding.UTF_8 115 | job_config.time_partitioning = bigquery.TimePartitioning() 116 | 117 | job_config = query_schema(table_id, job_config) 118 | 119 | table_name = make_tbl_name(table_id) 120 | table_ref = dataset_ref.table(table_name) 121 | 122 | job = client.load_table_from_uri( 123 | uri, 124 | table_ref, 125 | location='EU', 126 | job_config=job_config) # API request 127 | 128 | 129 | 130 | def gcs_to_bq(data, context): 131 | """Background Cloud Function to be triggered by Cloud Storage. 132 | This functions constructs the file URI and uploads it to BigQuery. 133 | 134 | Args: 135 | data (dict): The Cloud Functions event payload. 136 | context (google.cloud.functions.Context): Metadata of triggering event. 
137 | Returns: 138 | None; the output is written to Stackdriver Logging 139 | """ 140 | 141 | object_name = data['name'] 142 | project = config['project'] 143 | dataset_id = config['datasetid'] 144 | 145 | if object_name: 146 | # create a bigquery table related to the filename 147 | table_id = os.path.splitext(os.path.basename(object_name))[0].replace('.','_') 148 | uri = 'gs://{}/{}'.format(data['bucket'], object_name) 149 | 150 | load_gcs_bq(uri, table_id, project, dataset_id) 151 | 152 | else: 153 | logging.info('Nothing to load') 154 | 155 | return -------------------------------------------------------------------------------- /03-ingestion-gcs-to-bq/requirements.txt: -------------------------------------------------------------------------------- 1 | google-cloud-bigquery==2.20.0 2 | google-cloud-logging==2.5.0 3 | pyyaml==5.4.1 4 | -------------------------------------------------------------------------------- /04-storage/airflow-gar-agg.py: -------------------------------------------------------------------------------- 1 | from airflow.contrib.operators.bigquery_operator import BigQueryOperator 2 | from airflow.contrib.operators.bigquery_check_operator import BigQueryCheckOperator 3 | from airflow.operators.dummy_operator import DummyOperator 4 | from airflow import DAG 5 | from airflow.utils.dates import days_ago 6 | import datetime 7 | 8 | VERSION = '0.1.7' # increment this each version of the DAG 9 | 10 | DAG_NAME = 'ga4-transformation-' + VERSION 11 | 12 | default_args = { 13 | 'start_date': days_ago(1), # change this to a fixed date for backfilling 14 | 'email_on_failure': True, 15 | 'email': 'mark@example.com', 16 | 'email_on_retry': False, 17 | 'depends_on_past': False, 18 | 'retries': 3, 19 | 'retry_delay': datetime.timedelta(minutes=10), 20 | 'project_id': 'learning-ga4', 21 | 'execution_timeout': datetime.timedelta(minutes=60) 22 | } 23 | 24 | schedule_interval = '2 4 * * *' # min, hour, day of month, month, day of week 25 | 26 | dag = DAG(DAG_NAME, default_args=default_args, schedule_interval=schedule_interval) 27 | 28 | 29 | start = DummyOperator( 30 | task_id='start', 31 | dag=dag 32 | ) 33 | 34 | # uses the Airflow macro {{ ds_nodash }} to insert todays date in YYYYMMDD form 35 | check_table = BigQueryCheckOperator( 36 | task_id='check_table', 37 | dag=dag, 38 | sql=''' 39 | SELECT count(1) > 5000 40 | FROM `learning-ga4.analytics_250021309.events_{{ ds_nodash }}`" 41 | ''' 42 | ) 43 | 44 | checked = DummyOperator( 45 | task_id='checked', 46 | dag=dag 47 | ) 48 | 49 | # a function so you can loop over many tables, SQL files 50 | def make_bq(table_id): 51 | 52 | task = BigQueryOperator( 53 | task_id='make_bq_'+table_id, 54 | write_disposition='WRITE_TRUNCATE', 55 | create_disposition='CREATE_IF_NEEDED', 56 | destination_dataset_table='learning_ga4.ga4_aggregations.{}${{ ds_nodash}}'.format(table_id), 57 | sql='./ga4_sql/{}.sql'.format(table_id), 58 | use_legacy_sql=False, 59 | dag=dag 60 | ) 61 | 62 | return task 63 | 64 | ga_tables = [ 65 | 'pageview-aggs', 66 | 'ga4-join-crm', 67 | 'ecom-fields' 68 | ] 69 | 70 | ga_aggregations = [] # helpful if you are doing other downstream transformations 71 | for table in ga_tables: 72 | task = make_bq(table) 73 | checked >> task 74 | ga_aggregations.append(task) 75 | 76 | 77 | # create the DAG 78 | start >> check_table >> checked -------------------------------------------------------------------------------- /04-storage/dataflow-udf-ga4.js: -------------------------------------------------------------------------------- 1 | 
/** 2 | * A transform function which filters out fields starting with x-ga 3 | * @param {string} inJson 4 | * @return {string} outJson 5 | */ 6 | function transform(inJson) { 7 | var obj = JSON.parse(inJson); 8 | var keys = Object.keys(obj); 9 | var outJson = {}; 10 | 11 | // don't output keys that starts with x-ga 12 | var outJson = keys.filter(function(key) { 13 | return !key.startsWith('x-ga'); 14 | }).reduce(function(acc, key) { 15 | acc[key] = obj[key]; 16 | return acc; 17 | }, {}); 18 | 19 | return JSON.stringify(outJson); 20 | } -------------------------------------------------------------------------------- /04-storage/diagrams.R: -------------------------------------------------------------------------------- 1 | DiagrammeR::mermaid(" 2 | graph LR 3 | import_ga4-->tidy_ga4 4 | tidy_ga4-->operations_dashboard 5 | import_crm-->tidy_crm 6 | tidy_crm-->join_data 7 | tidy_ga4-->join_data 8 | join_data-->marketing_data 9 | join_data-->sales_data 10 | join_data-->retention_data 11 | marketing_data-->web_enrichment 12 | web_enrichment-->user_api 13 | marketing_data-->marketing_dashboard_data 14 | marketing_data-->csuite_dashboard 15 | sales_data-->csuite_dashboard 16 | retention_data-->csuite_dashboard 17 | ") 18 | 19 | -------------------------------------------------------------------------------- /04-storage/ga4-agg.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- event_date (the date on which the event was logged) 3 | parse_date('%Y%m%d',event_date) as event_date, 4 | -- event_timestamp (in microseconds, utc) 5 | timestamp_micros(event_timestamp) as event_timestamp, 6 | -- event_name (the name of the event) 7 | event_name, 8 | -- event_key (the event parameter's key) 9 | (SELECT key FROM UNNEST(event_params) WHERE key = 'page_location') as event_key, 10 | -- event_string_value (the string value of the event parameter) 11 | (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as event_string_value 12 | FROM 13 | -- your GA4 exports - change to your location 14 | `mark-edmondson-gde.analytics_206670707.events_*` 15 | WHERE 16 | -- limits query to use table from yesterday only 17 | _table_suffix = format_date('%Y%m%d',date_sub(current_date(), interval 1 day)) 18 | and event_name = 'page_view' -------------------------------------------------------------------------------- /04-storage/ga4-bigquery-buildtrigger.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/gcloud' 3 | entrypoint: 'bash' 4 | dir: '04-storage' 5 | args: ['-c', 6 | "bq --location=eu --project_id=$PROJECT_ID query --use_legacy_sql=false --destination_table=tidydata.ga4_pageviews$$${(date '+%Y%m%d')} < ./ga4-agg.sql"] 7 | -------------------------------------------------------------------------------- /06-activation/bigquery-clientid.R: -------------------------------------------------------------------------------- 1 | library(bigQueryR) 2 | options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/cloud-platform") 3 | googleAuthR::gar_gce_auth() 4 | 5 | # the GA4 dataset 6 | bqr_global_project("mark-edmondson-gde") 7 | bqr_global_dataset("analytics_206670707") 8 | 9 | query_client_id <- function(client_id, sql_file){ 10 | 11 | # read in SQL file and interpolate client_id 12 | sql <- readChar(sql_file, file.size(sql_file)) 13 | sql_client_id <- sprintf(sql, client_id) 14 | 15 | results <- tryCatch(bqr_query( 16 | query = sql_client_id, 17 | 
useLegacySql=FALSE 18 | ), error = function(err){ 19 | message(sql_client_id) 20 | stop("Error in query:", results$error, results$message, call. = FALSE) 21 | }) 22 | 23 | str(results) 24 | 25 | message("Writing ", nrow(results), " rows to bigquery_results.csv") 26 | write.csv(results, file = "/workspace/bigquery_results.csv", row.names = FALSE) 27 | 28 | 29 | TRUE 30 | 31 | } 32 | 33 | client_id <- Sys.getenv("CLIENT_ID") 34 | if(nzchar(client_id)){ 35 | query_client_id(client_id, "/workspace/06-activation/user-activity-ga4.sql") 36 | } else { 37 | stop("Could not find client_id") 38 | } 39 | -------------------------------------------------------------------------------- /06-activation/gtm-ss-http-to-pubsub.js: -------------------------------------------------------------------------------- 1 | const getAllEventData = require('getAllEventData'); 2 | const log = require("logToConsole"); 3 | const JSON = require("JSON"); 4 | const sendHttpRequest = require('sendHttpRequest'); 5 | 6 | log(data); 7 | 8 | const postBody = JSON.stringify(getAllEventData()); 9 | 10 | log('postBody parsed to:', postBody); 11 | 12 | const url = data.endpoint + '/' + data.topic_path 13 | 14 | log('Sending event data to:' + url); 15 | 16 | const options = {method: 'POST', 17 | headers: {'Content-Type':'application/json'}}; 18 | 19 | // Sends a POST request 20 | sendHttpRequest(url, (statusCode) => { 21 | if (statusCode >= 200 && statusCode < 300) { 22 | data.gtmOnSuccess(); 23 | } else { 24 | data.gtmOnFailure(); 25 | } 26 | }, options, postBody); 27 | -------------------------------------------------------------------------------- /06-activation/http-to-pubsub.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | from google.cloud import pubsub_v1 # google-cloud-pubsub==2.8.0 3 | 4 | def http_to_pubsub(request): 5 | request_json = request.get_json() 6 | request_args = request.args 7 | 8 | print('Request json: {}'.format(request_json)) 9 | 10 | if request_json: 11 | res = trigger(json.dumps(request_json).encode('utf-8'), request.path) 12 | return res 13 | else: 14 | return 'No data found', 204 15 | 16 | 17 | def trigger(data, topic_name): 18 | publisher = pubsub_v1.PublisherClient() 19 | 20 | topic_name = 'projects/{project_id}/topics{topic}'.format( 21 | project_id=os.getenv('GCP_PROJECT'), 22 | topic=topic_name, 23 | ) 24 | 25 | print ('Publishing message to topic {}'.format(topic_name)) 26 | 27 | # create topic if necessary 28 | try: 29 | future = publisher.publish(topic_name, data) 30 | future_return = future.result() 31 | print('Published message {}'.format(future_return)) 32 | 33 | return future_return 34 | 35 | except Exception as e: 36 | print('Topic {} does not exist? 
Attempting to create it'.format(topic_name)) 37 | print('Error: {}'.format(e)) 38 | 39 | publisher.create_topic(name=topic_name) 40 | print ('Topic created ' + topic_name) 41 | 42 | return 'Topic Created', 201 -------------------------------------------------------------------------------- /06-activation/send_email.R: -------------------------------------------------------------------------------- 1 | library(blastula) 2 | library(formattable) 3 | 4 | the_data <- read.csv("/workspace/bigquery_results.csv") 5 | 6 | if(nrow(the_data) < 1){ 7 | stop("Data only one row, stopping") 8 | } 9 | 10 | # Get a nicely formatted date/time string 11 | date_time <- add_readable_time() 12 | ga4_table <- format_table(the_data) 13 | 14 | email <- 15 | compose_email( 16 | body = md(glue::glue( 17 | "Hello, 18 | 19 | You requested your GA4 browsing history from Mark Edmondson's website. Here it is! 20 | {ga4_table} 21 | 22 | ")), 23 | footer = md(glue::glue("Email sent on {date_time}.")) 24 | ) 25 | 26 | the_email <- Sys.getenv("EMAIL") 27 | 28 | if(nzchar(the_email)){ 29 | email %>% 30 | smtp_send( 31 | to = the_email, 32 | from = "me@markedmondson.me", 33 | subject = "Your GA4 history for Mark Edmondson's blog", 34 | credentials = creds_file("/workspace/blastula_gmail_creds") 35 | ) 36 | } else { 37 | stop("Could not find email in EMAIL env var") 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /06-activation/user-activity-ga4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | format_timestamp("%Y%m%d", timestamp_micros(event_timestamp)) as event_date, 3 | CAST(user_pseudo_id AS STRING) as cid, 4 | event_name, 5 | TIMESTAMP_MICROS(user_first_touch_timestamp) as first_touch, 6 | (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as page_location, 7 | (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_referrer') as page_referrer 8 | FROM 9 | `mark-edmondson-gde.analytics_206670707.events_*` 10 | WHERE 11 | _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 90 day)) 12 | and format_date('%Y%m%d',date_sub(current_date(), interval 0 day)) 13 | and event_name = 'page_view' 14 | and user_pseudo_id = '%s' 15 | GROUP BY 1,2,3,4,5,6 16 | ORDER BY event_date -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bigquery-parsing-workflow.yaml: -------------------------------------------------------------------------------- 1 | map_bq_result: 2 | params: [row, names] 3 | steps: 4 | - init_cell_map: 5 | assign: 6 | - cell_map: [] 7 | - processRow: 8 | for: 9 | value: cell 10 | in: ${row.f} 11 | index: i 12 | steps: 13 | - map_cell: 14 | assign: 15 | - name: ${names[i]} 16 | - cell: {"${name}" : "${cell.v}"} 17 | - cell_map: ${cell_values} + ${cell} 18 | - returnRowMap: 19 | return: cell_map 20 | -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bq-gcs-workflow.yaml: -------------------------------------------------------------------------------- 1 | - init: 2 | assign: 3 | - project_id: "learning-ga4" 4 | - bq_dataset_export: "firestore_export" 5 | - bq_table_export: "uid_" 6 | - bq_query: > 7 | select 8 | user_id, 9 | device_first, 10 | channel_grouping_first 11 | from 12 | `stacktonic-cloud.st_core.dim_customer` 13 | - gcs_bucket: "" 14 | - gcs_filepath: "firestore-export/firestore-export.json" 15 | - pubsub_topic: "" 
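      # NOTE: the empty strings above and inside pubsub_message below are deliberate
      # placeholders - fill in your own bucket, Pub/Sub topic, Firestore collection
      # and key before deploying this workflow.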
16 | - pubsub_message: { 17 | "projectId": "", 18 | "bucketName": "", 19 | "bucketPath": "firestore-export/firestore-export.json", 20 | "firestoreCollection": "", 21 | "firestoreKey": "" 22 | } 23 | - bigquery-create-export-table: 24 | call: googleapis.bigquery.v2.jobs.insert 25 | args: 26 | projectId: ${project_id} 27 | body: 28 | configuration: 29 | query: 30 | query: ${bq_query} 31 | destinationTable: 32 | projectId: ${project_id} 33 | datasetId: ${bq_dataset_export} 34 | tableId: ${bq_table_export} 35 | create_disposition: "CREATE_IF_NEEDED" 36 | write_disposition: "WRITE_TRUNCATE" 37 | allowLargeResults: true 38 | useLegacySql: false 39 | 40 | - bigquery-table-to-gcs: 41 | call: googleapis.bigquery.v2.jobs.insert 42 | args: 43 | projectId: ${project_id} 44 | body: 45 | configuration: 46 | extract: 47 | compression: NONE 48 | destinationFormat: "NEWLINE_DELIMITED_JSON" 49 | destinationUris: ['${"gs://" + gcs_bucket + "/" + gcs_filepath}'] 50 | sourceTable: 51 | projectId: ${project_id} 52 | datasetId: ${bq_dataset_export} 53 | tableId: ${bq_table_export} 54 | - publish_message_to_pubsub: 55 | call: googleapis.pubsub.v1.projects.topics.publish 56 | args: 57 | topic: ${"projects/" + project_id + "/topics/" + pubsub_topic} 58 | body: 59 | messages: 60 | - data: ${base64.encode(json.encode(pubsub_message))} -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bq-pagination-workflows.yaml: -------------------------------------------------------------------------------- 1 | main: 2 | params: [input] 3 | steps: 4 | - init: 5 | assign: 6 | - pageToken: null 7 | - startQuery: 8 | call: googleapis.bigquery.v2.jobs.query 9 | args: 10 | projectId: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")} 11 | body: 12 | useLegacySql: false 13 | # Remove LIMIT from the query to iterate through all results 14 | query: > 15 | SELECT name, SUM(number) AS total 16 | FROM `bigquery-public-data.usa_names.usa_1910_2013` 17 | GROUP BY name ORDER BY total DESC LIMIT 50 18 | result: query 19 | - getPage: 20 | call: googleapis.bigquery.v2.jobs.getQueryResults 21 | args: 22 | projectId: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")} 23 | jobId: ${query.jobReference.jobId} 24 | maxResults: 500 25 | pageToken: ${pageToken} 26 | result: page 27 | - processPage: 28 | for: 29 | value: row 30 | in: ${page.rows} 31 | steps: 32 | - processRow: 33 | call: sys.log 34 | args: 35 | data: ${row} 36 | - checkIfDone: 37 | switch: 38 | - condition: ${"pageToken" in page and page.pageToken != ""} 39 | assign: 40 | - pageToken: ${page.pageToken} 41 | next: getPage -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bq-row-to-fs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import firebase_admin # 5.2.0 3 | from firebase_admin import credentials 4 | from firebase_admin import firestore 5 | import datetime 6 | 7 | _MAX_LOSSLESS=9007199254740992 8 | 9 | def export_fs(call, fs_collection_name, db, fs_fields): 10 | 11 | if len(fs_fields) != len(call): 12 | return 'error: number of fields does not match number of fields in call' 13 | 14 | try: 15 | id = call[0] 16 | data = dict(zip(fs_fields, call)) 17 | db.collection(fs_collection_name).document(id).set(data) 18 | return 'added {} to Firestore: {}' % id, datetime.datetime.now() 19 | 20 | except Exception as e: 21 | return 'error: failed to add to Firestore: {}', datetime.datetime.now() 22 | 23 | def fs_add(request): 24 | 
try: 25 | return_value = [] 26 | request_json = request.get_json() 27 | calls = request_json['calls'] 28 | print('request: {}'.format(request_json)) 29 | except Exception as inst: 30 | return json.dumps( { "errorMessage": 'something unexpected in input' } ), 400 31 | 32 | if request_json and 'userDefinedContext' in request_json: 33 | userDefinedContext = request_json['userDefinedContext'] 34 | if 'fs_collection_name' in userDefinedContext: 35 | fs_collection_name = userDefinedContext['fs_collection_name'] 36 | else: 37 | return json.dumps( { "errorMessage": 'no fs_collection_name specified' } ), 400 38 | 39 | if 'fs_fields' in userDefinedContext: 40 | fs_fields = userDefinedContext['fs_fields'].split(",") 41 | else: 42 | return json.dumps( { "errorMessage": 'no fs_fields specified' } ), 400 43 | 44 | if 'fs_project_id' in userDefinedContext: 45 | fs_project_id = userDefinedContext['fs_project_id'] 46 | else: 47 | return json.dumps( { "errorMessage": 'no fs_project_id specified' } ), 400 48 | else: 49 | return json.dumps( { "errorMessage": 'no userDefinedContext specified' } ), 400 50 | 51 | if len(fs_fields) == 0: 52 | return json.dumps( { "errorMessage": 'length of fs_fields=0' } ), 400 53 | 54 | 55 | # Use the application default credentials 56 | cred = credentials.ApplicationDefault() 57 | firebase_admin.initialize_app(cred, { 58 | 'projectId': fs_project_id, 59 | }) 60 | 61 | db = firestore.client() 62 | 63 | print('Authenticated') 64 | print('fs_collection_name: {}' % fs_collection_name) 65 | print('fs_fields: {}' % fs_fields) 66 | print('fs_project_id: {}' % fs_project_id) 67 | 68 | 69 | if len(calls) > 500: 70 | return json.dumps( { "errorMessage": "too many rows - 500 limit" } ), 400 71 | 72 | for call in calls: 73 | value = export_fs(call=call, fs_collection_name=fs_collection_name, db=db, fs_fields=fs_fields) 74 | return_value.append(value) 75 | 76 | replies = [str(x) if x > _MAX_LOSSLESS or x < -_MAX_LOSSLESS else x for x in return_value] 77 | return_json = json.dumps( { "replies" : replies} ) 78 | return return_json 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bq-to-firestore.yaml: -------------------------------------------------------------------------------- 1 | # iam roles: 2 | # - bq job user 3 | # - bq data owner 4 | # - cloud datastore user 5 | # - logging admin 6 | # workflows-bq-firestore@learning-ga4.iam.gserviceaccount.com 7 | main: 8 | params: [args] 9 | steps: 10 | - init_fs_writes: 11 | assign: 12 | - fs_writes: [] 13 | - page: ${args.page} 14 | - schema_names: ${args.schema_names} 15 | - projectId: ${args.projectId} 16 | - collection: ${args.collection} 17 | - fs_key: ${args.fs_key} 18 | - processPage: 19 | for: 20 | value: row 21 | in: ${page.rows} 22 | steps: 23 | - parse_data: 24 | call: map_bq_result 25 | args: 26 | row: ${row} 27 | names: ${schema_names} 28 | result: bq_map 29 | - list_to_dict: 30 | call: list_to_dict 31 | args: 32 | a_list: ${bq_map} 33 | result: bq_dict 34 | - assign_row_values: 35 | assign: 36 | - fs_key_value: ${map.get(bq_dict, fs_key)} 37 | - document_id: ${fs_key_value.stringValue} 38 | - name: ${"projects/"+projectId+"/databases/(default)/documents/"+collection+"/"+document_id} 39 | - document: {update: {fields: '${bq_dict}', name: '${name}'}} 40 | - fs_writes: ${list.concat(fs_writes, document)} 41 | - log_fs_writes: 42 | call: sys.log 43 | args: 44 | data: ${fs_writes} 45 | - batch_write_to_firestore: 46 | call: 
googleapis.firestore.v1.projects.databases.documents.batchWrite 47 | args: 48 | database: ${"projects/"+projectId+"/databases/(default)"} 49 | body: 50 | writes: ${fs_writes} 51 | result: write_result 52 | 53 | map_bq_result: 54 | params: [row, names] 55 | steps: 56 | - init_cell_list: 57 | assign: 58 | - cell_list: [] 59 | - processRow: 60 | # TODO: map different types to BigQuery schema 61 | # https://cloud.google.com/firestore/docs/reference/rest/Shared.Types/ArrayValue#Value 62 | for: 63 | value: cell 64 | in: ${row.f} 65 | index: i 66 | steps: 67 | - map_cell: 68 | assign: 69 | - name: ${names[i]} 70 | - value: ${default(cell.v, "")} 71 | - cell: {"${name}" : { "stringValue": '${value}'}} 72 | - cell_list: ${list.concat(cell_list, cell)} 73 | - returnRowList: 74 | return: ${cell_list} 75 | 76 | list_to_dict: 77 | params: [a_list] 78 | steps: 79 | - init_dict: 80 | assign: 81 | - the_dict: {} 82 | - loop_list: 83 | for: 84 | value: entry 85 | in: ${a_list} 86 | steps: 87 | - map_entry: 88 | assign: 89 | - the_name: ${keys(entry)[0]} 90 | - the_value: ${entry[the_name]} 91 | - the_dict[the_name]: ${the_value} 92 | - return_dict: 93 | return: ${the_dict} 94 | -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/bq-to-fs-main-workflow.yaml: -------------------------------------------------------------------------------- 1 | # https://medium.com/google-cloud/parallel-executions-with-google-workflows-3a16f8fee0eb 2 | # iam roles: 3 | # - bq job user 4 | # - bq data owner 5 | # - cloud datastore user 6 | # - logging admin 7 | # - workflows invoker 8 | # workflows-bq-firestore@learning-ga4.iam.gserviceaccount.com 9 | main: 10 | params: [args] 11 | steps: 12 | - init: 13 | assign: 14 | - pageToken: null 15 | - default_query: > 16 | SELECT A.name, A.crm_id, created, job, 17 | sum(A.transactions) as sum_crm_trans, 18 | sum(A.revenue) as sum_crm_rev, 19 | sum(sum_web_trans) as sum_web_trans, 20 | sum(sum_web_rev) as sum_web_rev, 21 | FROM ( 22 | (SELECT * FROM `learning-ga4.crm_imports.fake_crm_transactions`) AS A 23 | LEFT JOIN 24 | (SELECT user_pseudo_id, 25 | count(distinct ecommerce.transaction_id) as sum_web_trans, 26 | sum(ecommerce.purchase_revenue_in_usd) as sum_web_rev, 27 | FROM `learning-ga4.ga4_public_dataset.events_*` 28 | GROUP BY 1) as B 29 | ON B.user_pseudo_id = A.cid) 30 | GROUP BY 1,2,3,4 31 | LIMIT 2000 # for testing, in production remove LIMIT 32 | - bq_query: ${default(map.get(args, "bq_query"), default_query)} 33 | - projectId: ${default(map.get(args, "projectId"), sys.get_env("GOOGLE_CLOUD_PROJECT_ID"))} 34 | - collection: ${default(map.get(args, "collection"), "crm-import")} 35 | - fs_key: ${default(map.get(args, "fs_key"), "crm_id")} # change to column holding firestore key 36 | - maximumBytesBilled: ${default(map.get(args, "maximumBytesBilled"), 1000000000)} #10GB 37 | - bq_page_size: ${default(map.get(args, "bq_page_size"), 50)} # firestore only accepts batches up to 500. 
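    # Example invocation, assuming the workflow is deployed under the name
    # bq-to-fs-main-workflow (all arguments are optional and default to the
    # values assigned in init above):
    #   gcloud workflows run bq-to-fs-main-workflow \
    #     --data='{"collection": "crm-import", "fs_key": "crm_id", "bq_page_size": 50}'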
38 | - log_init: 39 | call: sys.log 40 | args: 41 | data: ${bq_query + projectId + collection + fs_key + string(maximumBytesBilled) + string(bq_page_size)} 42 | - startQuery: 43 | call: googleapis.bigquery.v2.jobs.insert 44 | args: 45 | projectId: ${projectId} 46 | body: 47 | configuration: 48 | query: 49 | useLegacySql: false 50 | maximumBytesBilled: ${maximumBytesBilled} 51 | query: ${bq_query} 52 | result: query 53 | - getPage: 54 | call: googleapis.bigquery.v2.jobs.getQueryResults 55 | args: 56 | projectId: ${projectId} 57 | jobId: ${query.jobReference.jobId} 58 | maxResults: ${bq_page_size} 59 | pageToken: ${pageToken} 60 | result: page 61 | - extract_schema: 62 | call: extract_schema 63 | args: 64 | page: ${page} 65 | result: schema_names 66 | - log_schema: 67 | call: sys.log 68 | args: 69 | data: ${schema_names} 70 | - parallel-executor: 71 | call: googleapis.workflowexecutions.v1.projects.locations.workflows.executions.run 72 | args: 73 | workflow_id: bq-to-firestore 74 | argument: 75 | page: ${page} 76 | schema_names: ${schema_names} 77 | projectId: ${projectId} 78 | collection: ${collection} 79 | fs_key: ${fs_key} # change to column holding firestore key 80 | - checkIfDone: 81 | switch: 82 | - condition: ${"pageToken" in page and page.pageToken != ""} 83 | assign: 84 | - pageToken: ${page.pageToken} 85 | next: getPage 86 | 87 | 88 | extract_schema: 89 | params: [page] 90 | steps: 91 | - init_schema_list: 92 | assign: 93 | - schema_list: [] 94 | - i: 0 95 | - array: ${page.schema.fields} 96 | - check_condition: 97 | switch: 98 | - condition: ${len(array) > i} 99 | next: iterate 100 | next: exit_loop 101 | - iterate: 102 | assign: 103 | - value: ${array[i].name} 104 | - schema_list: ${list.concat(schema_list, value)} 105 | - i: ${i+1} 106 | next: check_condition 107 | - exit_loop: 108 | return: ${schema_list} -------------------------------------------------------------------------------- /08-segmentation/bq-to-firestore/connector_write_firestore_workflow.yaml: -------------------------------------------------------------------------------- 1 | # [START workflows_connector_write_firestore] 2 | - init_variables: 3 | assign: 4 | - project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")} 5 | - collection: "peopleDatabase" 6 | - document: "smith.j" 7 | - values_to_write: 8 | FirstName: 9 | stringValue: "John" 10 | LastName: 11 | stringValue: "Smith" 12 | Age: 13 | integerValue: 32 14 | - write_to_firestore: 15 | call: googleapis.firestore.v1.projects.databases.documents.patch 16 | args: 17 | name: ${"projects/"+project+"/databases/(default)/documents/"+collection+"/"+document} 18 | body: 19 | fields: ${values_to_write} 20 | result: write_result 21 | - last: 22 | return: ${write_result.name} 23 | # [END workflows_connector_write_firestore] -------------------------------------------------------------------------------- /08-segmentation/crm_imports/crm_fake_data.R: -------------------------------------------------------------------------------- 1 | library(charlatan) # generate fake names 2 | library(bigQueryR) 3 | 4 | bqr_global_project("learning-ga4") 5 | bqr_global_dataset("crm_imports_us") 6 | 7 | # downloaded from GA4 BigQuery demo dataset 8 | cids <- read.csv(file = "08-segmentation/crm_imports/ga4-demo-cids.csv", 9 | colClasses = "character") 10 | 11 | # random order, only 50% of cids 12 | fake_logins <- cids[sample(nrow(cids)), , drop = FALSE] 13 | fake_logins <- head(fake_logins, nrow(fake_logins) / 2) 14 | fake_logins$crm_id <- sprintf("CRM%06d", seq.int(nrow(fake_logins))) 15 | 16 | 
# distinct user_ids 17 | 18 | fake_people <- nrow(fake_logins) 19 | ga4_last_date <- as.Date("2021-01-31") 20 | 21 | fake <- ch_generate("name", "job", n = fake_people) 22 | 23 | z <- DateTimeProvider$new() 24 | 25 | fake$created_ts <- unlist(lapply(1:fake_people, 26 | function(x){ 27 | z$date_time_between(start_date = as.Date("2001-03-05"), 28 | end_date = ga4_last_date) 29 | })) 30 | fake$created <- as.POSIXct(fake$created_ts, origin = "1970-01-01") 31 | 32 | # make it more likely to transact if you have these jobs 33 | fake$bias <- grepl("teacher|researcher|academic|school|engine|doctor|prof|surgeon|phd|dr|science", 34 | fake$job, ignore.case = TRUE) 35 | fake$transactions <- as.numeric(difftime(ga4_last_date, fake$created)) %/% 36 | runif(fake_people, 10000,90000) 37 | fake$transactions <- abs(ifelse(fake$bias, 38 | round(fake$transactions*runif(fake_people, 1.1, 2)), 39 | fake$transactions)) 40 | 41 | fake$revenue <- round(fake$transactions * runif(fake_people, 1,150),2) 42 | fake$permission <- as.logical(round(runif(fake_people, min = 0.4, max = 1))) 43 | fake$crm_id <- fake_logins$crm_id 44 | fake$cid <- as.character(fake_logins$user_pseudo_id) 45 | fake$bias <- NULL 46 | fake$created_ts <- NULL 47 | 48 | filename <- "08-segmentation/crm_imports/fake_crm.csv" 49 | write.csv(fake, file = filename, row.names = FALSE) 50 | 51 | # fake <- read.csv(filename,stringsAsFactors = FALSE, colClasses = "character") 52 | 53 | bqr_auth(email = "email@example.com") 54 | bqr_global_project("learning-ga4") 55 | bqr_global_dataset("crm_imports_us") 56 | 57 | bqr_delete_table(tableId = "fake_crm_transactions") 58 | bqr_create_table(tableId = "fake_crm_transactions", 59 | timePartitioning = TRUE) 60 | bqr_upload_data(tableId = "fake_crm_transactions", 61 | upload_data = fake) 62 | -------------------------------------------------------------------------------- /09-realtime-forecasting/importing-tidy-ga4-data.R: -------------------------------------------------------------------------------- 1 | library(googleAnalyticsR) 2 | library(rtweet) 3 | 4 | ga4s <- ga_account_list("ga4") 5 | 6 | gaid <- 206670707 7 | meta <- ga_meta("data", propertyId = gaid) 8 | 9 | article_reads <- ga_data(gaid, 10 | metrics = "eventCount", 11 | date_range = c("2021-07-01",as.character(Sys.Date())), 12 | dimensions = c("date", "customEvent:category"), 13 | orderBys = ga_data_order(+date), 14 | dim_filters = ga_data_filter(!"customEvent:category" == c("(not set)","null")), 15 | limit = -1) 16 | 17 | library(tidyr) 18 | library(dplyr) 19 | 20 | clean_cats <- article_reads |> 21 | rename(category = "customEvent:category", 22 | reads = "eventCount") |> 23 | mutate(category = tolower(category)) |> 24 | separate(category, 25 | into = paste0("category_",1:6), 26 | sep = "[^[:alnum:]-]+", 27 | fill = "right", extra = "drop") 28 | 29 | long_cats <- clean_cats |> 30 | pivot_longer( 31 | cols = starts_with("category_"), 32 | values_to = "categories", 33 | values_drop_na = TRUE 34 | ) 35 | 36 | 37 | agg_cats <- long_cats |> 38 | group_by(date, categories) |> 39 | summarise(category_reads = sum(reads), .groups = "drop_last") |> 40 | arrange(date, desc(category_reads)) 41 | -------------------------------------------------------------------------------- /admin/image-figs.R: -------------------------------------------------------------------------------- 1 | # replace with the folder where your .asciidoc files are 2 | book_folder <- "../learning-google-analytics/" 3 | 4 | # list the .asciidoc files 5 | chapters <- 
list.files(book_folder,pattern = "asciidoc$", full.names = TRUE) 6 | 7 | # read in the chapters 8 | book_text <- lapply(chapters, readLines) 9 | names(book_text) <- basename(chapters) 10 | 11 | # parse out only the image:: lines 12 | book_images <- lapply(book_text, \(x) x[grepl("^image", x)]) 13 | 14 | # create a list of dfs with image info 15 | images_df <- lapply(names(book_images), \(x){ 16 | o <- book_images[[x]] 17 | if(length(o) == 0) return(NULL) 18 | reggy <- "^image::images/(.+?\\.(png|jpg))\\[(.*)\\]" 19 | filenames <- gsub(reggy, "\\1", o) 20 | captions <- gsub(reggy, "\\3", o) 21 | chapter <- substr(x, 1,2) 22 | the_df <- data.frame(filename = filenames, caption = captions) 23 | the_df$fig_num <- paste0(chapter, "-", gsub(" ", "0", sprintf("%2d", 1:nrow(the_df)))) 24 | 25 | the_df 26 | }) 27 | 28 | # turn it into one data.frame and write to csv 29 | all_images <- Reduce(rbind, images_df) 30 | write.csv(all_images, file = "figure-log.csv", row.names = FALSE) 31 | 32 | # I then imported the CSV into GoogleSheets for review -------------------------------------------------------------------------------- /code-examples.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /figure-log.csv: -------------------------------------------------------------------------------- 1 | "filename","caption","fig_num" 2 | "gcp-pyramid.png","Google Cloud Platform Pyramid Hierarchy on which service you should choose for your applications","01-01" 3 | "data-architecture-7.png","Data Architecture for the Predictive Audiences use case: website data is sent to Google Analytics 4 which creates the predictive audience that is then exported to Google Ads","01-02" 4 | "chapter8_predictive.drawio.png","Data Architecture for the User Segmentation use case","01-03" 5 | "data-architecture-chapter_09.png","Real-time data is taken from GA4 and a forecast is created to inform employee what content they should prioritise for social media content and on-site banners via Google Optimise","01-04" 6 | "storage-decision-tree.png","A flowchart decision tree on selecting the correct GCP storage option","02-01" 7 | "enhanced-measurement-03.png","","03-01" 8 | "ga4-custom-event-parameters.png","Custom events and their parameters. As well as the custom parameters you send in, GA4 also collects useful data points such as ga_session_id and page_location","03-02" 9 | "gtm-setup-ga4-article_read.png","A suggested GTM setting for sending a GA4 custom event: article_read","03-03" 10 | "gtm-dom-selector-webpage.png","The blog post's publish date is available in the page HTML. 
Using your web browser's console you can find the CSS selector to pick up that data for use within Google Tag Manager","03-04" 11 | "gtm-dom-selector.png","The CSS code for the data can be used within Google Tag Manager's DOM Element Variable for use with GA4 and other tags","03-05" 12 | "ga4-custom-dimension-config.png","Configuring a custom dimension from the new parameter created with the article_read event","03-06" 13 | "simo-base-example-gtag-get-api.jpg","Write Client ID And Other GTAG Fields Into dataLayer","03-07" 14 | "messy-event-category-03.png","The article_read event contains a category parameter that has messy data due to multiple tags being recorded","03-08" 15 | "creating-r-viewer-03.png","Creating a custom event based off data captured via other events - in this case r_viewer is derived from the article_read event.","03-09" 16 | "created-events-list-03.png","Several created events based off of the custom category parameter from article_read","03-10" 17 | "mark-as-npa.png","Marking a use property as NPA to avoid it being used in targeting audiences","03-11" 18 | "cookieinfo-gtm-permissions.png","Permissions for the template code","03-12" 19 | "gtm-variable-template-instance.png","Creating a variable from the template","03-13" 20 | "gtm-consent-user-setup.png","Using the consent variable in a GA4 Event tag","03-14" 21 | "user_consent_setup.png","Setting up a user consent parameter in GA4","03-15" 22 | "event_consent_setup.png","Setting up an event consent parameter in GA4","03-16" 23 | "mp_sequence_diagram.png","","03-17" 24 | "ga4-bigquery-link.png","An example of a completed linking to BigQuery from the GA4 configuration screen","03-18" 25 | "ga4-bigquery-sql-example-1-result.png","An example result for running the BigQuery SQL on your GA4 exports from","03-19" 26 | "gcs-lifecycle-rules.png","Google Cloud Storage Lifecycle rules","03-20" 27 | "cloud-function-gcs-trigger-1.png","Creating a Cloud Function to trigger from a newly uploaded file in Cloud Storage","03-21" 28 | "cloud-function-gcs-trigger-2.png","Creating a Cloud Function to trigger from a newly uploaded file in Cloud Storage","03-22" 29 | "cloud-function-gcs-code-1.png","Adding code for Python Cloud Functions","03-23" 30 | "gcs-to-bq-logs1.png","Inspecting the Cloud Function logs we can see one file was imported to a specified schema, one used auto-detection","03-24" 31 | "gcs-to-bq-logs2.png","Inspecting the BigQuery logs to see the schema has been specified as expected","03-25" 32 | "gcs-to-bq-loaded.png","The BigQuery tables are imported from the CSV on Cloud Storage with the specified schema","03-26" 33 | "cloud-build-setup-2.png","Linking Cloud Build to the GitHub repository holding your files","03-27" 34 | "cloud-build-setup-3.png","A Cloud Build trigger that will activate the contents of the cloudbuild.yaml upon each commit to the GitHub repository","03-28" 35 | "cloud-build-setup-4.png","Setting the Cloud Build permission to deploy Cloud Functions","03-29" 36 | "cloud-build-setup-1.png","Files in a folder enabled for git","03-30" 37 | "cloud-build-setup-5.png","A successful deployment of a Cloud Function via Cloud Build","03-31" 38 | "tidy-1.png","Following three rules makes a dataset tidy: variables are in columns, observations are in rows, and values are in cells. From *R for Data Science* by Hadley Wickham and Garret Grolemund","04-01" 39 | "bigquery-ga4-exports-logs.png","A Cloud Logging filter for seeing when your GA4 BigQuery exports are ready. 
We shall use this to create a PubSub topic.","04-02" 40 | "bigquery-ga4-export-pubsub.png","Setting up your GA4 BigQuery log so it sends the entries to PubSub topic named ga4-bigquery","04-03" 41 | "cloud-storage-example.png","Files sitting within Cloud Storage in its WebUI","04-04" 42 | "cloud-storage-metadata.png","Various metadata associated with a file upload to Google Cloud Storage","04-05" 43 | "bigquery-scheduler.png","Setting up the query into a scheduled query","04-06" 44 | "airflow_dags.png","An example of an Airflow DAG.","04-07" 45 | "airflow-example-dag.png","An example of the DAG created in Airflow","04-08" 46 | "cloud-scheduler-list.png","Some Cloud Schedulers I have enabled for some tasks within my own Google Cloud Project","04-09" 47 | "cloud-build-log-example.png","A Cloud Build that has successfully built within the Google Cloud Console.","04-10" 48 | "bigquery-ga4-export-build-trigger.png","Setting up a Build Trigger that will build once the BigQuery export for GA4 is complete","04-11" 49 | "bigquery-ga4-export-build-permissions.png","Adding to the Cloud Build service account the permissions to execute BigQuery jobs","04-12" 50 | "pubsub-setting-up-dataflow-config.png","Setting up a Dataflow from within the Google Cloud Console for a PubSub topic into BigQuery via the pre-defined template","04-13" 51 | "dataflow-running.png","Starting up a running job for importing PubSub messages into BigQuery in real-time","04-14" 52 | "pubsub-dataflow-bigquery-schema.png","The BigQuery data schema to receive the PubSub Json","04-15" 53 | "pubsub-dataflow-bigquery-errors.png","Any errors from the data flow will appear in its own BigQuery table so you can examine the payloads","04-16" 54 | "pubsub-dataflow-bigquery.png","A successful streaming import from GA4 into GTM-SS to PubSub to BigQuery","04-17" 55 | "pubsub-to-bq-env-args.png","Setting the environment arguments for use within the cloud function","04-18" 56 | "pubsub-to-bq-parse-json.png","The raw data table receiving the PubSub stream from GA4 via GTM-SS can have its JSON parsed out with BigQuery's functions such as JSON_VALUE()","04-19" 57 | "bigquery-dataset-table-expiration.png","You can configure the table expiration time when you create a dataset","04-20" 58 | "ga4-attribution.png","In GA4 you can set attribution settings on how your conversions are attributed to which channel","05-01" 59 | "ga4-reporting-id.png","Selecting how users can be identified within GA4 reports","05-02" 60 | "lowest-channel.png","Writing questions in the GA4 search bar will parse itself to try and find the most appropriate GA4 report for you","05-03" 61 | "insights-ga4.png","Insights looks to flag the most important findings of the day when you log in","05-04" 62 | "bq-model-cheatsheet.png","This cheat sheet shows what use cases and BigQuery ML model may be most appropriate","05-05" 63 | "nlp-pipeline.png","An event based pipeline for processing text files on Cloud Storage as they arrive and put the Natural Language API results into a BigQuery table","05-06" 64 | "vertex-ai-dataset.png","Creating a dataset from BigQuery in Vertex AI","05-07" 65 | "ga4-demo-audience-list.png","A list of GA4 Audiences taken from the GA4 demo account for Google Merchandise Store","06-01" 66 | "audience-session-start-2-pvs.png","A configuration for session_start events with 2 further page_view events.","06-02" 67 | "audience-got-not-open.png","A configuration for users who received a notification but did not open it","06-03" 68 | 
"suggested-predictive-audiences.png","If you fulfil the criteria you will see predictive audiences available in your GA4 configuration","06-04" 69 | "optimise-target-audience.png","Selecting a GA4 audience within Google Optimise","06-05" 70 | "optimise-banner.png","Setting up a banner for the website that will trigger when the GA4 Audience segment is fulfilled.","06-06" 71 | "ga4-reports-trend.png","GA4 Standard Reports show you real-time updates and trends for your GA4 event data","06-07" 72 | "image:images/ga4-user-acquisition.png[Showing how users first arrived to my blog]","image:images/ga4-user-acquisition.png[Showing how users first arrived to my blog]","06-08" 73 | "ga4-select-goal-event.png","Selecting which of your events or goal conversions that channel contributed to","06-09" 74 | "ga4-segment-comparisons.png","Comparison of All Users and users who arrived via Google for the count of google analytics category articles","06-10" 75 | "ga4-merchandise-anomaly.png","An anomaly spike found in the merchandise data within the Google Merchandise Store demo GA$ account","06-11" 76 | "ga4-customise-collection.png","A custom collection of reports for my Blog","06-12" 77 | "ga4-explorations.png","The start of your Exploration work flow involves selecting or creating one from the start screen","06-13" 78 | "ga4-explorations-select-variables.png","Select the variables you think you will need in your exploration","06-14" 79 | "ga4-explorations-segments.png","Using the Segment overlap technique to see which users are from the US and use mobile devices","06-15" 80 | "ga4-exploration-fields.png","Selecting the appropriate fields for your exploration report","06-16" 81 | "ga4-exploration-user-explorer.png","The User Explorer report can drill down on an individual cookie ID","06-17" 82 | "ga4-pathing.png","Path analysis of which pages were visited after the two_pageviews event was triggered","06-18" 83 | "ga4-funnels.png","Examining a funnel drop-out journey","06-19" 84 | "ga4-cohorts.png","How many users who triggered the googleanalytics_viewer event came back to the website the subsequent months","06-20" 85 | "datastudio-connectors.png","Connecting Data Studio to GA4","06-21" 86 | "gar-data-studio-connectors.png","Connecting Data Studio via the Google Analytics connector vs the BigQuery table","06-22" 87 | "ga4-datastudio-databloo.png","A GA4 Data Studio example for GA4 from Databloo","06-23" 88 | "ga4_looker.jpg","Looker connects to GA4's BigQuery dataset and uses its LookML language to create useful data points","06-24" 89 | "form_submit_to_pubsub.png","The tag within GTM-SS for forwarding on your events to a HTTP endpoint","06-25" 90 | "firestore-example.png","Example data within a Firestore instance","06-26" 91 | "firestore-lookup-gtmss.png","A Firestore Lookup Variable in Google Tag Manager Serverside","06-27" 92 | "data-architecture-7.png","Data Architecture for the Predictive Audiences use case: website data is sent to Google Analytics 4 which creates the predictive audience that is then exported to Google Ads","07-01" 93 | "audience-creation.png","Customising a predictive audience","07-02" 94 | "predicitve-audeince-config.png","Configuration of an Audience showing likely purchasers in the next seven days","07-03" 95 | "chapter8_predictive.drawio.png","Data Architecture for the User Segmentation use case","08-01" 96 | "user_profession_08.png","Configuration of a custom field to hold user profession in GA4","08-02" 97 | "fake-crm-data-08.png","Fake CRM data within BigQuery generated to 
overlap the Google Merchandise Store cookieIds","08-03" 98 | "sql-example-8-ga4-screenshot.png","The results of a transaction query upon the public GA4 data","08-04" 99 | "ga4-crm-join-result-08.png","An example showing the result of joining the demo datasets from GA4 and CRM","08-05" 100 | "firestore-crm-import.png","Your CRM data imported into Firestore from BigQuery","08-06" 101 | "ga4-gtm-ss-user-id.png","Setting up a custom event to extract the user_id we will use as the document name to fetch data from Firestore","08-07" 102 | "ga4-gtm-ss-job.png","Configuring a GTM-SS Firestore Lookup variable so call our Firestore collection containing the CRM data, using user_id as its document reference","08-08" 103 | "ga4-gtm-ss-tag.png","Configuring a GA4 GTM-SS event tag with user properties adding the Firestore value","08-09" 104 | "doctors-who-may-purchase.png","Adding a new custom dimension to our Audience definitions, that will combine with the existing Predictive Audience","08-10" 105 | "predicitve-audeince-config.png","Configuration of an Audience showing likely purchasers in the next seven days","08-11" 106 | "data-architecture-chapter_09.png","Real-time data is taken from GA4 and a forecast is created to inform employee what content they should prioritise for social media content and on-site banners via Google Optimise","09-01" 107 | "medical-users-medical-content-audiences.png","Creating an audience we can query in the Real-Time API matching /medical-books to users who are Doctors","09-02" 108 | "medical-users-medical-content-event-trigger.png","When users qualify for the audience, they can set off an event that can be seen within the Real-Time API","09-03" 109 | "cloud-run-container.png","The Container Image URL will be the one you have specified for your local Docker build","09-04" 110 | "r-forecasts.png","Some example output for forecasting GA4 real-time Audience data","09-05" 111 | "r-highcharter.png","Output from the script that turns R forecast objects into `www.highcharts.com` plots","09-06" 112 | "create-service-key-for-ga4.png","Create a service key for use within your app","09-07" 113 | "create-new-json-key.png","Once you have created the service account, download a JSON key for use within your application","09-08" 114 | "adding-service-account-ga4.png","Adding a service email as a user to the GA4 interface for use within your scripts","09-09" 115 | "ga4-realtime-shiny-app.png","A running Shiny app with real-time GA4 data and a forecast","09-10" 116 | -------------------------------------------------------------------------------- /gar_email/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/gcer-public/googleauthr-verse:latest 2 | RUN install2.r --error \ 3 | -r 'http://cran.rstudio.com' \ 4 | blastula formattable 5 | --------------------------------------------------------------------------------
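This image extends the public googleauthr-verse base with the blastula and formattable packages, which are the libraries loaded by 06-activation/send_email.R, so it is presumably the environment for that Cloud Build email step. A minimal sketch of building and pushing it for use in a build pipeline - the image name and project id are assumptions, substitute your own:

    docker build -t gcr.io/learning-ga4/gar-email ./gar_email
    docker push gcr.io/learning-ga4/gar-email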