├── ecom_analytics
├── bronze
│ ├── macros
│ │ └── .gitkeep
│ ├── seeds
│ │ ├── .gitkeep
│ │ └── product_category_name_translation.csv
│ ├── tests
│ │ └── .gitkeep
│ ├── analyses
│ │ └── .gitkeep
│ ├── snapshots
│ │ └── .gitkeep
│ ├── models
│ │ ├── olist_order_payments.sql
│ │ ├── olist_orders.sql
│ │ ├── olist_order_items.sql
│ │ └── olist_products.sql
│ ├── README.md
│ ├── dbt_project.yml
│ └── logs
│ │ └── dbt.log.legacy
├── gold
│ ├── analyses
│ │ └── .gitkeep
│ ├── macros
│ │ └── .gitkeep
│ ├── seeds
│ │ └── .gitkeep
│ ├── snapshots
│ │ └── .gitkeep
│ ├── tests
│ │ └── .gitkeep
│ ├── .gitignore
│ ├── models
│ │ └── sales_values_by_category.sql
│ ├── README.md
│ └── dbt_project.yml
├── silver
│ ├── macros
│ │ ├── .gitkeep
│ │ └── classify_abc.sql
│ ├── seeds
│ │ └── .gitkeep
│ ├── tests
│ │ └── .gitkeep
│ ├── analyses
│ │ └── .gitkeep
│ ├── snapshots
│ │ └── .gitkeep
│ ├── packages.yml
│ ├── models
│ │ ├── silver
│ │ │ ├── dim_products.sql
│ │ │ └── fact_sales.sql
│ │ ├── gold
│ │ │ └── sales_values_by_category.sql
│ │ └── orders_sources.yml
│ ├── dbt_project.yml
│ └── logs
│ │ └── dbt.log.legacy
├── .user.yml
├── profiles.yml
└── Makefile
├── trino
├── node.properties
├── catalog
│ ├── de_psql.properties
│ ├── de_mysql.properties
│ └── warehouse.properties
├── config.properties
└── jvm.config
├── requirements.txt
├── Makefile
├── kyuubi
├── spark-defaults.conf
├── hive-site.xml
└── Dockerfile
├── spark
├── spark-defaults.conf
├── Dockerfile
└── hive-site.xml
├── .env
├── hive-metastore
├── entrypoint.sh
├── Dockerfile
└── metastore-site.xml
├── mysql_schemas.sql
├── .gitignore
├── README.md
└── docker-compose.yml
/ecom_analytics/bronze/macros/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/macros/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/macros/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ecom_analytics/.user.yml:
--------------------------------------------------------------------------------
1 | id: 4e0c8ec6-2dd5-4241-927d-db28b6327d8a
2 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | target/
3 | dbt_packages/
4 | logs/
5 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/models/sales_values_by_category.sql:
--------------------------------------------------------------------------------
1 | -- Publishes the silver-layer category sales aggregate into the gold schema.
2 | -- Columns are listed explicitly (instead of SELECT *) so the gold table keeps a
3 | -- stable shape even if the upstream table gains columns.
4 | -- NOTE(review): assumes warehouse.silver.sales_values_by_category is the table built
5 | -- by the silver project's model of the same name — confirm.
6 | SELECT
7 |     monthly
8 |     , category
9 |     , total_sales
10 |     , total_bills
11 |     , values_per_bills
12 | FROM warehouse.silver.sales_values_by_category
--------------------------------------------------------------------------------
/trino/node.properties:
--------------------------------------------------------------------------------
1 | node.environment=docker
2 | node.data-dir=/data/trino
3 | plugin.dir=/usr/lib/trino/plugin
4 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - package: dbt-labs/dbt_external_tables
3 | version: 0.8.2
4 | - package: dbt-labs/dbt_utils
5 | version: 0.9.2
--------------------------------------------------------------------------------
/trino/catalog/de_psql.properties:
--------------------------------------------------------------------------------
1 | connector.name=postgresql
2 | connection-url=jdbc:postgresql://de_psql:5432/ecom_analytics
3 | connection-user=admin
4 | connection-password=admin123
--------------------------------------------------------------------------------
/trino/config.properties:
--------------------------------------------------------------------------------
1 | coordinator=true
2 | node-scheduler.include-coordinator=true
3 | http-server.http.port=8080
4 | discovery.uri=http://localhost:8080
5 | discovery-server.enabled=true
6 |
--------------------------------------------------------------------------------
/trino/catalog/de_mysql.properties:
--------------------------------------------------------------------------------
1 | connector.name=mysql
2 | connection-url=jdbc:mysql://de_mysql:3306?allowPublicKeyRetrieval=true&useSSL=false
3 | connection-user=root
4 | connection-password=admin
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas==1.5.2
2 | SQLAlchemy==1.4.45
3 | PyMySQL==1.0.2
4 | minio==7.1.12
5 | PyHive==0.6.5
6 | dbt-core==1.4.0
7 | # Single pinned dbt-spark requirement carrying both extras; listing the package
8 | # several times (pinned once, unpinned twice) is redundant and some pip versions
9 | # reject it as a duplicate requirement.
10 | dbt-spark[PyHive,session]==1.4.0
11 | dbt-trino==1.4.0
12 | dbt-postgres==1.4.0
--------------------------------------------------------------------------------
/ecom_analytics/silver/macros/classify_abc.sql:
--------------------------------------------------------------------------------
1 | {#
2 |   classify_abc(column_name)
3 |
4 |   Renders a CASE expression that buckets the given column into ABC classes:
5 |     value <= 50  -> 'A (50%)'
6 |     value <= 90  -> 'B (40%)'
7 |     otherwise    -> 'C (10%)'   (NULL also lands here, since both comparisons are unknown)
8 |
9 |   NOTE(review): presumably the column holds a cumulative percentage in 0-100 — confirm with callers.
10 |
11 |   The identifier is quoted with adapter.quote() so each adapter gets its own quoting
12 |   character. Hard-coded double quotes are read as a string literal by Spark SQL
13 |   (the engine this project targets), which would break the numeric comparison.
14 | #}
15 | {% macro classify_abc(column_name) %}
16 |
17 |     CASE
18 |         WHEN {{ adapter.quote(column_name) }} <= 50 THEN 'A (50%)'
19 |         WHEN {{ adapter.quote(column_name) }} <= 90 THEN 'B (40%)'
20 |         ELSE 'C (10%)'
21 |     END
22 |
23 | {% endmacro %}
--------------------------------------------------------------------------------
/ecom_analytics/silver/models/silver/dim_products.sql:
--------------------------------------------------------------------------------
1 | {#- Product dimension: each product id paired with its English category name. -#}
2 | {#- Inner join: products whose category has no translation row are not emitted. -#}
3 | SELECT
4 |     prod.product_id,
5 |     tr.product_category_name_english
6 | FROM {{ source('silver', 'olist_products') }} AS prod
7 | INNER JOIN {{ source('silver', 'product_category_name_translation') }} AS tr
8 |     ON prod.product_category_name = tr.product_category_name
--------------------------------------------------------------------------------
/trino/catalog/warehouse.properties:
--------------------------------------------------------------------------------
1 | connector.name=delta-lake
2 | hive.metastore.uri=thrift://hive-metastore:9083
3 | hive.s3.endpoint=http://minio:9000
4 | hive.s3.aws-access-key=minio
5 | hive.s3.aws-secret-key=minio123
6 | hive.s3.path-style-access=true
7 | delta.enable-non-concurrent-writes=true
8 | delta.unique-table-location=false
--------------------------------------------------------------------------------
/ecom_analytics/bronze/models/olist_order_payments.sql:
--------------------------------------------------------------------------------
1 | {#- Bronze copy of the order payments table read through the de_mysql catalog. -#}
2 | {{ config(unique_key='order_id') }}
3 |
4 | SELECT
5 |     src.order_id,
6 |     src.payment_sequential,
7 |     src.payment_type,
8 |     src.payment_installments,
9 |     src.payment_value
10 | FROM de_mysql.brazillian_ecommerce.olist_order_payments_dataset AS src
--------------------------------------------------------------------------------
/trino/jvm.config:
--------------------------------------------------------------------------------
1 | -server
2 | -Xmx1G
3 | -XX:-UseBiasedLocking
4 | -XX:+UseG1GC
5 | -XX:G1HeapRegionSize=32M
6 | -XX:+ExplicitGCInvokesConcurrent
7 | -XX:+HeapDumpOnOutOfMemoryError
8 | -XX:+UseGCOverheadLimit
9 | -XX:+ExitOnOutOfMemoryError
10 | -XX:ReservedCodeCacheSize=256M
11 | -Djdk.attach.allowAttachSelf=true
12 | -Djdk.nio.maxCachedBufferSize=2000000
--------------------------------------------------------------------------------
/ecom_analytics/silver/models/silver/fact_sales.sql:
--------------------------------------------------------------------------------
1 | {#- Sales fact: orders combined with their items and their payments. -#}
2 | {#- NOTE(review): both joins match on order_id only, so an order with several items and
3 |     several payments yields one row per item/payment pair — confirm downstream sums expect this. -#}
4 | SELECT
5 |     ord.order_id,
6 |     ord.customer_id,
7 |     ord.order_purchase_timestamp,
8 |     item.product_id,
9 |     pay.payment_value,
10 |     ord.order_status
11 | FROM {{ source('silver', 'olist_orders') }} AS ord
12 | INNER JOIN {{ source('silver', 'olist_order_items') }} AS item
13 |     ON ord.order_id = item.order_id
14 | INNER JOIN {{ source('silver', 'olist_order_payments') }} AS pay
15 |     ON ord.order_id = pay.order_id
--------------------------------------------------------------------------------
/ecom_analytics/bronze/models/olist_orders.sql:
--------------------------------------------------------------------------------
1 | {#- Bronze copy of the orders table read through the de_mysql catalog. -#}
2 | {{ config(unique_key='order_id') }}
3 |
4 | SELECT
5 |     src.order_id,
6 |     src.customer_id,
7 |     src.order_status,
8 |     src.order_purchase_timestamp,
9 |     src.order_approved_at,
10 |     src.order_delivered_carrier_date,
11 |     src.order_delivered_customer_date,
12 |     src.order_estimated_delivery_date
13 | FROM de_mysql.brazillian_ecommerce.olist_orders_dataset AS src
--------------------------------------------------------------------------------
/ecom_analytics/bronze/models/olist_order_items.sql:
--------------------------------------------------------------------------------
1 | {#- Bronze copy of the order items table read through the de_mysql catalog. -#}
2 | {#- Materialization settings are spelled out here rather than inherited from dbt_project.yml. -#}
3 | {{ config(
4 |     materialized='incremental',
5 |     incremental_strategy='delete+insert',
6 |     unique_key='order_id'
7 | ) }}
8 |
9 | SELECT
10 |     src.order_id,
11 |     src.order_item_id,
12 |     src.product_id,
13 |     src.seller_id,
14 |     src.shipping_limit_date,
15 |     src.price,
16 |     src.freight_value
17 | FROM de_mysql.brazillian_ecommerce.olist_order_items_dataset AS src
--------------------------------------------------------------------------------
/ecom_analytics/bronze/models/olist_products.sql:
--------------------------------------------------------------------------------
1 | {#- Bronze copy of the products table read through the de_mysql catalog. -#}
2 | {#- Column names (including the "lenght" spelling) are taken verbatim from the source table. -#}
3 | {{ config(unique_key='product_id') }}
4 |
5 | SELECT
6 |     src.product_id,
7 |     src.product_category_name,
8 |     src.product_name_lenght,
9 |     src.product_description_lenght,
10 |     src.product_photos_qty,
11 |     src.product_weight_g,
12 |     src.product_length_cm,
13 |     src.product_height_cm,
14 |     src.product_width_cm
15 | FROM de_mysql.brazillian_ecommerce.olist_products_dataset AS src
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Docker Compose lifecycle targets plus interactive database shells.
2 | # Credentials and hosts used in the recipes come from .env.
3 | include .env
4 |
5 | # None of these targets produce files; declaring them phony keeps a stray file or
6 | # directory with the same name (e.g. "build") from making a target look up to date.
7 | .PHONY: build up down restart to_mysql to_mysql_root to_psql
8 |
9 | build:
10 | 	docker-compose build
11 |
12 | up:
13 | 	docker-compose --env-file .env up -d
14 |
15 | down:
16 | 	docker-compose --env-file .env down
17 |
18 | # $(MAKE) rather than a literal "make" so flags and the make binary in use carry over.
19 | restart:
20 | 	$(MAKE) down && $(MAKE) up
21 |
22 | # MySQL shell as the application user, with LOCAL INFILE enabled.
23 | to_mysql:
24 | 	docker exec -it de_mysql mysql --local-infile=1 -u"${MYSQL_USER}" -p"${MYSQL_PASSWORD}" brazillian_ecommerce
25 |
26 | # MySQL shell as root.
27 | to_mysql_root:
28 | 	docker exec -it de_mysql mysql -u"root" -p"${MYSQL_ROOT_PASSWORD}"
29 |
30 | # psql shell inside the de_psql container.
31 | to_psql:
32 | 	docker exec -ti de_psql psql postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}
33 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/README.md:
--------------------------------------------------------------------------------
1 | Welcome to your new dbt project!
2 |
3 | ### Using the starter project
4 |
5 | Try running the following commands:
6 | - dbt run
7 | - dbt test
8 |
9 |
10 | ### Resources:
11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
14 | - Find [dbt events](https://events.getdbt.com) near you
15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
16 |
--------------------------------------------------------------------------------
/ecom_analytics/gold/README.md:
--------------------------------------------------------------------------------
1 | Welcome to your new dbt project!
2 |
3 | ### Using the starter project
4 |
5 | Try running the following commands:
6 | - dbt run
7 | - dbt test
8 |
9 |
10 | ### Resources:
11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
14 | - Find [dbt events](https://events.getdbt.com) near you
15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
16 |
--------------------------------------------------------------------------------
/kyuubi/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | spark.jars jars/*
2 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension
3 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog
4 | spark.hadoop.fs.s3a.endpoint http://minio:9000
5 | spark.hadoop.fs.s3a.access.key minio
6 | spark.hadoop.fs.s3a.secret.key minio123
7 | spark.hadoop.fs.s3a.path.style.access true
8 | spark.hadoop.fs.s3a.connection.ssl.enabled false
9 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem
--------------------------------------------------------------------------------
/spark/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | spark.jars jars/*
2 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension
3 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog
4 | spark.hadoop.fs.s3a.endpoint http://minio:9000
5 | spark.hadoop.fs.s3a.access.key minio
6 | spark.hadoop.fs.s3a.secret.key minio123
7 | spark.hadoop.fs.s3a.path.style.access true
8 | spark.hadoop.fs.s3a.connection.ssl.enabled false
9 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem
--------------------------------------------------------------------------------
/ecom_analytics/profiles.yml:
--------------------------------------------------------------------------------
1 | trino:
2 | target: dev
3 | outputs:
4 | dev:
5 | type: trino
6 | user: admin
7 | host: localhost
8 | port: 8080
9 | database: warehouse
10 | schema: bronze
11 | threads: 1
12 |
13 | spark:
14 | target: dev
15 | outputs:
16 | dev:
17 | type: spark
18 | method: thrift
19 | host: localhost
20 | port: 10000
21 | schema: silver
22 | connect_retries: 5
23 | connect_timeout: 60
24 | retry_all: true
25 |
26 | gold:
27 | target: dev
28 | outputs:
29 | dev:
30 | type: trino
31 | user: admin
32 | host: localhost
33 | port: 8080
34 | database: de_psql
35 | schema: gold
36 | threads: 1
--------------------------------------------------------------------------------
/ecom_analytics/Makefile:
--------------------------------------------------------------------------------
1 | # dbt entry points for the bronze, silver and gold projects.
2 | # profiles.yml lives next to this Makefile, so every command passes --profiles-dir ./
3 | # and selects the project with --project-dir.
4 | .PHONY: run_bronze run_silver run_gold run_all run_external install_deps seed docs select test
5 |
6 | run_bronze:
7 | 	dbt run --project-dir ./bronze --profiles-dir ./ --full-refresh
8 |
9 | run_silver:
10 | 	dbt run --project-dir ./silver --profiles-dir ./ --full-refresh
11 |
12 | run_gold:
13 | 	dbt run --project-dir ./gold --profiles-dir ./ --full-refresh
14 |
15 | # Full pipeline, in dependency order. $(MAKE) keeps flags and the make binary consistent.
16 | run_all:
17 | 	$(MAKE) run_bronze && $(MAKE) seed && $(MAKE) run_external && $(MAKE) run_silver && $(MAKE) run_gold
18 |
19 | # Runs the stage_external_sources operation in the silver project.
20 | run_external:
21 | 	dbt run-operation --project-dir ./silver --profiles-dir ./ stage_external_sources
22 |
23 | # Installs the packages listed in silver/packages.yml.
24 | install_deps:
25 | 	dbt deps --project-dir ./silver --profiles-dir ./
26 |
27 | seed:
28 | 	dbt seed --project-dir ./bronze --profiles-dir ./ --full-refresh
29 |
30 | docs:
31 | 	dbt docs generate --project-dir ./silver --profiles-dir ./ && dbt docs serve --port 8081 --project-dir ./silver --profiles-dir ./
32 |
33 | # Run a subset of the silver project: make select script=<selector>
34 | select:
35 | 	dbt run --project-dir ./silver --profiles-dir ./ --select $(script)
36 |
37 | test:
38 | 	dbt test --project-dir ./silver --profiles-dir ./
--------------------------------------------------------------------------------
/spark/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM docker.io/bitnami/spark:3.3
2 |
3 | USER root
4 |
5 | # Install prerequisites; drop the apt package lists afterwards to keep the layer small.
6 | RUN apt-get update \
7 |     && apt-get install -y curl \
8 |     && rm -rf /var/lib/apt/lists/*
9 |
10 | # Fetch the S3 / Delta Lake / MySQL jars straight into Spark's jars directory.
11 | # --fail makes the build stop on an HTTP error instead of saving the error page as a .jar.
12 | RUN cd /opt/bitnami/spark/jars \
13 |     && curl -fsSLO https://repo1.maven.org/maven2/software/amazon/awssdk/s3/2.18.41/s3-2.18.41.jar \
14 |     && curl -fsSLO https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.12.367/aws-java-sdk-1.12.367.jar \
15 |     && curl -fsSLO https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar \
16 |     && curl -fsSLO https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar \
17 |     && curl -fsSLO https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.19/mysql-connector-java-8.0.19.jar
--------------------------------------------------------------------------------
/ecom_analytics/silver/models/gold/sales_values_by_category.sql:
--------------------------------------------------------------------------------
1 | {#- Monthly sales per product category, computed from delivered orders only. -#}
2 | WITH product_day_totals AS (
3 |     -- One row per (purchase date, product): summed payment value and distinct order count.
4 |     SELECT
5 |         CAST(order_purchase_timestamp AS DATE) AS daily,
6 |         product_id,
7 |         ROUND(SUM(CAST(payment_value AS FLOAT)), 2) AS sales,
8 |         COUNT(DISTINCT order_id) AS bills
9 |     FROM {{ ref('fact_sales') }}
10 |     WHERE order_status = 'delivered'
11 |     GROUP BY
12 |         CAST(order_purchase_timestamp AS DATE),
13 |         product_id
14 | ),
15 |
16 | category_day_totals AS (
17 |     -- Attach the English category name and a year-month label to each daily row.
18 |     SELECT
19 |         pdt.daily,
20 |         DATE_FORMAT(pdt.daily, 'y-MM') AS monthly,
21 |         prod.product_category_name_english AS category,
22 |         pdt.sales,
23 |         pdt.bills,
24 |         (pdt.sales / pdt.bills) AS values_per_bills
25 |     FROM product_day_totals AS pdt
26 |     INNER JOIN {{ ref('dim_products') }} AS prod
27 |         ON pdt.product_id = prod.product_id
28 | )
29 |
30 | -- Roll the daily rows up to one row per (month, category).
31 | SELECT
32 |     monthly,
33 |     category,
34 |     SUM(sales) AS total_sales,
35 |     SUM(bills) AS total_bills,
36 |     (SUM(sales) * 1.0 / SUM(bills)) AS values_per_bills
37 | FROM category_day_totals
38 | GROUP BY
39 |     monthly,
40 |     category
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | # MySQL
2 | MYSQL_HOST=de_mysql
3 | MYSQL_PORT=3306
4 | MYSQL_DATABASE=metastore_db
5 | MYSQL_ROOT_PASSWORD=admin
6 | MYSQL_USER=admin
7 | MYSQL_PASSWORD=admin
8 |
9 | # PostgreSQL
10 | POSTGRES_HOST=de_psql
11 | POSTGRES_PORT=5432
12 | POSTGRES_DB=postgres
13 | POSTGRES_USER=admin
14 | POSTGRES_PASSWORD=admin123
15 | POSTGRES_HOST_AUTH_METHOD=trust
16 |
17 | # MinIO
18 | MINIO_ROOT_USER=minio
19 | MINIO_ROOT_PASSWORD=minio123
20 | MINIO_ACCESS_KEY=minio
21 | MINIO_SECRET_KEY=minio123
22 |
23 | # MinIO credentials
24 | AWS_ACCESS_KEY_ID=minio
25 | AWS_SECRET_ACCESS_KEY=minio123
26 | AWS_ACCESS_KEY=minio
27 | AWS_SECRET_KEY=minio123
28 |
29 | # Spark worker
30 | SPARK_MODE="worker"
31 | SPARK_MASTER_URL="spark://spark-master:7077"
32 | SPARK_WORKER_MEMORY="2G"
33 | SPARK_WORKER_CORES="1"
34 | SPARK_RPC_AUTHENTICATION_ENABLED="no"
35 | SPARK_RPC_ENCRYPTION_ENABLED="no"
36 | SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED="no"
37 | SPARK_SSL_ENABLED="no"
38 |
39 | # Metabase
40 | MB_DB_TYPE=postgres
41 | MB_DB_DBNAME=metabaseappdb
42 | MB_DB_PORT=5432
43 | MB_DB_USER=admin
44 | MB_DB_PASS=admin123
45 | MB_DB_HOST=de_psql
46 |
--------------------------------------------------------------------------------
/hive-metastore/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Entrypoint for the Hive metastore container:
3 | #   1. wait (bounded) for the backing database to accept TCP connections,
4 | #   2. initialise the metastore schema if schematool cannot read it,
5 | #   3. start the metastore service.
6 |
7 | export HADOOP_HOME=/opt/hadoop-3.2.0
8 | export HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.375.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.2.0.jar
9 | export JAVA_HOME=/usr/local/openjdk-8
10 |
11 | # Database host/port to probe. The defaults match the JDBC URL in metastore-site.xml
12 | # (de_mysql:3306); MYSQL_HOST / MYSQL_PORT override them when set in the environment.
13 | DB_HOST="${MYSQL_HOST:-de_mysql}"
14 | DB_PORT="${MYSQL_PORT:-3306}"
15 |
16 | # Make sure the database is ready: telnet prints "Connected to <host>." on its second line.
17 | MAX_TRIES=8
18 | CURRENT_TRY=1
19 | SLEEP_BETWEEN_TRY=4
20 | until [ "$(telnet "$DB_HOST" "$DB_PORT" | sed -n 2p)" = "Connected to ${DB_HOST}." ] || [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; do
21 |     echo "Waiting for database server at ${DB_HOST}:${DB_PORT}..."
22 |     sleep "$SLEEP_BETWEEN_TRY"
23 |     CURRENT_TRY=$((CURRENT_TRY + 1))
24 | done
25 |
26 | # A timeout is only reported; startup continues and schematool gets the final say.
27 | if [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; then
28 |     echo "WARNING: Timeout when waiting for database server at ${DB_HOST}:${DB_PORT}."
29 | fi
30 |
31 | # Check if schema exists
32 | /opt/apache-hive-metastore-3.0.0-bin/bin/schematool -dbType mysql -info
33 |
34 | if [ $? -eq 1 ]; then
35 |     echo "Getting schema info failed. Probably not initialized. Initializing..."
36 |     /opt/apache-hive-metastore-3.0.0-bin/bin/schematool -initSchema -dbType mysql
37 | fi
38 |
39 | # exec replaces this shell so the metastore process receives container signals directly.
40 | exec /opt/apache-hive-metastore-3.0.0-bin/bin/start-metastore
--------------------------------------------------------------------------------
/ecom_analytics/bronze/dbt_project.yml:
--------------------------------------------------------------------------------
1 |
2 | # Name your project! Project names should contain only lowercase characters
3 | # and underscores. A good package name should reflect your organization's
4 | # name or the intended use of these models
5 | name: 'bronze'
6 | version: '1.0.0'
7 | config-version: 2
8 |
9 | # This setting configures which "profile" dbt uses for this project.
10 | profile: 'trino'
11 |
12 | # These configurations specify where dbt should look for different types of files.
13 | # The `model-paths` config, for example, states that models in this project can be
14 | # found in the "models/" directory. You probably won't need to change these!
15 | model-paths: ["models"]
16 | analysis-paths: ["analyses"]
17 | test-paths: ["tests"]
18 | seed-paths: ["seeds"]
19 | macro-paths: ["macros"]
20 | snapshot-paths: ["snapshots"]
21 |
22 | target-path: "target" # directory which will store compiled SQL files
23 | clean-targets: # directories to be removed by `dbt clean`
24 | - "target"
25 | - "dbt_packages"
26 |
27 |
28 | # Configuring models
29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
30 |
31 | # Build every model in the bronze project incrementally, using the delete+insert strategy.
32 | models:
33 | bronze:
34 | +materialized: incremental
35 | +incremental_strategy: delete+insert
36 | +file_format: delta
37 | +location_root: s3a://warehouse/bronze
38 |
--------------------------------------------------------------------------------
/spark/hive-site.xml:
--------------------------------------------------------------------------------
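1 | <configuration>
2 |   <property>
3 |     <name>javax.jdo.option.ConnectionDriverName</name>
4 |     <value>com.mysql.cj.jdbc.Driver</value>
5 |   </property>
6 |   <property>
7 |     <name>javax.jdo.option.ConnectionURL</name>
8 |     <value>jdbc:mysql://de_mysql:3306/hive_metastore?createDatabaseIfNotExist=true</value>
9 |   </property>
10 |   <property>
11 |     <name>javax.jdo.option.ConnectionUserName</name>
12 |     <value>admin</value>
13 |   </property>
14 |   <property>
15 |     <name>javax.jdo.option.ConnectionPassword</name>
16 |     <value>admin</value>
17 |   </property>
18 |   <property>
19 |     <name>fs.s3a.access.key</name>
20 |     <value>minio</value>
21 |   </property>
22 |   <property>
23 |     <name>fs.s3a.secret.key</name>
24 |     <value>minio123</value>
25 |   </property>
26 |   <property>
27 |     <name>fs.s3a.endpoint</name>
28 |     <value>http://minio:9000</value>
29 |   </property>
30 |   <property>
31 |     <name>fs.s3a.path.style.access</name>
32 |     <value>true</value>
33 |   </property>
34 |   <property>
35 |     <name>fs.s3a.connection.ssl.enabled</name>
36 |     <value>false</value>
37 |   </property>
38 |   <property>
39 |     <name>hive.metastore.warehouse.dir</name>
40 |     <value>s3a://warehouse</value>
41 |   </property>
42 | </configuration>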
--------------------------------------------------------------------------------
/kyuubi/hive-site.xml:
--------------------------------------------------------------------------------
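1 | <configuration>
2 |   <property>
3 |     <name>javax.jdo.option.ConnectionDriverName</name>
4 |     <value>com.mysql.cj.jdbc.Driver</value>
5 |   </property>
6 |   <property>
7 |     <name>javax.jdo.option.ConnectionURL</name>
8 |     <value>jdbc:mysql://de_mysql:3306/hive_metastore?createDatabaseIfNotExist=true</value>
9 |   </property>
10 |   <property>
11 |     <name>javax.jdo.option.ConnectionUserName</name>
12 |     <value>admin</value>
13 |   </property>
14 |   <property>
15 |     <name>javax.jdo.option.ConnectionPassword</name>
16 |     <value>admin</value>
17 |   </property>
18 |   <property>
19 |     <name>fs.s3a.access.key</name>
20 |     <value>minio</value>
21 |   </property>
22 |   <property>
23 |     <name>fs.s3a.secret.key</name>
24 |     <value>minio123</value>
25 |   </property>
26 |   <property>
27 |     <name>fs.s3a.endpoint</name>
28 |     <value>http://minio:9000</value>
29 |   </property>
30 |   <property>
31 |     <name>fs.s3a.path.style.access</name>
32 |     <value>true</value>
33 |   </property>
34 |   <property>
35 |     <name>fs.s3a.connection.ssl.enabled</name>
36 |     <value>false</value>
37 |   </property>
38 |   <property>
39 |     <name>hive.metastore.warehouse.dir</name>
40 |     <value>s3a://warehouse</value>
41 |   </property>
42 | </configuration>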
--------------------------------------------------------------------------------
/ecom_analytics/gold/dbt_project.yml:
--------------------------------------------------------------------------------
1 |
2 | # Name your project! Project names should contain only lowercase characters
3 | # and underscores. A good package name should reflect your organization's
4 | # name or the intended use of these models
5 | name: 'gold'
6 | version: '1.0.0'
7 | config-version: 2
8 |
9 | # This setting configures which "profile" dbt uses for this project.
10 | profile: 'gold'
11 |
12 | # These configurations specify where dbt should look for different types of files.
13 | # The `model-paths` config, for example, states that models in this project can be
14 | # found in the "models/" directory. You probably won't need to change these!
15 | model-paths: ["models"]
16 | analysis-paths: ["analyses"]
17 | test-paths: ["tests"]
18 | seed-paths: ["seeds"]
19 | macro-paths: ["macros"]
20 | snapshot-paths: ["snapshots"]
21 |
22 | target-path: "target" # directory which will store compiled SQL files
23 | clean-targets: # directories to be removed by `dbt clean`
24 | - "target"
25 | - "dbt_packages"
26 |
27 |
28 | # Configuring models
29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
30 |
31 | # In this config, we tell dbt to build all models in the gold project
32 | # as tables. These settings can be overridden in the individual model
33 | # files using the `{{ config(...) }}` macro.
34 | models:
35 | gold:
36 | # Config indicated by + and applies to all files under models/
37 | +materialized: table
--------------------------------------------------------------------------------
/hive-metastore/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:8u342-jre
2 |
3 | # OS packages: Python tooling, procps, and telnet (entrypoint.sh uses telnet to probe the database).
4 | RUN apt-get update \
5 |     && apt-get install --assume-yes python3 python3-pip procps telnet \
6 |     && apt-get clean \
7 |     && rm -rf /var/lib/apt/lists/*
8 |
9 | RUN pip3 install pyspark~=3.3.1 pandas~=1.5.3
10 |
11 | WORKDIR /opt
12 |
13 | ENV HADOOP_VERSION=3.2.0
14 | ENV METASTORE_VERSION=3.0.0
15 |
16 | ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION}
17 | ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin
18 |
19 | # Both Apache tarballs come from archive.apache.org, which keeps old releases available;
20 | # the regular download mirror only carries current releases.
21 | # --fail stops curl from piping an HTTP error page into tar.
22 | RUN curl -fL https://archive.apache.org/dist/hive/hive-standalone-metastore-${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - && \
23 |     curl -fL https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
24 |     curl -fL https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.19.tar.gz | tar zxf - && \
25 |     cp mysql-connector-java-8.0.19/mysql-connector-java-8.0.19.jar ${HIVE_HOME}/lib/ && \
26 |     rm -rf mysql-connector-java-8.0.19
27 |
28 | COPY metastore-site.xml ${HIVE_HOME}/conf
29 | COPY entrypoint.sh /entrypoint.sh
30 |
31 | # Run the service as an unprivileged hive user that owns the metastore install and the entrypoint.
32 | RUN groupadd -r hive --gid=1000 && \
33 |     useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive && \
34 |     chown hive:hive -R ${HIVE_HOME} && \
35 |     chown hive:hive /entrypoint.sh && chmod +x /entrypoint.sh
36 |
37 | USER hive
38 | EXPOSE 9083
39 |
40 | ENTRYPOINT ["sh", "-c", "/entrypoint.sh"]
--------------------------------------------------------------------------------
/kyuubi/Dockerfile:
--------------------------------------------------------------------------------
1 | # NOTE(review): master-snapshot is a moving tag, while the jar directory below hard-codes
2 | # spark-3.3.1-bin-hadoop3 — confirm the two still match, or pin the image tag.
3 | FROM apache/kyuubi:master-snapshot
4 |
5 | # Jars directory of the Spark distribution referenced throughout this build.
6 | ARG SPARK_JARS=/opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars
7 |
8 | # Fetch the S3 / Delta Lake / hadoop-aws / MySQL jars straight into Spark's jars directory,
9 | # then copy the whole directory to the two Kyuubi locations below.
10 | # --fail makes the build stop on an HTTP error instead of saving the error page as a .jar.
11 | RUN cd "${SPARK_JARS}" \
12 |     && curl -fsSLO https://repo1.maven.org/maven2/software/amazon/awssdk/s3/2.18.41/s3-2.18.41.jar \
13 |     && curl -fsSLO https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.12.367/aws-java-sdk-1.12.367.jar \
14 |     && curl -fsSLO https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar \
15 |     && curl -fsSLO https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar \
16 |     && curl -fsSLO https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.2/hadoop-aws-3.3.2.jar \
17 |     && curl -fsSLO https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.1026/aws-java-sdk-bundle-1.11.1026.jar \
18 |     && curl -fsSLO https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.32/mysql-connector-java-8.0.32.jar \
19 |     && mkdir -p /opt/kyuubi/work/anonymous/jars \
20 |     && cp -r "${SPARK_JARS}"/* /opt/kyuubi/work/anonymous/jars \
21 |     && cp -r "${SPARK_JARS}"/* /opt/kyuubi/jars
--------------------------------------------------------------------------------
/hive-metastore/metastore-site.xml:
--------------------------------------------------------------------------------
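1 | <configuration>
2 |   <property>
3 |     <name>metastore.thrift.uris</name>
4 |     <value>thrift://hive-metastore:9083</value>
5 |     <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
6 |   </property>
7 |   <property>
8 |     <name>metastore.task.threads.always</name>
9 |     <value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask</value>
10 |   </property>
11 |   <property>
12 |     <name>metastore.expression.proxy</name>
13 |     <value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
14 |   </property>
15 |   <property>
16 |     <name>javax.jdo.option.ConnectionDriverName</name>
17 |     <value>com.mysql.cj.jdbc.Driver</value>
18 |   </property>
19 |   <property>
20 |     <name>javax.jdo.option.ConnectionURL</name>
21 |     <value>jdbc:mysql://de_mysql:3306/metastore_db</value>
22 |   </property>
23 |   <property>
24 |     <name>javax.jdo.option.ConnectionUserName</name>
25 |     <value>admin</value>
26 |   </property>
27 |   <property>
28 |     <name>javax.jdo.option.ConnectionPassword</name>
29 |     <value>admin</value>
30 |   </property>
31 |   <property>
32 |     <name>fs.s3a.access.key</name>
33 |     <value>minio</value>
34 |   </property>
35 |   <property>
36 |     <name>fs.s3a.secret.key</name>
37 |     <value>minio123</value>
38 |   </property>
39 |   <property>
40 |     <name>fs.s3a.endpoint</name>
41 |     <value>http://minio:9000</value>
42 |   </property>
43 |   <property>
44 |     <name>fs.s3a.path.style.access</name>
45 |     <value>true</value>
46 |   </property>
47 |   <property>
48 |     <name>fs.s3a.connection.ssl.enabled</name>
49 |     <value>false</value>
50 |   </property>
51 | </configuration>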
--------------------------------------------------------------------------------
/ecom_analytics/silver/dbt_project.yml:
--------------------------------------------------------------------------------
1 |
2 | # Name your project! Project names should contain only lowercase characters
3 | # and underscores. A good package name should reflect your organization's
4 | # name or the intended use of these models
5 | name: 'silver'
6 | version: '1.0.0'
7 | config-version: 2
8 |
9 | # This setting configures which "profile" dbt uses for this project.
10 | profile: 'spark'
11 |
12 | # These configurations specify where dbt should look for different types of files.
13 | # The `model-paths` config, for example, states that models in this project can be
14 | # found in the "models/" directory. You probably won't need to change these!
15 | model-paths: ["models"]
16 | analysis-paths: ["analyses"]
17 | test-paths: ["tests"]
18 | seed-paths: ["seeds"]
19 | macro-paths: ["macros"]
20 | snapshot-paths: ["snapshots"]
21 |
22 | target-path: "target" # directory which will store compiled SQL files
23 | clean-targets: # directories to be removed by `dbt clean`
24 | - "target"
25 | - "dbt_packages"
26 |
27 |
28 | # Configuring models
29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
30 |
31 | # The config below builds every model in this project as an incremental Delta
32 | # table using the merge strategy. These settings can be overridden in the
33 | # individual model files using the `{{ config(...) }}` macro.
34 | models:
35 | silver: # presumably the project name (see `name: 'silver'`): defaults for every model in this project
36 | +materialized: incremental
37 | +incremental_strategy: merge
38 | +file_format: delta
39 | +pre_hook: # Spark session settings pointing S3A at MinIO; NOTE(review): credentials are hardcoded — confirm this is local-dev only
40 | - SET spark.hadoop.fs.s3a.endpoint=http://minio:9000
41 | - SET spark.hadoop.fs.s3a.access.key=minio
42 | - SET spark.hadoop.fs.s3a.secret.key=minio123
43 | - SET spark.hadoop.fs.s3a.path.style.access=true
44 | - SET spark.hadoop.fs.s3a.connection.ssl.enabled=false
45 | - SET spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
46 | silver: # presumably the models/silver/ subdirectory — tables are written under s3a://warehouse/silver
47 | +location_root: s3a://warehouse/silver
48 | gold: # presumably the models/gold/ subdirectory — tables are written under s3a://warehouse/gold
49 | +location_root: s3a://warehouse/gold
--------------------------------------------------------------------------------
/mysql_schemas.sql:
--------------------------------------------------------------------------------
1 | -- MySQL/MariaDB schema for the Olist e-commerce source tables.
2 | --
3 | -- The script is safe to re-run: all tables are dropped child-first, because
4 | -- olist_order_items_dataset holds foreign keys to olist_orders_dataset and
5 | -- olist_products_dataset, and a referenced parent table cannot be dropped
6 | -- while the child table still exists. Tables are then created parent-first.
7 | DROP TABLE IF EXISTS olist_order_items_dataset;
8 | DROP TABLE IF EXISTS olist_order_payments_dataset;
9 | DROP TABLE IF EXISTS olist_orders_dataset;
10 | DROP TABLE IF EXISTS olist_products_dataset;
11 | DROP TABLE IF EXISTS product_category_name_translation;
12 |
13 | -- Lookup table: original category name -> English category name.
14 | CREATE TABLE product_category_name_translation (
15 |     product_category_name varchar(64),
16 |     product_category_name_english varchar(64),
17 |     PRIMARY KEY (product_category_name)
18 | );
19 |
20 | -- Product catalogue. The "lenght" spelling is kept as-is because other files
21 | -- in the project (e.g. orders_sources.yml) reference these exact column names.
22 | CREATE TABLE olist_products_dataset (
23 |     product_id varchar(32),
24 |     product_category_name varchar(64),
25 |     product_name_lenght int4,
26 |     product_description_lenght int4,
27 |     product_photos_qty int4,
28 |     product_weight_g int4,
29 |     product_length_cm int4,
30 |     product_height_cm int4,
31 |     product_width_cm int4,
32 |     PRIMARY KEY (product_id)
33 | );
34 |
35 | -- Order headers. Date/time columns are stored as text (varchar), not as temporal types.
36 | CREATE TABLE olist_orders_dataset (
37 |     order_id varchar(32),
38 |     customer_id varchar(32),
39 |     order_status varchar(16),
40 |     order_purchase_timestamp varchar(32),
41 |     order_approved_at varchar(32),
42 |     order_delivered_carrier_date varchar(32),
43 |     order_delivered_customer_date varchar(32),
44 |     order_estimated_delivery_date varchar(32),
45 |     PRIMARY KEY(order_id)
46 | );
47 |
48 | -- Order line items; child of olist_orders_dataset and olist_products_dataset.
49 | -- NOTE(review): updated_at only defaults to NOW() on insert; add ON UPDATE NOW()
50 | -- if it is meant to track later modifications — confirm against the consumers.
51 | CREATE TABLE olist_order_items_dataset (
52 |     order_id varchar(32),
53 |     order_item_id int4,
54 |     product_id varchar(32),
55 |     seller_id varchar(32),
56 |     shipping_limit_date varchar(32),
57 |     price float4,
58 |     freight_value float4,
59 |     created_at TIMESTAMP DEFAULT NOW(),
60 |     updated_at TIMESTAMP DEFAULT NOW(),
61 |     PRIMARY KEY (order_id, order_item_id, product_id, seller_id),
62 |     FOREIGN KEY (order_id) REFERENCES olist_orders_dataset(order_id),
63 |     FOREIGN KEY (product_id) REFERENCES olist_products_dataset(product_id)
64 | );
65 |
66 | -- Order payments; keyed by order and payment sequence number (no foreign key declared).
67 | CREATE TABLE olist_order_payments_dataset (
68 |     order_id varchar(32),
69 |     payment_sequential int4,
70 |     payment_type varchar(16),
71 |     payment_installments int4,
72 |     payment_value float4,
73 |     PRIMARY KEY (order_id, payment_sequential)
74 | );
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .venv
106 | env/
107 | venv/
108 | ENV/
109 | env.bak/
110 | venv.bak/
111 |
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 |
116 | # Rope project settings
117 | .ropeproject
118 |
119 | # mkdocs documentation
120 | /site
121 |
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 |
127 | # Pyre type checker
128 | .pyre/
129 |
130 | mysql/
131 | brazilian-ecommerce/
132 | minio/
133 | mariadb/
134 | dbt_packages/
135 | .idea/
136 | psql/
--------------------------------------------------------------------------------
/ecom_analytics/bronze/seeds/product_category_name_translation.csv:
--------------------------------------------------------------------------------
1 | product_category_name,product_category_name_english
2 | beleza_saude,health_beauty
3 | informatica_acessorios,computers_accessories
4 | automotivo,auto
5 | cama_mesa_banho,bed_bath_table
6 | moveis_decoracao,furniture_decor
7 | esporte_lazer,sports_leisure
8 | perfumaria,perfumery
9 | utilidades_domesticas,housewares
10 | telefonia,telephony
11 | relogios_presentes,watches_gifts
12 | alimentos_bebidas,food_drink
13 | bebes,baby
14 | papelaria,stationery
15 | tablets_impressao_imagem,tablets_printing_image
16 | brinquedos,toys
17 | telefonia_fixa,fixed_telephony
18 | ferramentas_jardim,garden_tools
19 | fashion_bolsas_e_acessorios,fashion_bags_accessories
20 | eletroportateis,small_appliances
21 | consoles_games,consoles_games
22 | audio,audio
23 | fashion_calcados,fashion_shoes
24 | cool_stuff,cool_stuff
25 | malas_acessorios,luggage_accessories
26 | climatizacao,air_conditioning
27 | construcao_ferramentas_construcao,construction_tools_construction
28 | moveis_cozinha_area_de_servico_jantar_e_jardim,kitchen_dining_laundry_garden_furniture
29 | construcao_ferramentas_jardim,costruction_tools_garden
30 | fashion_roupa_masculina,fashion_male_clothing
31 | pet_shop,pet_shop
32 | moveis_escritorio,office_furniture
33 | market_place,market_place
34 | eletronicos,electronics
35 | eletrodomesticos,home_appliances
36 | artigos_de_festas,party_supplies
37 | casa_conforto,home_confort
38 | construcao_ferramentas_ferramentas,costruction_tools_tools
39 | agro_industria_e_comercio,agro_industry_and_commerce
40 | moveis_colchao_e_estofado,furniture_mattress_and_upholstery
41 | livros_tecnicos,books_technical
42 | casa_construcao,home_construction
43 | instrumentos_musicais,musical_instruments
44 | moveis_sala,furniture_living_room
45 | construcao_ferramentas_iluminacao,construction_tools_lights
46 | industria_comercio_e_negocios,industry_commerce_and_business
47 | alimentos,food
48 | artes,art
49 | moveis_quarto,furniture_bedroom
50 | livros_interesse_geral,books_general_interest
51 | construcao_ferramentas_seguranca,construction_tools_safety
52 | fashion_underwear_e_moda_praia,fashion_underwear_beach
53 | fashion_esporte,fashion_sport
54 | sinalizacao_e_seguranca,signaling_and_security
55 | pcs,computers
56 | artigos_de_natal,christmas_supplies
57 | fashion_roupa_feminina,fashio_female_clothing
58 | eletrodomesticos_2,home_appliances_2
59 | livros_importados,books_imported
60 | bebidas,drinks
61 | cine_foto,cine_photo
62 | la_cuisine,la_cuisine
63 | musica,music
64 | casa_conforto_2,home_comfort_2
65 | portateis_casa_forno_e_cafe,small_appliances_home_oven_and_coffee
66 | cds_dvds_musicais,cds_dvds_musicals
67 | dvds_blu_ray,dvds_blu_ray
68 | flores,flowers
69 | artes_e_artesanato,arts_and_craftmanship
70 | fraldas_higiene,diapers_and_hygiene
71 | fashion_roupa_infanto_juvenil,fashion_childrens_clothes
72 | seguros_e_servicos,security_and_services
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # de03-apache-kyuuby
2 |
3 | ## Prepare infrastructure
4 | ```bash
5 | make build
6 | make up
7 | ```
8 |
9 | ## Prepare MySQL data
10 |
11 | ```sql
12 | # copy CSV data to mysql container
13 | # cd path/to/brazilian-ecommerce/
14 | docker cp brazilian-ecommerce/ de_mysql:/tmp/
15 | docker cp mysql_schemas.sql de_mysql:/tmp/
16 |
17 | # login to mysql server as root
18 | make to_mysql_root
19 | CREATE DATABASE brazillian_ecommerce;
20 | USE brazillian_ecommerce;
21 | GRANT ALL PRIVILEGES ON *.* TO admin;
22 | SHOW GLOBAL VARIABLES LIKE 'LOCAL_INFILE';
23 | SET GLOBAL LOCAL_INFILE=TRUE;
24 | # exit
25 |
26 | # run commands
27 | make to_mysql
28 |
29 | source /tmp/mysql_schemas.sql;
30 | show tables;
31 | # load parent tables first: olist_order_items_dataset has foreign keys to olist_orders_dataset and olist_products_dataset
32 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/product_category_name_translation.csv' INTO TABLE product_category_name_translation FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;
33 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_products_dataset.csv' INTO TABLE olist_products_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;
34 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_orders_dataset.csv' INTO TABLE olist_orders_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;
35 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_order_items_dataset.csv' INTO TABLE olist_order_items_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;
36 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_order_payments_dataset.csv' INTO TABLE olist_order_payments_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;
37 |
38 | SELECT * FROM olist_order_items_dataset LIMIT 10;
39 | SELECT * FROM olist_order_payments_dataset LIMIT 10;
40 | SELECT * FROM olist_orders_dataset LIMIT 10;
41 | SELECT * FROM olist_products_dataset LIMIT 10;
42 | SELECT * FROM product_category_name_translation LIMIT 10;
43 | ```
44 |
45 |
46 | ## Prepare PostgreSQL data
47 | ```bash
48 | make to_psql
49 |
50 | create database metabaseappdb;
51 | create database ecom_analytics;
52 | ```
53 |
54 | ## Prepare Delta tables in the warehouse data lake
55 | ```sql
56 | SHOW catalogs;
57 |
58 | SHOW SCHEMAS FROM warehouse;
59 |
60 | CREATE SCHEMA IF NOT EXISTS warehouse.bronze WITH (location='s3a://warehouse/bronze');
61 | DROP table if EXISTS warehouse.bronze.mytable;
62 | CREATE TABLE warehouse.bronze.mytable (name varchar, id integer);
63 | INSERT INTO warehouse.bronze.mytable VALUES ( 'John', 1), ('Jane', 2);
64 | SELECT * FROM warehouse.bronze.mytable;
65 |
66 | CREATE SCHEMA IF NOT EXISTS warehouse.silver WITH (location='s3a://warehouse/silver');
67 |
68 | -- https://docs.getdbt.com/reference/resource-properties/external
69 | -- https://github.com/dbt-labs/dbt-external-tables
70 | dbt run-operation stage_external_sources --vars "ext_full_refresh: true"
71 | ```
72 |
73 | ## Run dbt
74 | ```bash
75 | cd ecom_analytics
76 | make run_bronze
77 |
78 | make run_external
79 | make run_silver
80 | make run_gold
81 | ```
--------------------------------------------------------------------------------
/ecom_analytics/silver/models/orders_sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: silver
5 | tables:
6 | - name: olist_products
7 | external:
8 | location: 's3a://warehouse/bronze/olist_products'
9 | using: delta
10 | columns:
11 | - name: product_id
12 | data_type: string
13 | - name: product_category_name
14 | data_type: string
15 | - name: product_name_lenght
16 | data_type: int
17 | - name: product_description_lenght
18 | data_type: int
19 | - name: product_photos_qty
20 | data_type: int
21 | - name: product_weight_g
22 | data_type: int
23 | - name: product_length_cm
24 | data_type: int
25 | - name: product_height_cm
26 | data_type: int
27 | - name: product_width_cm
28 | data_type: int
29 | - name: product_category_name_translation
30 | external:
31 | location: 's3a://warehouse/bronze/product_category_name_translation'
32 | using: delta
33 | columns:
34 | - name: product_category_name
35 | data_type: string
36 | - name: product_category_name_english
37 | data_type: string
38 | - name: olist_order_items
39 | external:
40 | location: 's3a://warehouse/bronze/olist_order_items'
41 | using: delta
42 | columns:
43 | - name: order_id
44 | data_type: string
45 | - name: order_item_id
46 | data_type: int
47 | - name: product_id
48 | data_type: string
49 | - name: seller_id
50 | data_type: string
51 | - name: shipping_limit_date
52 | data_type: string
53 | - name: price
54 | data_type: float
55 | - name: freight_value
56 | data_type: float
57 | - name: olist_order_payments
58 | external:
59 | location: 's3a://warehouse/bronze/olist_order_payments'
60 | using: delta
61 | columns:
62 | - name: order_id
63 | data_type: string
64 | - name: payment_sequential
65 | data_type: int
66 | - name: payment_type
67 | data_type: string
68 | - name: payment_installments
69 | data_type: int
70 | - name: payment_value
71 | data_type: float
72 | - name: olist_orders
73 | external:
74 | location: 's3a://warehouse/bronze/olist_orders'
75 | using: delta
76 | columns:
77 | - name: order_id
78 | data_type: string
79 | - name: customer_id
80 | data_type: string
81 | - name: order_status
82 | data_type: string
83 | - name: order_purchase_timestamp
84 | data_type: string
85 | - name: order_approved_at
86 | data_type: string
87 | - name: order_delivered_carrier_date
88 | data_type: string
89 | - name: order_delivered_customer_date
90 | data_type: string
91 | - name: order_estimated_delivery_date
92 | data_type: string
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 |
3 | services:
4 |
5 | de_mysql:
6 | image: "mariadb"
7 | container_name: de_mysql
8 | volumes:
9 | - ./mysql:/var/lib/mysql
10 | ports:
11 | - "3306:3306"
12 | env_file:
13 | - .env
14 | networks:
15 | - data_network
16 |
17 | de_psql:
18 | image: postgres:15
19 | container_name: de_psql
20 | volumes:
21 | - ./psql:/var/lib/postgresql/data
22 | ports:
23 | - "5432:5432"
24 | env_file:
25 | - .env
26 | networks:
27 | - data_network
28 |
29 | minio:
30 | hostname: minio
31 | image: "minio/minio"
32 | container_name: minio
33 | ports:
34 | - "9001:9001"
35 | - "9000:9000"
36 | command: [ "server", "/data", "--console-address", ":9001" ]
37 | volumes:
38 | - ./minio/data:/data
39 | env_file:
40 | - .env
41 | networks:
42 | - data_network
43 |
44 | hive-metastore:
45 | container_name: hive-metastore
46 | hostname: hive-metastore
47 | image: "bitsondatadev/hive-metastore"
48 | ports:
49 | - "9083:9083"
50 | volumes:
51 | - ./hive-metastore/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro
52 | environment:
53 | METASTORE_DB_HOSTNAME: de_mysql
54 | networks:
55 | - data_network
56 | depends_on:
57 | - de_mysql
58 | - minio
59 |
60 | trino:
61 | container_name: trino
62 | image: "trinodb/trino"
63 | hostname: trino
64 | ports:
65 | - "8080:8080"
66 | volumes:
67 | - ./trino:/etc/trino
68 | networks:
69 | - data_network
70 |
71 | spark-master:
72 | build:
73 | context: ./spark
74 | dockerfile: ./Dockerfile
75 | container_name: "spark-master"
76 | environment:
77 | - SPARK_MODE=master
78 | - SPARK_LOCAL_IP=spark-master
79 | ports:
80 | - "7077:7077"
81 | volumes:
82 | - ./spark/spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf
83 | networks:
84 | - data_network
85 |
86 | spark-worker-1:
87 | image: docker.io/bitnami/spark:3.3
88 | container_name: "spark-worker-1"
89 | env_file:
90 | - .env
91 | depends_on:
92 | - spark-master
93 | networks:
94 | - data_network
95 |
96 | spark-worker-2:
97 | image: docker.io/bitnami/spark:3.3
98 | container_name: "spark-worker-2"
99 | env_file:
100 | - .env
101 | depends_on:
102 | - spark-master
103 | networks:
104 | - data_network
105 |
106 | spark-thrift-server:
107 | build:
108 | context: ./spark
109 | dockerfile: ./Dockerfile
110 | container_name: "spark-thrift-server"
111 | depends_on:
112 | - spark-master
113 | - hive-metastore
114 | ports:
115 | - "4040:4040"
116 | - "10000:10000"
117 | command: sh -c "
118 | sleep 10 && ./sbin/start-thriftserver.sh --driver-java-options '-Dhive.metastore.uris=thrift://hive-metastore:9083' --master spark://spark-master:7077"
119 | volumes:
120 | - ./spark/spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf
121 | networks:
122 | - data_network
123 |
124 | metabase:
125 | image: metabase/metabase:latest
126 | container_name: "metabase"
127 | ports:
128 | - "3000:3000"
129 | env_file:
130 | - .env
131 | networks:
132 | - data_network
133 |
134 | networks:
135 | data_network:
136 | driver: bridge
137 | name: data_network
138 |
--------------------------------------------------------------------------------
/ecom_analytics/silver/logs/dbt.log.legacy:
--------------------------------------------------------------------------------
1 | 2023-02-09 04:11:50.426752 (MainThread): invalid escape sequence '\w'
2 | 2023-02-09 04:11:50.632445 (MainThread): invalid escape sequence '\w'
3 | 2023-02-09 04:12:29.510111 (MainThread): invalid escape sequence '\w'
4 | 2023-02-09 04:12:29.751858 (MainThread): invalid escape sequence '\w'
5 | 2023-02-09 04:12:31.001846 (Thread-1 (worker)): unclosed
6 | 2023-02-09 04:12:33.042784 (Thread-1 (worker)): invalid escape sequence '\w'
7 | 2023-02-09 04:15:11.770701 (MainThread): invalid escape sequence '\w'
8 | 2023-02-09 04:15:11.997010 (MainThread): invalid escape sequence '\w'
9 | 2023-02-09 04:15:34.804687 (MainThread): invalid escape sequence '\w'
10 | 2023-02-09 04:15:35.029489 (MainThread): invalid escape sequence '\w'
11 | 2023-02-09 04:15:36.753810 (Thread-1 (worker)): unclosed
12 | 2023-02-09 04:15:36.754093 (Thread-1 (worker)): unclosed
13 | 2023-02-09 04:21:41.374560 (Thread-1 (worker)): invalid escape sequence '\w'
14 | 2023-02-09 04:21:41.388781 (Thread-1 (worker)): unclosed
15 | 2023-02-09 04:22:02.743843 (Thread-1 (worker)): invalid escape sequence '\w'
16 | 2023-02-09 04:22:02.746365 (Thread-1 (worker)): unclosed
17 | 2023-02-09 04:22:02.746598 (Thread-1 (worker)): unclosed
18 | 2023-02-09 04:26:33.091270 (ThreadPoolExecutor-1_0): unclosed
19 | 2023-02-09 04:26:33.263159 (MainThread): unclosed
20 | 2023-02-09 04:28:41.952780 (MainThread): invalid escape sequence '\w'
21 | 2023-02-09 04:28:42.197963 (MainThread): invalid escape sequence '\w'
22 | 2023-02-09 04:28:43.850074 (Thread-1 (worker)): unclosed
23 | 2023-02-09 04:28:43.850652 (Thread-1 (worker)): unclosed
24 | 2023-02-09 04:30:07.876905 (Thread-1 (worker)): unclosed
25 | 2023-02-09 04:30:07.877236 (Thread-1 (worker)): unclosed
26 | 2023-02-09 04:31:01.156498 (Thread-1 (worker)): unclosed
27 | 2023-02-09 04:31:01.156962 (Thread-1 (worker)): unclosed
28 | 2023-02-09 04:31:32.449736 (Thread-1 (worker)): unclosed
29 | 2023-02-09 04:31:58.671147 (Thread-1 (worker)): unclosed
30 | 2023-02-09 04:31:58.671521 (Thread-1 (worker)): unclosed
31 | 2023-02-09 04:32:32.555201 (MainThread): invalid escape sequence '\w'
32 | 2023-02-09 04:32:32.798852 (MainThread): invalid escape sequence '\w'
33 | 2023-02-09 04:32:34.182634 (Thread-1 (worker)): unclosed
34 | 2023-02-09 04:32:34.265385 (MainThread): unclosed
35 | 2023-02-09 04:32:34.265849 (MainThread): unclosed
36 | 2023-02-09 04:34:10.533182 (MainThread): invalid escape sequence '\w'
37 | 2023-02-09 04:34:10.765162 (MainThread): invalid escape sequence '\w'
38 | 2023-02-09 04:34:12.161120 (Thread-1 (worker)): unclosed
39 | 2023-02-09 04:34:12.239270 (MainThread): unclosed
40 | 2023-02-09 04:34:12.243146 (MainThread): unclosed
41 | 2023-02-09 04:35:10.406425 (MainThread): invalid escape sequence '\w'
42 | 2023-02-09 04:35:10.622482 (MainThread): invalid escape sequence '\w'
43 | 2023-02-09 04:35:11.177069 (Thread-1 (worker)): unclosed
44 | 2023-02-09 04:35:11.177342 (Thread-1 (worker)): unclosed
45 | 2023-02-09 04:35:32.697283 (MainThread): invalid escape sequence '\w'
46 | 2023-02-09 04:35:32.931919 (MainThread): invalid escape sequence '\w'
47 | 2023-02-09 04:35:34.360454 (Thread-1 (worker)): unclosed
48 | 2023-02-09 04:35:34.478453 (MainThread): unclosed
49 | 2023-02-09 04:35:34.478835 (MainThread): unclosed
50 | 2023-02-09 04:36:12.713202 (MainThread): invalid escape sequence '\w'
51 | 2023-02-09 04:36:12.958863 (MainThread): invalid escape sequence '\w'
52 | 2023-02-09 04:36:14.277502 (Thread-1 (worker)): unclosed
53 | 2023-02-09 04:36:14.360654 (MainThread): unclosed
54 | 2023-02-09 04:36:14.361041 (MainThread): unclosed
55 | 2023-02-09 04:37:37.493767 (MainThread): unclosed
56 | 2023-02-09 04:37:37.494107 (MainThread): unclosed
57 | 2023-02-09 04:39:04.499048 (MainThread): unclosed
58 | 2023-02-09 04:39:04.499364 (MainThread): unclosed
59 | 2023-02-09 04:40:47.384557 (MainThread): invalid escape sequence '\w'
60 | 2023-02-09 04:40:47.631436 (MainThread): invalid escape sequence '\w'
61 | 2023-02-09 04:40:48.905204 (Thread-1 (worker)): unclosed
62 | 2023-02-09 04:40:49.000245 (MainThread): unclosed
63 | 2023-02-09 04:40:49.000648 (MainThread): unclosed
64 | 2023-02-09 04:41:05.794757 (ThreadPoolExecutor-1_0): unclosed
65 | 2023-02-09 04:41:06.026634 (MainThread): unclosed
66 | 2023-02-09 04:41:28.790231 (MainThread): unclosed
67 | 2023-02-09 04:41:28.792624 (MainThread): unclosed
68 | 2023-02-09 04:41:28.792928 (MainThread): unclosed
69 | 2023-02-09 04:41:49.806380 (MainThread): invalid escape sequence '\w'
70 | 2023-02-09 04:41:50.034561 (MainThread): invalid escape sequence '\w'
71 | 2023-02-09 04:41:50.529549 (MainThread): unclosed
72 | 2023-02-09 04:42:11.726956 (MainThread): unclosed
73 | 2023-02-09 04:42:11.728639 (MainThread): unclosed
74 | 2023-02-09 04:42:11.728857 (MainThread): unclosed
75 | 2023-02-09 04:43:21.140498 (Thread-1 (worker)): unclosed
76 | 2023-02-09 04:43:27.878836 (MainThread): unclosed
77 | 2023-02-09 04:43:27.880264 (MainThread): unclosed
78 | 2023-02-09 04:43:27.880902 (MainThread): unclosed
79 | 2023-02-09 04:45:16.057697 (ThreadPoolExecutor-1_0): unclosed
80 | 2023-02-09 04:45:16.987011 (MainThread): unclosed
81 | 2023-02-09 04:45:16.987296 (MainThread): unclosed
82 | 2023-02-09 04:45:45.079604 (ThreadPoolExecutor-1_0): unclosed
83 | 2023-02-09 04:45:46.132482 (MainThread): unclosed
84 | 2023-02-09 04:45:46.132755 (MainThread): unclosed
85 | 2023-02-09 04:46:21.160324 (Thread-1 (worker)): unclosed
86 | 2023-02-09 04:47:45.571542 (ThreadPoolExecutor-1_0): unclosed
87 | 2023-02-09 04:47:46.547846 (MainThread): unclosed
88 | 2023-02-09 04:47:46.548134 (MainThread): unclosed
89 | 2023-02-09 04:48:15.573110 (MainThread): unclosed
90 | 2023-02-09 04:48:15.574495 (MainThread): unclosed
91 | 2023-02-09 04:48:15.574728 (MainThread): unclosed
92 | 2023-02-09 04:48:40.042155 (Thread-1 (worker)): unclosed
93 | 2023-02-09 04:48:40.043222 (Thread-1 (worker)): unclosed
94 | 2023-02-09 04:49:56.210585 (MainThread): invalid escape sequence '\w'
95 | 2023-02-09 04:49:56.441901 (MainThread): invalid escape sequence '\w'
96 | 2023-02-09 04:49:56.927294 (MainThread): unclosed
97 | 2023-02-09 04:52:40.664401 (MainThread): invalid escape sequence '\w'
98 | 2023-02-09 04:52:40.862365 (MainThread): invalid escape sequence '\w'
99 | 2023-02-09 04:56:43.512783 (Thread-1 (worker)): invalid escape sequence '\w'
100 | 2023-02-09 05:02:29.383644 (MainThread): invalid escape sequence '\w'
101 | 2023-02-09 05:02:29.580927 (MainThread): invalid escape sequence '\w'
102 | 2023-02-09 14:45:45.873565 (MainThread): unclosed
103 |
--------------------------------------------------------------------------------
/ecom_analytics/bronze/logs/dbt.log.legacy:
--------------------------------------------------------------------------------
1 | 2023-02-09 04:11:50.426752 (MainThread): invalid escape sequence '\w'
2 | 2023-02-09 04:11:50.632445 (MainThread): invalid escape sequence '\w'
3 | 2023-02-09 04:12:29.510111 (MainThread): invalid escape sequence '\w'
4 | 2023-02-09 04:12:29.751858 (MainThread): invalid escape sequence '\w'
5 | 2023-02-09 04:12:31.001846 (Thread-1 (worker)): unclosed
6 | 2023-02-09 04:12:33.042784 (Thread-1 (worker)): invalid escape sequence '\w'
7 | 2023-02-09 04:15:11.770701 (MainThread): invalid escape sequence '\w'
8 | 2023-02-09 04:15:11.997010 (MainThread): invalid escape sequence '\w'
9 | 2023-02-09 04:15:34.804687 (MainThread): invalid escape sequence '\w'
10 | 2023-02-09 04:15:35.029489 (MainThread): invalid escape sequence '\w'
11 | 2023-02-09 04:15:36.753810 (Thread-1 (worker)): unclosed
12 | 2023-02-09 04:15:36.754093 (Thread-1 (worker)): unclosed
13 | 2023-02-09 04:21:41.374560 (Thread-1 (worker)): invalid escape sequence '\w'
14 | 2023-02-09 04:21:41.388781 (Thread-1 (worker)): unclosed
15 | 2023-02-09 04:22:02.743843 (Thread-1 (worker)): invalid escape sequence '\w'
16 | 2023-02-09 04:22:02.746365 (Thread-1 (worker)): unclosed
17 | 2023-02-09 04:22:02.746598 (Thread-1 (worker)): unclosed
18 | 2023-02-09 04:26:33.091270 (ThreadPoolExecutor-1_0): unclosed
19 | 2023-02-09 04:26:33.263159 (MainThread): unclosed
20 | 2023-02-09 04:28:41.952780 (MainThread): invalid escape sequence '\w'
21 | 2023-02-09 04:28:42.197963 (MainThread): invalid escape sequence '\w'
22 | 2023-02-09 04:28:43.850074 (Thread-1 (worker)): unclosed
23 | 2023-02-09 04:28:43.850652 (Thread-1 (worker)): unclosed
24 | 2023-02-09 04:30:07.876905 (Thread-1 (worker)): unclosed
25 | 2023-02-09 04:30:07.877236 (Thread-1 (worker)): unclosed
26 | 2023-02-09 04:31:01.156498 (Thread-1 (worker)): unclosed
27 | 2023-02-09 04:31:01.156962 (Thread-1 (worker)): unclosed
28 | 2023-02-09 04:31:32.449736 (Thread-1 (worker)): unclosed
29 | 2023-02-09 04:31:58.671147 (Thread-1 (worker)): unclosed
30 | 2023-02-09 04:31:58.671521 (Thread-1 (worker)): unclosed
31 | 2023-02-09 04:32:32.555201 (MainThread): invalid escape sequence '\w'
32 | 2023-02-09 04:32:32.798852 (MainThread): invalid escape sequence '\w'
33 | 2023-02-09 04:32:34.182634 (Thread-1 (worker)): unclosed
34 | 2023-02-09 04:32:34.265385 (MainThread): unclosed
35 | 2023-02-09 04:32:34.265849 (MainThread): unclosed
36 | 2023-02-09 04:34:10.533182 (MainThread): invalid escape sequence '\w'
37 | 2023-02-09 04:34:10.765162 (MainThread): invalid escape sequence '\w'
38 | 2023-02-09 04:34:12.161120 (Thread-1 (worker)): unclosed
39 | 2023-02-09 04:34:12.239270 (MainThread): unclosed
40 | 2023-02-09 04:34:12.243146 (MainThread): unclosed
41 | 2023-02-09 04:35:10.406425 (MainThread): invalid escape sequence '\w'
42 | 2023-02-09 04:35:10.622482 (MainThread): invalid escape sequence '\w'
43 | 2023-02-09 04:35:11.177069 (Thread-1 (worker)): unclosed
44 | 2023-02-09 04:35:11.177342 (Thread-1 (worker)): unclosed
45 | 2023-02-09 04:35:32.697283 (MainThread): invalid escape sequence '\w'
46 | 2023-02-09 04:35:32.931919 (MainThread): invalid escape sequence '\w'
47 | 2023-02-09 04:35:34.360454 (Thread-1 (worker)): unclosed
48 | 2023-02-09 04:35:34.478453 (MainThread): unclosed
49 | 2023-02-09 04:35:34.478835 (MainThread): unclosed
50 | 2023-02-09 04:36:12.713202 (MainThread): invalid escape sequence '\w'
51 | 2023-02-09 04:36:12.958863 (MainThread): invalid escape sequence '\w'
52 | 2023-02-09 04:36:14.277502 (Thread-1 (worker)): unclosed
53 | 2023-02-09 04:36:14.360654 (MainThread): unclosed
54 | 2023-02-09 04:36:14.361041 (MainThread): unclosed
55 | 2023-02-09 04:37:37.493767 (MainThread): unclosed
56 | 2023-02-09 04:37:37.494107 (MainThread): unclosed
57 | 2023-02-09 04:39:04.499048 (MainThread): unclosed
58 | 2023-02-09 04:39:04.499364 (MainThread): unclosed
59 | 2023-02-09 04:40:47.384557 (MainThread): invalid escape sequence '\w'
60 | 2023-02-09 04:40:47.631436 (MainThread): invalid escape sequence '\w'
61 | 2023-02-09 04:40:48.905204 (Thread-1 (worker)): unclosed
62 | 2023-02-09 04:40:49.000245 (MainThread): unclosed
63 | 2023-02-09 04:40:49.000648 (MainThread): unclosed
64 | 2023-02-09 04:41:05.794757 (ThreadPoolExecutor-1_0): unclosed
65 | 2023-02-09 04:41:06.026634 (MainThread): unclosed
66 | 2023-02-09 04:41:28.790231 (MainThread): unclosed
67 | 2023-02-09 04:41:28.792624 (MainThread): unclosed
68 | 2023-02-09 04:41:28.792928 (MainThread): unclosed
69 | 2023-02-09 04:41:49.806380 (MainThread): invalid escape sequence '\w'
70 | 2023-02-09 04:41:50.034561 (MainThread): invalid escape sequence '\w'
71 | 2023-02-09 04:41:50.529549 (MainThread): unclosed
72 | 2023-02-09 04:42:11.726956 (MainThread): unclosed
73 | 2023-02-09 04:42:11.728639 (MainThread): unclosed
74 | 2023-02-09 04:42:11.728857 (MainThread): unclosed
75 | 2023-02-09 04:43:21.140498 (Thread-1 (worker)): unclosed
76 | 2023-02-09 04:43:27.878836 (MainThread): unclosed
77 | 2023-02-09 04:43:27.880264 (MainThread): unclosed
78 | 2023-02-09 04:43:27.880902 (MainThread): unclosed
79 | 2023-02-09 04:45:16.057697 (ThreadPoolExecutor-1_0): unclosed
80 | 2023-02-09 04:45:16.987011 (MainThread): unclosed
81 | 2023-02-09 04:45:16.987296 (MainThread): unclosed
82 | 2023-02-09 04:45:45.079604 (ThreadPoolExecutor-1_0): unclosed
83 | 2023-02-09 04:45:46.132482 (MainThread): unclosed
84 | 2023-02-09 04:45:46.132755 (MainThread): unclosed
85 | 2023-02-09 04:46:21.160324 (Thread-1 (worker)): unclosed
86 | 2023-02-09 04:47:45.571542 (ThreadPoolExecutor-1_0): unclosed
87 | 2023-02-09 04:47:46.547846 (MainThread): unclosed
88 | 2023-02-09 04:47:46.548134 (MainThread): unclosed
89 | 2023-02-09 04:48:15.573110 (MainThread): unclosed
90 | 2023-02-09 04:48:15.574495 (MainThread): unclosed
91 | 2023-02-09 04:48:15.574728 (MainThread): unclosed
92 | 2023-02-09 04:48:40.042155 (Thread-1 (worker)): unclosed
93 | 2023-02-09 04:48:40.043222 (Thread-1 (worker)): unclosed
94 | 2023-02-09 04:49:56.210585 (MainThread): invalid escape sequence '\w'
95 | 2023-02-09 04:49:56.441901 (MainThread): invalid escape sequence '\w'
96 | 2023-02-09 04:49:56.927294 (MainThread): unclosed
97 | 2023-02-09 04:52:40.664401 (MainThread): invalid escape sequence '\w'
98 | 2023-02-09 04:52:40.862365 (MainThread): invalid escape sequence '\w'
99 | 2023-02-09 04:56:43.512783 (Thread-1 (worker)): invalid escape sequence '\w'
100 | 2023-02-09 05:02:29.383644 (MainThread): invalid escape sequence '\w'
101 | 2023-02-09 05:02:29.580927 (MainThread): invalid escape sequence '\w'
102 | 2023-02-09 05:14:53.616841 (MainThread): invalid escape sequence '\w'
103 | 2023-02-09 05:14:53.816135 (MainThread): invalid escape sequence '\w'
104 | 2023-02-09 05:25:31.427854 (MainThread): invalid escape sequence '\w'
105 | 2023-02-09 05:25:31.667175 (MainThread): invalid escape sequence '\w'
106 | 2023-02-09 05:26:39.328648 (Thread-1 (worker)): invalid escape sequence '\w'
107 | 2023-02-09 05:51:12.099730 (MainThread): invalid escape sequence '\w'
108 | 2023-02-09 05:51:12.299166 (MainThread): invalid escape sequence '\w'
109 | 2023-02-09 05:51:16.458965 (Thread-1 (worker)): invalid escape sequence '\w'
110 | 2023-02-11 09:19:53.250102 (MainThread): invalid escape sequence '\w'
111 | 2023-02-11 09:19:53.480457 (MainThread): invalid escape sequence '\w'
112 | 2023-02-11 09:43:59.833071 (MainThread): invalid escape sequence '\w'
113 | 2023-02-11 09:44:00.031005 (MainThread): invalid escape sequence '\w'
114 | 2023-02-11 09:46:31.520687 (MainThread): invalid escape sequence '\w'
115 | 2023-02-11 09:46:31.728998 (MainThread): invalid escape sequence '\w'
116 | 2023-02-11 09:57:40.082554 (MainThread): invalid escape sequence '\w'
117 | 2023-02-11 09:57:40.279248 (MainThread): invalid escape sequence '\w'
118 | 2023-02-11 09:59:40.255824 (MainThread): invalid escape sequence '\w'
119 | 2023-02-11 09:59:40.469556 (MainThread): invalid escape sequence '\w'
120 | 2023-02-11 10:00:45.492872 (MainThread): invalid escape sequence '\w'
121 | 2023-02-11 10:00:45.691330 (MainThread): invalid escape sequence '\w'
122 | 2023-02-11 10:12:39.303108 (Thread-1 (worker)): invalid escape sequence '\w'
123 | 2023-02-11 11:09:49.188054 (MainThread): invalid escape sequence '\w'
124 | 2023-02-11 11:09:49.380699 (MainThread): invalid escape sequence '\w'
125 | 2023-02-11 11:19:11.002262 (Thread-1 (worker)): invalid escape sequence '\w'
126 | 2023-02-20 04:41:15.337495 (MainThread): invalid escape sequence '\w'
127 | 2023-02-20 04:41:15.562145 (MainThread): invalid escape sequence '\w'
128 | 2023-02-20 04:50:00.449422 (Thread-1 (worker)): invalid escape sequence '\w'
129 |
--------------------------------------------------------------------------------