├── ecom_analytics ├── bronze │ ├── macros │ │ └── .gitkeep │ ├── seeds │ │ ├── .gitkeep │ │ └── product_category_name_translation.csv │ ├── tests │ │ └── .gitkeep │ ├── analyses │ │ └── .gitkeep │ ├── snapshots │ │ └── .gitkeep │ ├── models │ │ ├── olist_order_payments.sql │ │ ├── olist_orders.sql │ │ ├── olist_order_items.sql │ │ └── olist_products.sql │ ├── README.md │ ├── dbt_project.yml │ └── logs │ │ └── dbt.log.legacy ├── gold │ ├── analyses │ │ └── .gitkeep │ ├── macros │ │ └── .gitkeep │ ├── seeds │ │ └── .gitkeep │ ├── snapshots │ │ └── .gitkeep │ ├── tests │ │ └── .gitkeep │ ├── .gitignore │ ├── models │ │ └── sales_values_by_category.sql │ ├── README.md │ └── dbt_project.yml ├── silver │ ├── macros │ │ ├── .gitkeep │ │ └── classify_abc.sql │ ├── seeds │ │ └── .gitkeep │ ├── tests │ │ └── .gitkeep │ ├── analyses │ │ └── .gitkeep │ ├── snapshots │ │ └── .gitkeep │ ├── packages.yml │ ├── models │ │ ├── silver │ │ │ ├── dim_products.sql │ │ │ └── fact_sales.sql │ │ ├── gold │ │ │ └── sales_values_by_category.sql │ │ └── orders_sources.yml │ ├── dbt_project.yml │ └── logs │ │ └── dbt.log.legacy ├── .user.yml ├── profiles.yml └── Makefile ├── trino ├── node.properties ├── catalog │ ├── de_psql.properties │ ├── de_mysql.properties │ └── warehouse.properties ├── config.properties └── jvm.config ├── requirements.txt ├── Makefile ├── kyuubi ├── spark-defaults.conf ├── hive-site.xml └── Dockerfile ├── spark ├── spark-defaults.conf ├── Dockerfile └── hive-site.xml ├── .env ├── hive-metastore ├── entrypoint.sh ├── Dockerfile └── metastore-site.xml ├── mysql_schemas.sql ├── .gitignore ├── README.md └── docker-compose.yml /ecom_analytics/bronze/macros/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/bronze/seeds/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /ecom_analytics/bronze/tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/gold/analyses/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/gold/macros/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/gold/seeds/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/gold/snapshots/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/gold/tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/silver/macros/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/silver/seeds/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/silver/tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/bronze/analyses/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/bronze/snapshots/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/silver/analyses/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/silver/snapshots/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ecom_analytics/.user.yml: -------------------------------------------------------------------------------- 1 | id: 4e0c8ec6-2dd5-4241-927d-db28b6327d8a 2 | -------------------------------------------------------------------------------- /ecom_analytics/gold/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /ecom_analytics/gold/models/sales_values_by_category.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM warehouse.silver.sales_values_by_category -------------------------------------------------------------------------------- /trino/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=docker 2 | node.data-dir=/data/trino 3 | plugin.dir=/usr/lib/trino/plugin 4 | -------------------------------------------------------------------------------- /ecom_analytics/silver/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: 
dbt-labs/dbt_external_tables 3 | version: 0.8.2 4 | - package: dbt-labs/dbt_utils 5 | version: 0.9.2 -------------------------------------------------------------------------------- /trino/catalog/de_psql.properties: -------------------------------------------------------------------------------- 1 | connector.name=postgresql 2 | connection-url=jdbc:postgresql://de_psql:5432/ecom_analytics 3 | connection-user=admin 4 | connection-password=admin123 -------------------------------------------------------------------------------- /trino/config.properties: -------------------------------------------------------------------------------- 1 | coordinator=true 2 | node-scheduler.include-coordinator=true 3 | http-server.http.port=8080 4 | discovery.uri=http://localhost:8080 5 | discovery-server.enabled=true 6 | -------------------------------------------------------------------------------- /trino/catalog/de_mysql.properties: -------------------------------------------------------------------------------- 1 | connector.name=mysql 2 | connection-url=jdbc:mysql://de_mysql:3306?allowPublicKeyRetrieval=true&useSSL=false 3 | connection-user=root 4 | connection-password=admin -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.5.2 2 | SQLAlchemy==1.4.45 3 | PyMySQL==1.0.2 4 | minio==7.1.12 5 | PyHive==0.6.5 6 | dbt-core==1.4.0 7 | dbt-spark==1.4.0 8 | dbt-spark[PyHive] 9 | dbt-spark[session] 10 | dbt-trino==1.4.0 11 | dbt-postgres==1.4.0 -------------------------------------------------------------------------------- /ecom_analytics/silver/macros/classify_abc.sql: -------------------------------------------------------------------------------- 1 | {% macro classify_abc(column_name) %} 2 | 3 | CASE 4 | WHEN "{{column_name}}" <= 50 THEN 'A (50%)' 5 | WHEN "{{column_name}}" <= 90 THEN 'B (40%)' 6 | ELSE 'C (10%)' 7 | END 8 | 9 | {% endmacro 
%} -------------------------------------------------------------------------------- /ecom_analytics/silver/models/silver/dim_products.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | rp.product_id 3 | , pcnt.product_category_name_english 4 | FROM {{ source('silver', 'olist_products') }} rp 5 | JOIN {{ source('silver', 'product_category_name_translation') }} pcnt 6 | ON rp.product_category_name = pcnt.product_category_name -------------------------------------------------------------------------------- /trino/catalog/warehouse.properties: -------------------------------------------------------------------------------- 1 | connector.name=delta-lake 2 | hive.metastore.uri=thrift://hive-metastore:9083 3 | hive.s3.endpoint=http://minio:9000 4 | hive.s3.aws-access-key=minio 5 | hive.s3.aws-secret-key=minio123 6 | hive.s3.path-style-access=true 7 | delta.enable-non-concurrent-writes=true 8 | delta.unique-table-location=false -------------------------------------------------------------------------------- /ecom_analytics/bronze/models/olist_order_payments.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key="order_id" 4 | ) 5 | }} 6 | 7 | SELECT 8 | order_id 9 | ,payment_sequential 10 | ,payment_type 11 | ,payment_installments 12 | ,payment_value 13 | FROM de_mysql.brazillian_ecommerce.olist_order_payments_dataset -------------------------------------------------------------------------------- /trino/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx1G 3 | -XX:-UseBiasedLocking 4 | -XX:+UseG1GC 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+HeapDumpOnOutOfMemoryError 8 | -XX:+UseGCOverheadLimit 9 | -XX:+ExitOnOutOfMemoryError 10 | -XX:ReservedCodeCacheSize=256M 11 | -Djdk.attach.allowAttachSelf=true 12 | -Djdk.nio.maxCachedBufferSize=2000000 
-------------------------------------------------------------------------------- /ecom_analytics/silver/models/silver/fact_sales.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ro.order_id 3 | , ro.customer_id 4 | , ro.order_purchase_timestamp 5 | , roi.product_id 6 | , rop.payment_value 7 | , ro.order_status 8 | FROM {{ source("silver", "olist_orders") }} ro 9 | JOIN {{ source("silver", "olist_order_items")}} roi 10 | ON ro.order_id = roi.order_id 11 | JOIN {{ source("silver", "olist_order_payments")}} rop 12 | ON ro.order_id = rop.order_id -------------------------------------------------------------------------------- /ecom_analytics/bronze/models/olist_orders.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key="order_id" 4 | ) 5 | }} 6 | 7 | SELECT 8 | order_id 9 | , customer_id 10 | , order_status 11 | , order_purchase_timestamp 12 | , order_approved_at 13 | , order_delivered_carrier_date 14 | , order_delivered_customer_date 15 | , order_estimated_delivery_date 16 | FROM de_mysql.brazillian_ecommerce.olist_orders_dataset -------------------------------------------------------------------------------- /ecom_analytics/bronze/models/olist_order_items.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | incremental_strategy='delete+insert', 5 | unique_key="order_id" 6 | ) 7 | }} 8 | 9 | SELECT 10 | order_id 11 | , order_item_id 12 | , product_id 13 | , seller_id 14 | , shipping_limit_date 15 | , price 16 | , freight_value 17 | FROM de_mysql.brazillian_ecommerce.olist_order_items_dataset -------------------------------------------------------------------------------- /ecom_analytics/bronze/models/olist_products.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | unique_key="product_id" 4 | ) 5 | }} 6 | 7 
| SELECT 8 | product_id 9 | , product_category_name 10 | , product_name_lenght 11 | , product_description_lenght 12 | , product_photos_qty 13 | , product_weight_g 14 | , product_length_cm 15 | , product_height_cm 16 | , product_width_cm 17 | FROM de_mysql.brazillian_ecommerce.olist_products_dataset -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include .env 2 | 3 | build: 4 | docker-compose build 5 | 6 | up: 7 | docker-compose --env-file .env up -d 8 | 9 | down: 10 | docker-compose --env-file .env down 11 | 12 | restart: 13 | make down && make up 14 | 15 | to_mysql: 16 | docker exec -it de_mysql mysql --local-infile=1 -u"${MYSQL_USER}" -p"${MYSQL_PASSWORD}" brazillian_ecommerce 17 | 18 | to_mysql_root: 19 | docker exec -it de_mysql mysql -u"root" -p"${MYSQL_ROOT_PASSWORD}" 20 | 21 | to_psql: 22 | docker exec -ti de_psql psql postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} 23 | 24 | -------------------------------------------------------------------------------- /ecom_analytics/bronze/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 
2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /ecom_analytics/gold/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /kyuubi/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.jars jars/* 2 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension 3 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog 4 | spark.hadoop.fs.s3a.endpoint http://minio:9000 5 | spark.hadoop.fs.s3a.access.key minio 6 | spark.hadoop.fs.s3a.secret.key minio123 7 | spark.hadoop.fs.s3a.path.style.access true 8 | 
spark.hadoop.fs.s3a.connection.ssl.enabled false 9 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem -------------------------------------------------------------------------------- /spark/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.jars jars/* 2 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension 3 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog 4 | spark.hadoop.fs.s3a.endpoint http://minio:9000 5 | spark.hadoop.fs.s3a.access.key minio 6 | spark.hadoop.fs.s3a.secret.key minio123 7 | spark.hadoop.fs.s3a.path.style.access true 8 | spark.hadoop.fs.s3a.connection.ssl.enabled false 9 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem -------------------------------------------------------------------------------- /ecom_analytics/profiles.yml: -------------------------------------------------------------------------------- 1 | trino: 2 | target: dev 3 | outputs: 4 | dev: 5 | type: trino 6 | user: admin 7 | host: localhost 8 | port: 8080 9 | database: warehouse 10 | schema: bronze 11 | threads: 1 12 | 13 | spark: 14 | target: dev 15 | outputs: 16 | dev: 17 | type: spark 18 | method: thrift 19 | host: localhost 20 | port: 10000 21 | schema: silver 22 | connect_retries: 5 23 | connect_timeout: 60 24 | retry_all: true 25 | 26 | gold: 27 | target: dev 28 | outputs: 29 | dev: 30 | type: trino 31 | user: admin 32 | host: localhost 33 | port: 8080 34 | database: de_psql 35 | schema: gold 36 | threads: 1 -------------------------------------------------------------------------------- /ecom_analytics/Makefile: -------------------------------------------------------------------------------- 1 | # Run every target from this directory: profiles.yml lives here, hence `--profiles-dir ./`. 2 | .PHONY: run_bronze run_silver run_gold run_all run_external install_deps seed docs select test 3 | 4 | run_bronze: 5 | dbt run --project-dir ./bronze --profiles-dir ./ --full-refresh 6 | 7 | run_silver: 8 | dbt run --project-dir ./silver --profiles-dir ./ --full-refresh 9 | 10 | run_gold: 11 | dbt run --project-dir ./gold --profiles-dir ./
--full-refresh 12 | 13 | # Full pipeline; $(MAKE) propagates make flags to the sub-invocations. 14 | run_all: 15 | $(MAKE) run_bronze && $(MAKE) seed && $(MAKE) run_external && $(MAKE) run_silver && $(MAKE) run_gold 16 | 17 | run_external: 18 | dbt run-operation --project-dir ./silver --profiles-dir ./ stage_external_sources 19 | 20 | install_deps: 21 | dbt deps --project-dir ./silver 22 | 23 | seed: 24 | dbt seed --project-dir ./bronze --profiles-dir ./ --full-refresh 25 | 26 | docs: 27 | dbt docs generate --project-dir ./silver --profiles-dir ./ && dbt docs serve --port 8081 --project-dir ./silver --profiles-dir ./ 28 | 29 | # Usage: make select script=<model selector> (runs against the silver project) 30 | select: 31 | dbt run --project-dir ./silver --profiles-dir ./ --select $(script) 32 | 33 | test: 34 | dbt test --project-dir ./silver --profiles-dir ./ -------------------------------------------------------------------------------- /spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/bitnami/spark:3.3 2 | 3 | USER root 4 | 5 | # Install prerequisites 6 | RUN apt-get update && apt-get install -y curl 7 | 8 | RUN curl -O https://repo1.maven.org/maven2/software/amazon/awssdk/s3/2.18.41/s3-2.18.41.jar \ 9 | && curl -O https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.12.367/aws-java-sdk-1.12.367.jar \ 10 | && curl -O https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar \ 11 | && curl -O https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar \ 12 | && curl -O https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.19/mysql-connector-java-8.0.19.jar \ 13 | && mv s3-2.18.41.jar /opt/bitnami/spark/jars \ 14 | && mv aws-java-sdk-1.12.367.jar /opt/bitnami/spark/jars \ 15 | && mv delta-core_2.12-2.2.0.jar /opt/bitnami/spark/jars \ 16 | && mv delta-storage-2.2.0.jar /opt/bitnami/spark/jars \ 17 | && mv mysql-connector-java-8.0.19.jar /opt/bitnami/spark/jars -------------------------------------------------------------------------------- /ecom_analytics/silver/models/gold/sales_values_by_category.sql: 
-------------------------------------------------------------------------------- 1 | WITH daily_sales_products AS ( 2 | SELECT 3 | CAST(order_purchase_timestamp AS DATE) AS daily 4 | , product_id 5 | , ROUND(SUM(CAST(payment_value AS FLOAT)), 2) AS sales 6 | , COUNT(DISTINCT(order_id)) AS bills 7 | FROM {{ref("fact_sales")}} 8 | WHERE order_status = 'delivered' 9 | GROUP BY 10 | CAST(order_purchase_timestamp AS DATE) 11 | , product_id 12 | ), daily_sales_categories AS ( 13 | SELECT 14 | ts.daily 15 | , DATE_FORMAT(ts.daily, 'y-MM') AS monthly 16 | , p.product_category_name_english AS category 17 | , ts.sales 18 | , ts.bills 19 | , (ts.sales / ts.bills) AS values_per_bills 20 | FROM daily_sales_products ts 21 | JOIN {{ref("dim_products")}} p 22 | ON ts.product_id = p.product_id 23 | ) 24 | SELECT 25 | monthly 26 | , category 27 | , SUM(sales) AS total_sales 28 | , SUM(bills) AS total_bills 29 | , (SUM(sales) * 1.0 / SUM(bills)) AS values_per_bills 30 | FROM daily_sales_categories 31 | GROUP BY 32 | monthly 33 | , category -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # MySQL 2 | MYSQL_HOST=de_mysql 3 | MYSQL_PORT=3306 4 | MYSQL_DATABASE=metastore_db 5 | MYSQL_ROOT_PASSWORD=admin 6 | MYSQL_USER=admin 7 | MYSQL_PASSWORD=admin 8 | 9 | # PostgreSQL 10 | POSTGRES_HOST=de_psql 11 | POSTGRES_PORT=5432 12 | POSTGRES_DB=postgres 13 | POSTGRES_USER=admin 14 | POSTGRES_PASSWORD=admin123 15 | POSTGRES_HOST_AUTH_METHOD=trust 16 | 17 | # MinIO 18 | MINIO_ROOT_USER=minio 19 | MINIO_ROOT_PASSWORD=minio123 20 | MINIO_ACCESS_KEY=minio 21 | MINIO_SECRET_KEY=minio123 22 | 23 | # MinIO credentials 24 | AWS_ACCESS_KEY_ID=minio 25 | AWS_SECRET_ACCESS_KEY=minio123 26 | AWS_ACCESS_KEY=minio 27 | AWS_SECRET_KEY=minio123 28 | 29 | # Spark worker 30 | SPARK_MODE="worker" 31 | SPARK_MASTER_URL="spark://spark-master:7077" 32 | SPARK_WORKER_MEMORY="2G" 33 | 
SPARK_WORKER_CORES="1" 34 | SPARK_RPC_AUTHENTICATION_ENABLED="no" 35 | SPARK_RPC_ENCRYPTION_ENABLED="no" 36 | SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED="no" 37 | SPARK_SSL_ENABLED="no" 38 | 39 | # Metabase 40 | MB_DB_TYPE=postgres 41 | MB_DB_DBNAME=metabaseappdb 42 | MB_DB_PORT=5432 43 | MB_DB_USER=admin 44 | MB_DB_PASS=admin123 45 | MB_DB_HOST=de_psql 46 | -------------------------------------------------------------------------------- /hive-metastore/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export HADOOP_HOME=/opt/hadoop-3.2.0 4 | export HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.375.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.2.0.jar 5 | export JAVA_HOME=/usr/local/openjdk-8 6 | 7 | # Metastore database endpoint; the defaults match the JDBC URL in metastore-site.xml (de_mysql:3306). 8 | DB_HOST="${MYSQL_HOST:-de_mysql}" 9 | DB_PORT="${MYSQL_PORT:-3306}" 10 | 11 | # Wait (best effort) until the database accepts TCP connections 12 | MAX_TRIES=8 13 | CURRENT_TRY=1 14 | SLEEP_BETWEEN_TRY=4 15 | until [ "$(telnet "$DB_HOST" "$DB_PORT" | sed -n 2p)" = "Connected to ${DB_HOST}." ] || [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; do 16 | echo "Waiting for ${DB_HOST}:${DB_PORT}..." 17 | sleep "$SLEEP_BETWEEN_TRY" 18 | CURRENT_TRY=$((CURRENT_TRY + 1)) 19 | done 20 | 21 | # A timeout is only reported; startup continues and schematool below surfaces real connection errors 22 | if [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; then 23 | echo "WARNING: Timeout when waiting for ${DB_HOST}:${DB_PORT}." 24 | fi 25 | 26 | # Initialize the metastore schema when schematool cannot read schema info (any non-zero exit, not only 1) 27 | if ! /opt/apache-hive-metastore-3.0.0-bin/bin/schematool -dbType mysql -info; then 28 | echo "Getting schema info failed. Probably not initialized. Initializing..." 29 | /opt/apache-hive-metastore-3.0.0-bin/bin/schematool -initSchema -dbType mysql 30 | fi 31 | 32 | # exec replaces this shell with the metastore launcher instead of leaving an idle wrapper process 33 | exec /opt/apache-hive-metastore-3.0.0-bin/bin/start-metastore -------------------------------------------------------------------------------- /ecom_analytics/bronze/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. 
A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'bronze' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'trino' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `model-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_packages" 26 | 27 | 28 | # Configuring models 29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 30 | 31 | # In this config, we tell dbt to build all models in this project as incremental models (delete+insert strategy). 32 | models: 33 | bronze: 34 | +materialized: incremental 35 | +incremental_strategy: delete+insert 36 | +file_format: delta 37 | +location_root: s3a://warehouse/bronze 38 | -------------------------------------------------------------------------------- /spark/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | javax.jdo.option.ConnectionDriverName 4 | com.mysql.cj.jdbc.Driver 5 | 6 | 7 | javax.jdo.option.ConnectionURL 8 | jdbc:mysql://de_mysql:3306/hive_metastore?createDatabaseIfNotExist=true 9 | 10 | 11 | javax.jdo.option.ConnectionUserName 12 | admin 13 | 14 | 15 | javax.jdo.option.ConnectionPassword 16 | admin 17 | 18 | 19 | fs.s3a.access.key 20 | minio 21 | 22 | 23 | fs.s3a.secret.key 24 | minio123 25 | 26 | 27 | fs.s3a.endpoint 28 | http://minio:9000 29 | 30 | 31 | fs.s3a.path.style.access 32 | true 33 | 34 | 35 | 
fs.s3a.connection.ssl.enabled 36 | false 37 | 38 | 39 | hive.metastore.warehouse.dir 40 | s3a://warehouse 41 | 42 | -------------------------------------------------------------------------------- /kyuubi/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | javax.jdo.option.ConnectionDriverName 4 | com.mysql.cj.jdbc.Driver 5 | 6 | 7 | javax.jdo.option.ConnectionURL 8 | jdbc:mysql://de_mysql:3306/hive_metastore?createDatabaseIfNotExist=true 9 | 10 | 11 | javax.jdo.option.ConnectionUserName 12 | admin 13 | 14 | 15 | javax.jdo.option.ConnectionPassword 16 | admin 17 | 18 | 19 | fs.s3a.access.key 20 | minio 21 | 22 | 23 | fs.s3a.secret.key 24 | minio123 25 | 26 | 27 | fs.s3a.endpoint 28 | http://minio:9000 29 | 30 | 31 | fs.s3a.path.style.access 32 | true 33 | 34 | 35 | fs.s3a.connection.ssl.enabled 36 | false 37 | 38 | 39 | hive.metastore.warehouse.dir 40 | s3a://warehouse 41 | 42 | -------------------------------------------------------------------------------- /ecom_analytics/gold/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'gold' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'gold' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `model-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 
15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_packages" 26 | 27 | 28 | # Configuring models 29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 30 | 31 | # In this config, we tell dbt to build all models in this project 32 | # as tables. These settings can be overridden in the individual model 33 | # files using the `{{ config(...) }}` macro. 34 | models: 35 | gold: 36 | # Config is indicated by + and applies to all files under models/ 37 | +materialized: table -------------------------------------------------------------------------------- /hive-metastore/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8u342-jre 2 | 3 | RUN apt-get update \ 4 | && apt-get install --assume-yes python3 python3-pip procps \ 5 | && apt-get clean 6 | 7 | RUN pip3 install pyspark~=3.3.1 pandas~=1.5.3 8 | 9 | RUN apt-get update \ 10 | && apt-get install --assume-yes telnet \ 11 | && apt-get clean 12 | 13 | WORKDIR /opt 14 | 15 | ENV HADOOP_VERSION=3.2.0 16 | ENV METASTORE_VERSION=3.0.0 17 | 18 | ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION} 19 | ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin 20 | 21 | RUN curl -L https://apache.org/dist/hive/hive-standalone-metastore-${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - && \ 22 | curl -L https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \ 23 | curl -L https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.19.tar.gz | tar zxf - && \ 24 | cp 
mysql-connector-java-8.0.19/mysql-connector-java-8.0.19.jar ${HIVE_HOME}/lib/ && \ 25 | rm -rf mysql-connector-java-8.0.19 26 | 27 | COPY metastore-site.xml ${HIVE_HOME}/conf 28 | COPY entrypoint.sh /entrypoint.sh 29 | 30 | RUN groupadd -r hive --gid=1000 && \ 31 | useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive && \ 32 | chown hive:hive -R ${HIVE_HOME} && \ 33 | chown hive:hive /entrypoint.sh && chmod +x /entrypoint.sh 34 | 35 | USER hive 36 | EXPOSE 9083 37 | 38 | ENTRYPOINT ["sh", "-c", "/entrypoint.sh"] -------------------------------------------------------------------------------- /kyuubi/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/kyuubi:master-snapshot 2 | 3 | RUN curl -O https://repo1.maven.org/maven2/software/amazon/awssdk/s3/2.18.41/s3-2.18.41.jar \ 4 | && curl -O https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.12.367/aws-java-sdk-1.12.367.jar \ 5 | && curl -O https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar \ 6 | && curl -O https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar \ 7 | && curl -O https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.2/hadoop-aws-3.3.2.jar \ 8 | && curl -O https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.1026/aws-java-sdk-bundle-1.11.1026.jar \ 9 | && curl -O https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.32/mysql-connector-java-8.0.32.jar \ 10 | && mv s3-2.18.41.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 11 | && mv aws-java-sdk-1.12.367.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 12 | && mv delta-core_2.12-2.2.0.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 13 | && mv delta-storage-2.2.0.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 14 | && mv hadoop-aws-3.3.2.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 15 | && mv aws-java-sdk-bundle-1.11.1026.jar 
/opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 16 | && mv mysql-connector-java-8.0.32.jar /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/ \ 17 | && mkdir -p /opt/kyuubi/work/anonymous/jars \ 18 | && cp -r /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/* /opt/kyuubi/work/anonymous/jars \ 19 | && cp -r /opt/kyuubi/externals/spark-3.3.1-bin-hadoop3/jars/* /opt/kyuubi/jars -------------------------------------------------------------------------------- /hive-metastore/metastore-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | metastore.thrift.uris 4 | thrift://hive-metastore:9083 5 | Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore. 6 | 7 | 8 | metastore.task.threads.always 9 | org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask 10 | 11 | 12 | metastore.expression.proxy 13 | org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy 14 | 15 | 16 | javax.jdo.option.ConnectionDriverName 17 | com.mysql.cj.jdbc.Driver 18 | 19 | 20 | javax.jdo.option.ConnectionURL 21 | jdbc:mysql://de_mysql:3306/metastore_db 22 | 23 | 24 | javax.jdo.option.ConnectionUserName 25 | admin 26 | 27 | 28 | javax.jdo.option.ConnectionPassword 29 | admin 30 | 31 | 32 | fs.s3a.access.key 33 | minio 34 | 35 | 36 | fs.s3a.secret.key 37 | minio123 38 | 39 | 40 | fs.s3a.endpoint 41 | http://minio:9000 42 | 43 | 44 | fs.s3a.path.style.access 45 | true 46 | 47 | 48 | fs.s3a.connection.ssl.enabled 49 | false 50 | 51 | -------------------------------------------------------------------------------- /ecom_analytics/silver/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. 
A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'silver' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'spark' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `model-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_packages" 26 | 27 | 28 | # Configuring models 29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 30 | 31 | # In this config, we tell dbt to build all models in this project as incremental 32 | # Delta tables using the merge strategy. These settings can be overridden in the individual model files 33 | # using the `{{ config(...) }}` macro.
34 | models: 35 | silver: 36 | +materialized: incremental 37 | +incremental_strategy: merge 38 | +file_format: delta 39 | +pre_hook: 40 | - SET spark.hadoop.fs.s3a.endpoint=http://minio:9000 41 | - SET spark.hadoop.fs.s3a.access.key=minio 42 | - SET spark.hadoop.fs.s3a.secret.key=minio123 43 | - SET spark.hadoop.fs.s3a.path.style.access=true 44 | - SET spark.hadoop.fs.s3a.connection.ssl.enabled=false 45 | - SET spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem 46 | silver: 47 | +location_root: s3a://warehouse/silver 48 | gold: 49 | +location_root: s3a://warehouse/gold -------------------------------------------------------------------------------- /mysql_schemas.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS product_category_name_translation; 2 | CREATE TABLE product_category_name_translation ( 3 | product_category_name varchar(64), 4 | product_category_name_english varchar(64), 5 | PRIMARY KEY (product_category_name) 6 | ); 7 | 8 | DROP TABLE IF EXISTS olist_products_dataset; 9 | CREATE TABLE olist_products_dataset ( 10 | product_id varchar(32), 11 | product_category_name varchar(64), 12 | product_name_lenght int4, 13 | product_description_lenght int4, 14 | product_photos_qty int4, 15 | product_weight_g int4, 16 | product_length_cm int4, 17 | product_height_cm int4, 18 | product_width_cm int4, 19 | PRIMARY KEY (product_id) 20 | ); 21 | 22 | DROP TABLE IF EXISTS olist_orders_dataset; 23 | CREATE TABLE olist_orders_dataset ( 24 | order_id varchar(32), 25 | customer_id varchar(32), 26 | order_status varchar(16), 27 | order_purchase_timestamp varchar(32), 28 | order_approved_at varchar(32), 29 | order_delivered_carrier_date varchar(32), 30 | order_delivered_customer_date varchar(32), 31 | order_estimated_delivery_date varchar(32), 32 | PRIMARY KEY(order_id) 33 | ); 34 | 35 | 36 | DROP TABLE IF EXISTS olist_order_items_dataset; 37 | CREATE TABLE olist_order_items_dataset ( 38 | order_id 
varchar(32), 39 | order_item_id int4, 40 | product_id varchar(32), 41 | seller_id varchar(32), 42 | shipping_limit_date varchar(32), 43 | price float4, 44 | freight_value float4, 45 | created_at TIMESTAMP DEFAULT NOW(), 46 | updated_at TIMESTAMP DEFAULT NOW(), 47 | PRIMARY KEY (order_id, order_item_id, product_id, seller_id), 48 | FOREIGN KEY (order_id) REFERENCES olist_orders_dataset(order_id), 49 | FOREIGN KEY (product_id) REFERENCES olist_products_dataset(product_id) 50 | ); 51 | 52 | DROP TABLE IF EXISTS olist_order_payments_dataset; 53 | CREATE TABLE olist_order_payments_dataset ( 54 | order_id varchar(32), 55 | payment_sequential int4, 56 | payment_type varchar(16), 57 | payment_installments int4, 58 | payment_value float4, 59 | PRIMARY KEY (order_id, payment_sequential) 60 | ); -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | mysql/ 131 | brazilian-ecommerce/ 132 | minio/ 133 | mariadb/ 134 | dbt_packages/ 135 | .idea/ 136 | psql/ -------------------------------------------------------------------------------- /ecom_analytics/bronze/seeds/product_category_name_translation.csv: -------------------------------------------------------------------------------- 1 | product_category_name,product_category_name_english 2 | beleza_saude,health_beauty 3 | informatica_acessorios,computers_accessories 4 | automotivo,auto 5 | cama_mesa_banho,bed_bath_table 6 | moveis_decoracao,furniture_decor 7 | esporte_lazer,sports_leisure 8 | perfumaria,perfumery 9 | utilidades_domesticas,housewares 10 | telefonia,telephony 11 | relogios_presentes,watches_gifts 12 | alimentos_bebidas,food_drink 13 | bebes,baby 14 | papelaria,stationery 15 | tablets_impressao_imagem,tablets_printing_image 16 | brinquedos,toys 17 | telefonia_fixa,fixed_telephony 18 | ferramentas_jardim,garden_tools 19 | fashion_bolsas_e_acessorios,fashion_bags_accessories 20 | eletroportateis,small_appliances 21 | consoles_games,consoles_games 22 | audio,audio 23 | fashion_calcados,fashion_shoes 24 | cool_stuff,cool_stuff 25 | malas_acessorios,luggage_accessories 26 | climatizacao,air_conditioning 27 | construcao_ferramentas_construcao,construction_tools_construction 28 | moveis_cozinha_area_de_servico_jantar_e_jardim,kitchen_dining_laundry_garden_furniture 29 | 
construcao_ferramentas_jardim,costruction_tools_garden 30 | fashion_roupa_masculina,fashion_male_clothing 31 | pet_shop,pet_shop 32 | moveis_escritorio,office_furniture 33 | market_place,market_place 34 | eletronicos,electronics 35 | eletrodomesticos,home_appliances 36 | artigos_de_festas,party_supplies 37 | casa_conforto,home_confort 38 | construcao_ferramentas_ferramentas,costruction_tools_tools 39 | agro_industria_e_comercio,agro_industry_and_commerce 40 | moveis_colchao_e_estofado,furniture_mattress_and_upholstery 41 | livros_tecnicos,books_technical 42 | casa_construcao,home_construction 43 | instrumentos_musicais,musical_instruments 44 | moveis_sala,furniture_living_room 45 | construcao_ferramentas_iluminacao,construction_tools_lights 46 | industria_comercio_e_negocios,industry_commerce_and_business 47 | alimentos,food 48 | artes,art 49 | moveis_quarto,furniture_bedroom 50 | livros_interesse_geral,books_general_interest 51 | construcao_ferramentas_seguranca,construction_tools_safety 52 | fashion_underwear_e_moda_praia,fashion_underwear_beach 53 | fashion_esporte,fashion_sport 54 | sinalizacao_e_seguranca,signaling_and_security 55 | pcs,computers 56 | artigos_de_natal,christmas_supplies 57 | fashion_roupa_feminina,fashio_female_clothing 58 | eletrodomesticos_2,home_appliances_2 59 | livros_importados,books_imported 60 | bebidas,drinks 61 | cine_foto,cine_photo 62 | la_cuisine,la_cuisine 63 | musica,music 64 | casa_conforto_2,home_comfort_2 65 | portateis_casa_forno_e_cafe,small_appliances_home_oven_and_coffee 66 | cds_dvds_musicais,cds_dvds_musicals 67 | dvds_blu_ray,dvds_blu_ray 68 | flores,flowers 69 | artes_e_artesanato,arts_and_craftmanship 70 | fraldas_higiene,diapers_and_hygiene 71 | fashion_roupa_infanto_juvenil,fashion_childrens_clothes 72 | seguros_e_servicos,security_and_services -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 
1 | # de03-apache-kyuuby 2 | 3 | ## Prepare infrastructure 4 | ```bash 5 | make build 6 | make up 7 | ``` 8 | 9 | ## Prepare MySQL data 10 | 11 | ```sql 12 | # copy CSV data to mysql container 13 | # cd path/to/brazilian-ecommerce/ 14 | docker cp brazilian-ecommerce/ de_mysql:/tmp/ 15 | docker cp mysql_schemas.sql de_mysql:/tmp/ 16 | 17 | # log in to mysql server as root 18 | make to_mysql_root 19 | CREATE DATABASE brazillian_ecommerce; 20 | USE brazillian_ecommerce; 21 | GRANT ALL PRIVILEGES ON *.* TO admin; 22 | SHOW GLOBAL VARIABLES LIKE 'LOCAL_INFILE'; 23 | SET GLOBAL LOCAL_INFILE=TRUE; 24 | # exit 25 | 26 | # run commands 27 | make to_mysql 28 | 29 | source /tmp/mysql_schemas.sql; 30 | show tables; 31 | 32 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_order_items_dataset.csv' INTO TABLE olist_order_items_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS; 33 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_order_payments_dataset.csv' INTO TABLE olist_order_payments_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS; 34 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_orders_dataset.csv' INTO TABLE olist_orders_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS; 35 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/olist_products_dataset.csv' INTO TABLE olist_products_dataset FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS; 36 | LOAD DATA LOCAL INFILE '/tmp/brazilian-ecommerce/product_category_name_translation.csv' INTO TABLE product_category_name_translation FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS; 37 | 38 | SELECT * FROM olist_order_items_dataset LIMIT 10; 39 | SELECT * FROM olist_order_payments_dataset LIMIT 10; 40 | SELECT * FROM olist_orders_dataset LIMIT 10; 41 | SELECT * FROM olist_products_dataset LIMIT 10; 42 | SELECT * FROM product_category_name_translation LIMIT 10; 43 | ``` 44 | 45 | 46 | ## Prepare PostgreSQL data 47 | 
```bash 48 | make to_psql 49 | 50 | create database metabaseappdb; 51 | create database ecom_analytics; 52 | ``` 53 | 54 | ## Prepare Delta tables in the warehouse data lake 55 | ```sql 56 | SHOW catalogs; 57 | 58 | SHOW SCHEMAS FROM warehouse; 59 | 60 | CREATE SCHEMA IF NOT EXISTS warehouse.bronze WITH (location='s3a://warehouse/bronze'); 61 | DROP table if EXISTS warehouse.bronze.mytable; 62 | CREATE TABLE warehouse.bronze.mytable (name varchar, id integer); 63 | INSERT INTO warehouse.bronze.mytable VALUES ( 'John', 1), ('Jane', 2); 64 | SELECT * FROM warehouse.bronze.mytable; 65 | 66 | CREATE SCHEMA IF NOT EXISTS warehouse.silver WITH (location='s3a://warehouse/silver'); 67 | 68 | -- https://docs.getdbt.com/reference/resource-properties/external 69 | -- https://github.com/dbt-labs/dbt-external-tables 70 | dbt run-operation stage_external_sources --vars "ext_full_refresh: true" 71 | ``` 72 | 73 | ## Run dbt 74 | ```bash 75 | cd ecom_analytics 76 | make run_bronze 77 | 78 | make run_external 79 | make run_silver 80 | make run_gold 81 | ``` -------------------------------------------------------------------------------- /ecom_analytics/silver/models/orders_sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: silver 5 | tables: 6 | - name: olist_products 7 | external: 8 | location: 's3a://warehouse/bronze/olist_products' 9 | using: delta 10 | columns: 11 | - name: product_id 12 | data_type: string 13 | - name: product_category_name 14 | data_type: string 15 | - name: product_name_lenght 16 | data_type: int 17 | - name: product_description_lenght 18 | data_type: int 19 | - name: product_photos_qty 20 | data_type: int 21 | - name: product_weight_g 22 | data_type: int 23 | - name: product_length_cm 24 | data_type: int 25 | - name: product_height_cm 26 | data_type: int 27 | - name: product_width_cm 28 | data_type: int 29 | - name: product_category_name_translation 30 | external: 31 | location:
's3a://warehouse/bronze/product_category_name_translation' 32 | using: delta 33 | columns: 34 | - name: product_category_name 35 | data_type: string 36 | - name: product_category_name_english 37 | data_type: string 38 | - name: olist_order_items 39 | external: 40 | location: 's3a://warehouse/bronze/olist_order_items' 41 | using: delta 42 | columns: 43 | - name: order_id 44 | data_type: string 45 | - name: order_item_id 46 | data_type: int 47 | - name: product_id 48 | data_type: string 49 | - name: seller_id 50 | data_type: string 51 | - name: shipping_limit_date 52 | data_type: string 53 | - name: price 54 | data_type: float 55 | - name: freight_value 56 | data_type: float 57 | - name: olist_order_payments 58 | external: 59 | location: 's3a://warehouse/bronze/olist_order_payments' 60 | using: delta 61 | columns: 62 | - name: order_id 63 | data_type: string 64 | - name: payment_sequential 65 | data_type: int 66 | - name: payment_type 67 | data_type: string 68 | - name: payment_installments 69 | data_type: int 70 | - name: payment_value 71 | data_type: float 72 | - name: olist_orders 73 | external: 74 | location: 's3a://warehouse/bronze/olist_orders' 75 | using: delta 76 | columns: 77 | - name: order_id 78 | data_type: string 79 | - name: customer_id 80 | data_type: string 81 | - name: order_status 82 | data_type: string 83 | - name: order_purchase_timestamp 84 | data_type: string 85 | - name: order_approved_at 86 | data_type: string 87 | - name: order_delivered_carrier_date 88 | data_type: string 89 | - name: order_delivered_customer_date 90 | data_type: string 91 | - name: order_estimated_delivery_date 92 | data_type: string -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | 5 | de_mysql: 6 | image: "mariadb" 7 | container_name: de_mysql 8 | volumes: 9 | - ./mysql:/var/lib/mysql 10 | ports: 11 | 
- "3306:3306" 12 | env_file: 13 | - .env 14 | networks: 15 | - data_network 16 | 17 | de_psql: 18 | image: postgres:15 19 | container_name: de_psql 20 | volumes: 21 | - ./psql:/var/lib/postgresql/data 22 | ports: 23 | - "5432:5432" 24 | env_file: 25 | - .env 26 | networks: 27 | - data_network 28 | 29 | minio: 30 | hostname: minio 31 | image: "minio/minio" 32 | container_name: minio 33 | ports: 34 | - "9001:9001" 35 | - "9000:9000" 36 | command: [ "server", "/data", "--console-address", ":9001" ] 37 | volumes: 38 | - ./minio/data:/data 39 | env_file: 40 | - .env 41 | networks: 42 | - data_network 43 | 44 | hive-metastore: 45 | container_name: hive-metastore 46 | hostname: hive-metastore 47 | image: "bitsondatadev/hive-metastore" 48 | ports: 49 | - "9083:9083" 50 | volumes: 51 | - ./hive-metastore/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro 52 | environment: 53 | METASTORE_DB_HOSTNAME: de_mysql 54 | networks: 55 | - data_network 56 | depends_on: 57 | - de_mysql 58 | - minio 59 | 60 | trino: 61 | container_name: trino 62 | image: "trinodb/trino" 63 | hostname: trino 64 | ports: 65 | - "8080:8080" 66 | volumes: 67 | - ./trino:/etc/trino 68 | networks: 69 | - data_network 70 | 71 | spark-master: 72 | build: 73 | context: ./spark 74 | dockerfile: ./Dockerfile 75 | container_name: "spark-master" 76 | environment: 77 | - SPARK_MODE=master 78 | - SPARK_LOCAL_IP=spark-master 79 | ports: 80 | - "7077:7077" 81 | volumes: 82 | - ./spark/spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf 83 | networks: 84 | - data_network 85 | 86 | spark-worker-1: 87 | image: docker.io/bitnami/spark:3.3 88 | container_name: "spark-worker-1" 89 | env_file: 90 | - .env 91 | depends_on: 92 | - spark-master 93 | networks: 94 | - data_network 95 | 96 | spark-worker-2: 97 | image: docker.io/bitnami/spark:3.3 98 | container_name: "spark-worker-2" 99 | env_file: 100 | - .env 101 | depends_on: 102 | - spark-master 103 | networks: 104 | - data_network 
105 | 106 | spark-thrift-server: 107 | build: 108 | context: ./spark 109 | dockerfile: ./Dockerfile 110 | container_name: "spark-thrift-server" 111 | depends_on: 112 | - spark-master 113 | - hive-metastore 114 | ports: 115 | - "4040:4040" 116 | - "10000:10000" 117 | command: sh -c " 118 | sleep 10 && ./sbin/start-thriftserver.sh --driver-java-options '-Dhive.metastore.uris=thrift://hive-metastore:9083' --master spark://spark-master:7077" 119 | volumes: 120 | - ./spark/spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf 121 | networks: 122 | - data_network 123 | 124 | metabase: 125 | image: metabase/metabase:latest 126 | container_name: "metabase" 127 | ports: 128 | - "3000:3000" 129 | env_file: 130 | - .env 131 | networks: 132 | - data_network 133 | 134 | networks: 135 | data_network: 136 | driver: bridge 137 | name: data_network 138 | -------------------------------------------------------------------------------- /ecom_analytics/silver/logs/dbt.log.legacy: -------------------------------------------------------------------------------- 1 | 2023-02-09 04:11:50.426752 (MainThread): invalid escape sequence '\w' 2 | 2023-02-09 04:11:50.632445 (MainThread): invalid escape sequence '\w' 3 | 2023-02-09 04:12:29.510111 (MainThread): invalid escape sequence '\w' 4 | 2023-02-09 04:12:29.751858 (MainThread): invalid escape sequence '\w' 5 | 2023-02-09 04:12:31.001846 (Thread-1 (worker)): unclosed 6 | 2023-02-09 04:12:33.042784 (Thread-1 (worker)): invalid escape sequence '\w' 7 | 2023-02-09 04:15:11.770701 (MainThread): invalid escape sequence '\w' 8 | 2023-02-09 04:15:11.997010 (MainThread): invalid escape sequence '\w' 9 | 2023-02-09 04:15:34.804687 (MainThread): invalid escape sequence '\w' 10 | 2023-02-09 04:15:35.029489 (MainThread): invalid escape sequence '\w' 11 | 2023-02-09 04:15:36.753810 (Thread-1 (worker)): unclosed 12 | 2023-02-09 04:15:36.754093 (Thread-1 (worker)): unclosed 13 | 2023-02-09 04:21:41.374560 (Thread-1 (worker)): invalid escape 
sequence '\w' 14 | 2023-02-09 04:21:41.388781 (Thread-1 (worker)): unclosed 15 | 2023-02-09 04:22:02.743843 (Thread-1 (worker)): invalid escape sequence '\w' 16 | 2023-02-09 04:22:02.746365 (Thread-1 (worker)): unclosed 17 | 2023-02-09 04:22:02.746598 (Thread-1 (worker)): unclosed 18 | 2023-02-09 04:26:33.091270 (ThreadPoolExecutor-1_0): unclosed 19 | 2023-02-09 04:26:33.263159 (MainThread): unclosed 20 | 2023-02-09 04:28:41.952780 (MainThread): invalid escape sequence '\w' 21 | 2023-02-09 04:28:42.197963 (MainThread): invalid escape sequence '\w' 22 | 2023-02-09 04:28:43.850074 (Thread-1 (worker)): unclosed 23 | 2023-02-09 04:28:43.850652 (Thread-1 (worker)): unclosed 24 | 2023-02-09 04:30:07.876905 (Thread-1 (worker)): unclosed 25 | 2023-02-09 04:30:07.877236 (Thread-1 (worker)): unclosed 26 | 2023-02-09 04:31:01.156498 (Thread-1 (worker)): unclosed 27 | 2023-02-09 04:31:01.156962 (Thread-1 (worker)): unclosed 28 | 2023-02-09 04:31:32.449736 (Thread-1 (worker)): unclosed 29 | 2023-02-09 04:31:58.671147 (Thread-1 (worker)): unclosed 30 | 2023-02-09 04:31:58.671521 (Thread-1 (worker)): unclosed 31 | 2023-02-09 04:32:32.555201 (MainThread): invalid escape sequence '\w' 32 | 2023-02-09 04:32:32.798852 (MainThread): invalid escape sequence '\w' 33 | 2023-02-09 04:32:34.182634 (Thread-1 (worker)): unclosed 34 | 2023-02-09 04:32:34.265385 (MainThread): unclosed 35 | 2023-02-09 04:32:34.265849 (MainThread): unclosed 36 | 2023-02-09 04:34:10.533182 (MainThread): invalid escape sequence '\w' 37 | 2023-02-09 04:34:10.765162 (MainThread): invalid escape sequence '\w' 38 | 2023-02-09 04:34:12.161120 (Thread-1 (worker)): unclosed 39 | 2023-02-09 04:34:12.239270 (MainThread): unclosed 40 | 2023-02-09 04:34:12.243146 (MainThread): unclosed 41 | 2023-02-09 04:35:10.406425 (MainThread): invalid escape sequence '\w' 42 | 2023-02-09 04:35:10.622482 (MainThread): invalid escape sequence '\w' 43 | 2023-02-09 04:35:11.177069 (Thread-1 (worker)): unclosed 44 | 2023-02-09 04:35:11.177342 
(Thread-1 (worker)): unclosed 45 | 2023-02-09 04:35:32.697283 (MainThread): invalid escape sequence '\w' 46 | 2023-02-09 04:35:32.931919 (MainThread): invalid escape sequence '\w' 47 | 2023-02-09 04:35:34.360454 (Thread-1 (worker)): unclosed 48 | 2023-02-09 04:35:34.478453 (MainThread): unclosed 49 | 2023-02-09 04:35:34.478835 (MainThread): unclosed 50 | 2023-02-09 04:36:12.713202 (MainThread): invalid escape sequence '\w' 51 | 2023-02-09 04:36:12.958863 (MainThread): invalid escape sequence '\w' 52 | 2023-02-09 04:36:14.277502 (Thread-1 (worker)): unclosed 53 | 2023-02-09 04:36:14.360654 (MainThread): unclosed 54 | 2023-02-09 04:36:14.361041 (MainThread): unclosed 55 | 2023-02-09 04:37:37.493767 (MainThread): unclosed 56 | 2023-02-09 04:37:37.494107 (MainThread): unclosed 57 | 2023-02-09 04:39:04.499048 (MainThread): unclosed 58 | 2023-02-09 04:39:04.499364 (MainThread): unclosed 59 | 2023-02-09 04:40:47.384557 (MainThread): invalid escape sequence '\w' 60 | 2023-02-09 04:40:47.631436 (MainThread): invalid escape sequence '\w' 61 | 2023-02-09 04:40:48.905204 (Thread-1 (worker)): unclosed 62 | 2023-02-09 04:40:49.000245 (MainThread): unclosed 63 | 2023-02-09 04:40:49.000648 (MainThread): unclosed 64 | 2023-02-09 04:41:05.794757 (ThreadPoolExecutor-1_0): unclosed 65 | 2023-02-09 04:41:06.026634 (MainThread): unclosed 66 | 2023-02-09 04:41:28.790231 (MainThread): unclosed 67 | 2023-02-09 04:41:28.792624 (MainThread): unclosed 68 | 2023-02-09 04:41:28.792928 (MainThread): unclosed 69 | 2023-02-09 04:41:49.806380 (MainThread): invalid escape sequence '\w' 70 | 2023-02-09 04:41:50.034561 (MainThread): invalid escape sequence '\w' 71 | 2023-02-09 04:41:50.529549 (MainThread): unclosed 72 | 2023-02-09 04:42:11.726956 (MainThread): unclosed 73 | 2023-02-09 04:42:11.728639 (MainThread): unclosed 74 | 2023-02-09 04:42:11.728857 (MainThread): unclosed 75 | 2023-02-09 04:43:21.140498 (Thread-1 (worker)): unclosed 76 | 2023-02-09 04:43:27.878836 (MainThread): unclosed 77 | 
2023-02-09 04:43:27.880264 (MainThread): unclosed 78 | 2023-02-09 04:43:27.880902 (MainThread): unclosed 79 | 2023-02-09 04:45:16.057697 (ThreadPoolExecutor-1_0): unclosed 80 | 2023-02-09 04:45:16.987011 (MainThread): unclosed 81 | 2023-02-09 04:45:16.987296 (MainThread): unclosed 82 | 2023-02-09 04:45:45.079604 (ThreadPoolExecutor-1_0): unclosed 83 | 2023-02-09 04:45:46.132482 (MainThread): unclosed 84 | 2023-02-09 04:45:46.132755 (MainThread): unclosed 85 | 2023-02-09 04:46:21.160324 (Thread-1 (worker)): unclosed 86 | 2023-02-09 04:47:45.571542 (ThreadPoolExecutor-1_0): unclosed 87 | 2023-02-09 04:47:46.547846 (MainThread): unclosed 88 | 2023-02-09 04:47:46.548134 (MainThread): unclosed 89 | 2023-02-09 04:48:15.573110 (MainThread): unclosed 90 | 2023-02-09 04:48:15.574495 (MainThread): unclosed 91 | 2023-02-09 04:48:15.574728 (MainThread): unclosed 92 | 2023-02-09 04:48:40.042155 (Thread-1 (worker)): unclosed 93 | 2023-02-09 04:48:40.043222 (Thread-1 (worker)): unclosed 94 | 2023-02-09 04:49:56.210585 (MainThread): invalid escape sequence '\w' 95 | 2023-02-09 04:49:56.441901 (MainThread): invalid escape sequence '\w' 96 | 2023-02-09 04:49:56.927294 (MainThread): unclosed 97 | 2023-02-09 04:52:40.664401 (MainThread): invalid escape sequence '\w' 98 | 2023-02-09 04:52:40.862365 (MainThread): invalid escape sequence '\w' 99 | 2023-02-09 04:56:43.512783 (Thread-1 (worker)): invalid escape sequence '\w' 100 | 2023-02-09 05:02:29.383644 (MainThread): invalid escape sequence '\w' 101 | 2023-02-09 05:02:29.580927 (MainThread): invalid escape sequence '\w' 102 | 2023-02-09 14:45:45.873565 (MainThread): unclosed 103 | -------------------------------------------------------------------------------- /ecom_analytics/bronze/logs/dbt.log.legacy: -------------------------------------------------------------------------------- 1 | 2023-02-09 04:11:50.426752 (MainThread): invalid escape sequence '\w' 2 | 2023-02-09 04:11:50.632445 (MainThread): invalid escape sequence '\w' 3 | 
2023-02-09 04:12:29.510111 (MainThread): invalid escape sequence '\w' 4 | 2023-02-09 04:12:29.751858 (MainThread): invalid escape sequence '\w' 5 | 2023-02-09 04:12:31.001846 (Thread-1 (worker)): unclosed 6 | 2023-02-09 04:12:33.042784 (Thread-1 (worker)): invalid escape sequence '\w' 7 | 2023-02-09 04:15:11.770701 (MainThread): invalid escape sequence '\w' 8 | 2023-02-09 04:15:11.997010 (MainThread): invalid escape sequence '\w' 9 | 2023-02-09 04:15:34.804687 (MainThread): invalid escape sequence '\w' 10 | 2023-02-09 04:15:35.029489 (MainThread): invalid escape sequence '\w' 11 | 2023-02-09 04:15:36.753810 (Thread-1 (worker)): unclosed 12 | 2023-02-09 04:15:36.754093 (Thread-1 (worker)): unclosed 13 | 2023-02-09 04:21:41.374560 (Thread-1 (worker)): invalid escape sequence '\w' 14 | 2023-02-09 04:21:41.388781 (Thread-1 (worker)): unclosed 15 | 2023-02-09 04:22:02.743843 (Thread-1 (worker)): invalid escape sequence '\w' 16 | 2023-02-09 04:22:02.746365 (Thread-1 (worker)): unclosed 17 | 2023-02-09 04:22:02.746598 (Thread-1 (worker)): unclosed 18 | 2023-02-09 04:26:33.091270 (ThreadPoolExecutor-1_0): unclosed 19 | 2023-02-09 04:26:33.263159 (MainThread): unclosed 20 | 2023-02-09 04:28:41.952780 (MainThread): invalid escape sequence '\w' 21 | 2023-02-09 04:28:42.197963 (MainThread): invalid escape sequence '\w' 22 | 2023-02-09 04:28:43.850074 (Thread-1 (worker)): unclosed 23 | 2023-02-09 04:28:43.850652 (Thread-1 (worker)): unclosed 24 | 2023-02-09 04:30:07.876905 (Thread-1 (worker)): unclosed 25 | 2023-02-09 04:30:07.877236 (Thread-1 (worker)): unclosed 26 | 2023-02-09 04:31:01.156498 (Thread-1 (worker)): unclosed 27 | 2023-02-09 04:31:01.156962 (Thread-1 (worker)): unclosed 28 | 2023-02-09 04:31:32.449736 (Thread-1 (worker)): unclosed 29 | 2023-02-09 04:31:58.671147 (Thread-1 (worker)): unclosed 30 | 2023-02-09 04:31:58.671521 (Thread-1 (worker)): unclosed 31 | 2023-02-09 04:32:32.555201 (MainThread): invalid escape sequence '\w' 32 | 2023-02-09 04:32:32.798852 
(MainThread): invalid escape sequence '\w' 33 | 2023-02-09 04:32:34.182634 (Thread-1 (worker)): unclosed 34 | 2023-02-09 04:32:34.265385 (MainThread): unclosed 35 | 2023-02-09 04:32:34.265849 (MainThread): unclosed 36 | 2023-02-09 04:34:10.533182 (MainThread): invalid escape sequence '\w' 37 | 2023-02-09 04:34:10.765162 (MainThread): invalid escape sequence '\w' 38 | 2023-02-09 04:34:12.161120 (Thread-1 (worker)): unclosed 39 | 2023-02-09 04:34:12.239270 (MainThread): unclosed 40 | 2023-02-09 04:34:12.243146 (MainThread): unclosed 41 | 2023-02-09 04:35:10.406425 (MainThread): invalid escape sequence '\w' 42 | 2023-02-09 04:35:10.622482 (MainThread): invalid escape sequence '\w' 43 | 2023-02-09 04:35:11.177069 (Thread-1 (worker)): unclosed 44 | 2023-02-09 04:35:11.177342 (Thread-1 (worker)): unclosed 45 | 2023-02-09 04:35:32.697283 (MainThread): invalid escape sequence '\w' 46 | 2023-02-09 04:35:32.931919 (MainThread): invalid escape sequence '\w' 47 | 2023-02-09 04:35:34.360454 (Thread-1 (worker)): unclosed 48 | 2023-02-09 04:35:34.478453 (MainThread): unclosed 49 | 2023-02-09 04:35:34.478835 (MainThread): unclosed 50 | 2023-02-09 04:36:12.713202 (MainThread): invalid escape sequence '\w' 51 | 2023-02-09 04:36:12.958863 (MainThread): invalid escape sequence '\w' 52 | 2023-02-09 04:36:14.277502 (Thread-1 (worker)): unclosed 53 | 2023-02-09 04:36:14.360654 (MainThread): unclosed 54 | 2023-02-09 04:36:14.361041 (MainThread): unclosed 55 | 2023-02-09 04:37:37.493767 (MainThread): unclosed 56 | 2023-02-09 04:37:37.494107 (MainThread): unclosed 57 | 2023-02-09 04:39:04.499048 (MainThread): unclosed 58 | 2023-02-09 04:39:04.499364 (MainThread): unclosed 59 | 2023-02-09 04:40:47.384557 (MainThread): invalid escape sequence '\w' 60 | 2023-02-09 04:40:47.631436 (MainThread): invalid escape sequence '\w' 61 | 2023-02-09 04:40:48.905204 (Thread-1 (worker)): unclosed 62 | 2023-02-09 04:40:49.000245 (MainThread): unclosed 63 | 2023-02-09 04:40:49.000648 (MainThread): unclosed 64 
| 2023-02-09 04:41:05.794757 (ThreadPoolExecutor-1_0): unclosed 65 | 2023-02-09 04:41:06.026634 (MainThread): unclosed 66 | 2023-02-09 04:41:28.790231 (MainThread): unclosed 67 | 2023-02-09 04:41:28.792624 (MainThread): unclosed 68 | 2023-02-09 04:41:28.792928 (MainThread): unclosed 69 | 2023-02-09 04:41:49.806380 (MainThread): invalid escape sequence '\w' 70 | 2023-02-09 04:41:50.034561 (MainThread): invalid escape sequence '\w' 71 | 2023-02-09 04:41:50.529549 (MainThread): unclosed 72 | 2023-02-09 04:42:11.726956 (MainThread): unclosed 73 | 2023-02-09 04:42:11.728639 (MainThread): unclosed 74 | 2023-02-09 04:42:11.728857 (MainThread): unclosed 75 | 2023-02-09 04:43:21.140498 (Thread-1 (worker)): unclosed 76 | 2023-02-09 04:43:27.878836 (MainThread): unclosed 77 | 2023-02-09 04:43:27.880264 (MainThread): unclosed 78 | 2023-02-09 04:43:27.880902 (MainThread): unclosed 79 | 2023-02-09 04:45:16.057697 (ThreadPoolExecutor-1_0): unclosed 80 | 2023-02-09 04:45:16.987011 (MainThread): unclosed 81 | 2023-02-09 04:45:16.987296 (MainThread): unclosed 82 | 2023-02-09 04:45:45.079604 (ThreadPoolExecutor-1_0): unclosed 83 | 2023-02-09 04:45:46.132482 (MainThread): unclosed 84 | 2023-02-09 04:45:46.132755 (MainThread): unclosed 85 | 2023-02-09 04:46:21.160324 (Thread-1 (worker)): unclosed 86 | 2023-02-09 04:47:45.571542 (ThreadPoolExecutor-1_0): unclosed 87 | 2023-02-09 04:47:46.547846 (MainThread): unclosed 88 | 2023-02-09 04:47:46.548134 (MainThread): unclosed 89 | 2023-02-09 04:48:15.573110 (MainThread): unclosed 90 | 2023-02-09 04:48:15.574495 (MainThread): unclosed 91 | 2023-02-09 04:48:15.574728 (MainThread): unclosed 92 | 2023-02-09 04:48:40.042155 (Thread-1 (worker)): unclosed 93 | 2023-02-09 04:48:40.043222 (Thread-1 (worker)): unclosed 94 | 2023-02-09 04:49:56.210585 (MainThread): invalid escape sequence '\w' 95 | 2023-02-09 04:49:56.441901 (MainThread): invalid escape sequence '\w' 96 | 2023-02-09 04:49:56.927294 (MainThread): unclosed 97 | 2023-02-09 04:52:40.664401 
(MainThread): invalid escape sequence '\w' 98 | 2023-02-09 04:52:40.862365 (MainThread): invalid escape sequence '\w' 99 | 2023-02-09 04:56:43.512783 (Thread-1 (worker)): invalid escape sequence '\w' 100 | 2023-02-09 05:02:29.383644 (MainThread): invalid escape sequence '\w' 101 | 2023-02-09 05:02:29.580927 (MainThread): invalid escape sequence '\w' 102 | 2023-02-09 05:14:53.616841 (MainThread): invalid escape sequence '\w' 103 | 2023-02-09 05:14:53.816135 (MainThread): invalid escape sequence '\w' 104 | 2023-02-09 05:25:31.427854 (MainThread): invalid escape sequence '\w' 105 | 2023-02-09 05:25:31.667175 (MainThread): invalid escape sequence '\w' 106 | 2023-02-09 05:26:39.328648 (Thread-1 (worker)): invalid escape sequence '\w' 107 | 2023-02-09 05:51:12.099730 (MainThread): invalid escape sequence '\w' 108 | 2023-02-09 05:51:12.299166 (MainThread): invalid escape sequence '\w' 109 | 2023-02-09 05:51:16.458965 (Thread-1 (worker)): invalid escape sequence '\w' 110 | 2023-02-11 09:19:53.250102 (MainThread): invalid escape sequence '\w' 111 | 2023-02-11 09:19:53.480457 (MainThread): invalid escape sequence '\w' 112 | 2023-02-11 09:43:59.833071 (MainThread): invalid escape sequence '\w' 113 | 2023-02-11 09:44:00.031005 (MainThread): invalid escape sequence '\w' 114 | 2023-02-11 09:46:31.520687 (MainThread): invalid escape sequence '\w' 115 | 2023-02-11 09:46:31.728998 (MainThread): invalid escape sequence '\w' 116 | 2023-02-11 09:57:40.082554 (MainThread): invalid escape sequence '\w' 117 | 2023-02-11 09:57:40.279248 (MainThread): invalid escape sequence '\w' 118 | 2023-02-11 09:59:40.255824 (MainThread): invalid escape sequence '\w' 119 | 2023-02-11 09:59:40.469556 (MainThread): invalid escape sequence '\w' 120 | 2023-02-11 10:00:45.492872 (MainThread): invalid escape sequence '\w' 121 | 2023-02-11 10:00:45.691330 (MainThread): invalid escape sequence '\w' 122 | 2023-02-11 10:12:39.303108 (Thread-1 (worker)): invalid escape sequence '\w' 123 | 2023-02-11 
11:09:49.188054 (MainThread): invalid escape sequence '\w' 124 | 2023-02-11 11:09:49.380699 (MainThread): invalid escape sequence '\w' 125 | 2023-02-11 11:19:11.002262 (Thread-1 (worker)): invalid escape sequence '\w' 126 | 2023-02-20 04:41:15.337495 (MainThread): invalid escape sequence '\w' 127 | 2023-02-20 04:41:15.562145 (MainThread): invalid escape sequence '\w' 128 | 2023-02-20 04:50:00.449422 (Thread-1 (worker)): invalid escape sequence '\w' 129 | --------------------------------------------------------------------------------