├── 1. Authentication ├── 1.access_adls_using_access_keys.py ├── 2.access_adls_using_sas_token.py ├── 3.access_adls_using_service_principal.py ├── 4.access_adls_using_cluster_scoped_credentials.py ├── 5.explore_dbutils_secrets_utility.py ├── 6.explore_dbfs_root.py ├── 7.mount_adls_using_service_principal.py └── 8.mount_adls_containers_for_project.py ├── 2. Includes_configs_and_commonfunctions ├── common_functions.py └── configuration.py ├── 3. Data Ingestion ├── 0.ingest_all_files.py ├── 1.ingest_circuits_file.py ├── 2.ingest_races_file.py ├── 3.ingest_constructors_file.py ├── 4.ingest_drivers_file.py ├── 5.ingest_results_file.py ├── 6.ingest_pit_stops_file.py ├── 7.ingest_lap_times_file.py ├── 8.ingest_qualifying_file.py └── 9.create_processed_database.sql ├── 4. raw └── 1.create_raw_tables.sql ├── 5. Data Transformation ├── 0.create_presentation_database.sql ├── 1.race_results.py ├── 2.driver_standings.py ├── 3.constructor_standings.py ├── 4.calculated_race_results.py └── calculated_race_results_sql.sql ├── 6. Data Analysis and Visualization ├── 1.find_dominant_drivers.sql ├── 2.find_dominant_teams.sql ├── 3.viz_dominant_drivers.sql └── 4.viz_dominant_teams.sql ├── 7. 
Power Bi Reports ├── 412.png ├── 413.png ├── 414.png ├── 415.png ├── 416.png ├── 423.png ├── 424.png ├── 425.png └── 426.png ├── Incremental_load_data ├── 2021-03-21 │ ├── .DS_Store │ ├── circuits.csv │ ├── constructors.json │ ├── drivers.json │ ├── lap_times │ │ ├── .DS_Store │ │ ├── lap_times_split_1.csv │ │ ├── lap_times_split_2.csv │ │ ├── lap_times_split_3.csv │ │ ├── lap_times_split_4.csv │ │ └── lap_times_split_5.csv │ ├── pit_stops.json │ ├── qualifying │ │ ├── .DS_Store │ │ ├── qualifying_split_1.json │ │ └── qualifying_split_2.json │ ├── races.csv │ └── results.json ├── 2021-03-28 │ ├── circuits.csv │ ├── constructors.json │ ├── drivers.json │ ├── lap_times │ │ └── lap_times_split_1.csv │ ├── pit_stops.json │ ├── qualifying │ │ └── qualifying_split_1.json │ ├── races.csv │ └── results.json └── 2021-04-18 │ ├── circuits.csv │ ├── constructors.json │ ├── drivers.json │ ├── lap_times │ └── lap_times_split_1.csv │ ├── pit_stops.json │ ├── qualifying │ └── qualifying_split_1.json │ ├── races.csv │ └── results.json ├── LICENSE ├── README.md ├── Screenshots ├── Step1 - Creating Resources.pdf ├── Step10 - Data Analysis(Delta Format).pdf ├── Step11 - Data Ingestion and Transformation using ADF.pdf ├── Step12 - Creating Power Bi Reports.pdf ├── Step2 - Setting Authentication.pdf ├── Step3 - Data Ingestion.pdf ├── Step4 - Data Transformation.pdf ├── Step5 - Creating External and Managed Tables.pdf ├── Step6 - Data Analysis.pdf ├── Step7 - Ingestion using Incremental Load.pdf ├── Step8 - Transformation using Incremental Load.pdf └── Step9 - Data Ingestion(Delta Format).pdf ├── demo ├── 1.filter_demo.py ├── 10.delta_lake_demo.py ├── 2.join_demo.py ├── 3.aggregation_demo.py ├── 4.sql_temp_view_demo.py ├── 5.sql_temp_view_demo.py ├── 6.sql_objects_demo.sql ├── 7.sql_basics_demo.sql ├── 8.sql_functions_demo.sql └── 9.sql_joins_demo.sql └── utils ├── 1.prepare_for_incremental_load.sql └── read.md /1. 
Authentication/1.access_adls_using_access_keys.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Access Azure Data Lake using access keys 4 | # MAGIC 1. Set the spark config fs.azure.account.key 5 | # MAGIC 1. List files from demo container 6 | # MAGIC 1. Read data from circuits.csv file 7 | 8 | # COMMAND ---------- 9 | 10 | formula1dl_account_key = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1dl-account-key') 11 | 12 | # COMMAND ---------- 13 | 14 | spark.conf.set( 15 | "fs.azure.account.key.formula1dl.dfs.core.windows.net", 16 | formula1dl_account_key) 17 | 18 | # COMMAND ---------- 19 | 20 | display(dbutils.fs.ls("abfss://demo@formula1dl.dfs.core.windows.net")) 21 | 22 | # COMMAND ---------- 23 | 24 | display(spark.read.csv("abfss://demo@formula1dl.dfs.core.windows.net/circuits.csv")) 25 | 26 | # COMMAND ---------- 27 | 28 | -------------------------------------------------------------------------------- /1. Authentication/2.access_adls_using_sas_token.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Access Azure Data Lake using SAS Token 4 | # MAGIC 1. Set the spark config for SAS Token 5 | # MAGIC 1. List files from demo container 6 | # MAGIC 1. 
Read data from circuits.csv file 7 | 8 | # COMMAND ---------- 9 | 10 | formula1dl_demo_sas_token = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1dl-demo-sas-token') 11 | 12 | # COMMAND ---------- 13 | 14 | spark.conf.set("fs.azure.account.auth.type.formula1dl.dfs.core.windows.net", "SAS") 15 | spark.conf.set("fs.azure.sas.token.provider.type.formula1dl.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider") 16 | spark.conf.set("fs.azure.sas.fixed.token.formula1dl.dfs.core.windows.net", formula1dl_demo_sas_token) 17 | 18 | # COMMAND ---------- 19 | 20 | display(dbutils.fs.ls("abfss://demo@formula1dl.dfs.core.windows.net")) 21 | 22 | # COMMAND ---------- 23 | 24 | display(spark.read.csv("abfss://demo@formula1dl.dfs.core.windows.net/circuits.csv")) 25 | 26 | # COMMAND ---------- 27 | 28 | -------------------------------------------------------------------------------- /1. Authentication/3.access_adls_using_service_principal.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Access Azure Data Lake using Service Principal 4 | # MAGIC #### Steps to follow 5 | # MAGIC 1. Register Azure AD Application / Service Principal 6 | # MAGIC 2. Generate a secret/ password for the Application 7 | # MAGIC 3. Set Spark Config with App/ Client Id, Directory/ Tenant Id & Secret 8 | # MAGIC 4. Assign Role 'Storage Blob Data Contributor' to the Data Lake. 
9 | 10 | # COMMAND ---------- 11 | 12 | client_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-id') 13 | tenant_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-tenant-id') 14 | client_secret = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-secret') 15 | 16 | # COMMAND ---------- 17 | 18 | spark.conf.set("fs.azure.account.auth.type.formula1dl.dfs.core.windows.net", "OAuth") 19 | spark.conf.set("fs.azure.account.oauth.provider.type.formula1dl.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 20 | spark.conf.set("fs.azure.account.oauth2.client.id.formula1dl.dfs.core.windows.net", client_id) 21 | spark.conf.set("fs.azure.account.oauth2.client.secret.formula1dl.dfs.core.windows.net", client_secret) 22 | spark.conf.set("fs.azure.account.oauth2.client.endpoint.formula1dl.dfs.core.windows.net", f"https://login.microsoftonline.com/{tenant_id}/oauth2/token") 23 | 24 | # COMMAND ---------- 25 | 26 | display(dbutils.fs.ls("abfss://demo@formula1dl.dfs.core.windows.net")) 27 | 28 | # COMMAND ---------- 29 | 30 | display(spark.read.csv("abfss://demo@formula1dl.dfs.core.windows.net/circuits.csv")) 31 | 32 | # COMMAND ---------- 33 | 34 | -------------------------------------------------------------------------------- /1. Authentication/4.access_adls_using_cluster_scoped_credentials.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Access Azure Data Lake using cluster scoped credentials 4 | # MAGIC 1. Set the spark config fs.azure.account.key in the cluster 5 | # MAGIC 1. List files from demo container 6 | # MAGIC 1. 
Read data from circuits.csv file 7 | 8 | # COMMAND ---------- 9 | 10 | display(dbutils.fs.ls("abfss://demo@formula1dl.dfs.core.windows.net")) 11 | 12 | # COMMAND ---------- 13 | 14 | display(spark.read.csv("abfss://demo@formula1dl.dfs.core.windows.net/circuits.csv")) 15 | 16 | # COMMAND ---------- 17 | 18 | -------------------------------------------------------------------------------- /1. Authentication/5.explore_dbutils_secrets_utility.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Explore the capabilities of the dbutils.secrets utility 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.secrets.help() 8 | 9 | # COMMAND ---------- 10 | 11 | dbutils.secrets.listScopes() 12 | 13 | # COMMAND ---------- 14 | 15 | dbutils.secrets.list(scope = 'formula1-scope') 16 | 17 | # COMMAND ---------- 18 | 19 | dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1dl-account-key') 20 | 21 | # COMMAND ---------- 22 | 23 | -------------------------------------------------------------------------------- /1. Authentication/6.explore_dbfs_root.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Explore DBFS Root 4 | # MAGIC 1. List all the folders in DBFS root 5 | # MAGIC 2. Interact with DBFS File Browser 6 | # MAGIC 3. Upload file to DBFS Root 7 | 8 | # COMMAND ---------- 9 | 10 | display(dbutils.fs.ls('/')) 11 | 12 | # COMMAND ---------- 13 | 14 | display(dbutils.fs.ls('/FileStore')) 15 | 16 | # COMMAND ---------- 17 | 18 | display(spark.read.csv('/FileStore/circuits.csv')) 19 | 20 | # COMMAND ---------- 21 | 22 | -------------------------------------------------------------------------------- /1. 
Authentication/7.mount_adls_using_service_principal.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Mount Azure Data Lake using Service Principal 4 | # MAGIC #### Steps to follow 5 | # MAGIC 1. Get client_id, tenant_id and client_secret from key vault 6 | # MAGIC 2. Set Spark Config with App/ Client Id, Directory/ Tenant Id & Secret 7 | # MAGIC 3. Call file system utlity mount to mount the storage 8 | # MAGIC 4. Explore other file system utlities related to mount (list all mounts, unmount) 9 | 10 | # COMMAND ---------- 11 | 12 | client_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-id') 13 | tenant_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-tenant-id') 14 | client_secret = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-secret') 15 | 16 | # COMMAND ---------- 17 | 18 | configs = {"fs.azure.account.auth.type": "OAuth", 19 | "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", 20 | "fs.azure.account.oauth2.client.id": client_id, 21 | "fs.azure.account.oauth2.client.secret": client_secret, 22 | "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"} 23 | 24 | # COMMAND ---------- 25 | 26 | dbutils.fs.mount( 27 | source = "abfss://demo@formula1dl.dfs.core.windows.net/", 28 | mount_point = "/mnt/formula1dl/demo", 29 | extra_configs = configs) 30 | 31 | # COMMAND ---------- 32 | 33 | display(dbutils.fs.ls("/mnt/formula1dl/demo")) 34 | 35 | # COMMAND ---------- 36 | 37 | display(spark.read.csv("/mnt/formula1dl/demo/circuits.csv")) 38 | 39 | # COMMAND ---------- 40 | 41 | display(dbutils.fs.mounts()) 42 | 43 | # COMMAND ---------- 44 | 45 | dbutils.fs.unmount('/mnt/formula1dl/demo') 46 | 47 | # COMMAND ---------- 48 | 49 | 
-------------------------------------------------------------------------------- /1. Authentication/8.mount_adls_containers_for_project.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Mount Azure Data Lake Containers for the Project 4 | 5 | # COMMAND ---------- 6 | 7 | def mount_adls(storage_account_name, container_name): 8 | # Get secrets from Key Vault 9 | client_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-id') 10 | tenant_id = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-tenant-id') 11 | client_secret = dbutils.secrets.get(scope = 'formula1-scope', key = 'formula1-app-client-secret') 12 | 13 | # Set spark configurations 14 | configs = {"fs.azure.account.auth.type": "OAuth", 15 | "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", 16 | "fs.azure.account.oauth2.client.id": client_id, 17 | "fs.azure.account.oauth2.client.secret": client_secret, 18 | "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"} 19 | 20 | # Unmount the mount point if it already exists 21 | if any(mount.mountPoint == f"/mnt/{storage_account_name}/{container_name}" for mount in dbutils.fs.mounts()): 22 | dbutils.fs.unmount(f"/mnt/{storage_account_name}/{container_name}") 23 | 24 | # Mount the storage account container 25 | dbutils.fs.mount( 26 | source = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/", 27 | mount_point = f"/mnt/{storage_account_name}/{container_name}", 28 | extra_configs = configs) 29 | 30 | display(dbutils.fs.mounts()) 31 | 32 | # COMMAND ---------- 33 | 34 | # MAGIC %md 35 | # MAGIC ##### Mount Raw Container 36 | 37 | # COMMAND ---------- 38 | 39 | mount_adls('formula1dl', 'raw') 40 | 41 | # COMMAND ---------- 42 | 43 | mount_adls('formula1dl', 'processed') 44 | 45 | # COMMAND ---------- 46 | 
47 | mount_adls('formula1dl', 'presentation') 48 | 49 | # COMMAND ---------- 50 | 51 | -------------------------------------------------------------------------------- /2. Includes_configs_and_commonfunctions/common_functions.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | from pyspark.sql.functions import current_timestamp 3 | def add_ingestion_date(input_df): 4 | output_df = input_df.withColumn("ingestion_date", current_timestamp()) 5 | return output_df 6 | 7 | # COMMAND ---------- 8 | 9 | def re_arrange_partition_column(input_df, partition_column): 10 | column_list = [] 11 | for column_name in input_df.schema.names: 12 | if column_name != partition_column: 13 | column_list.append(column_name) 14 | column_list.append(partition_column) 15 | output_df = input_df.select(column_list) 16 | return output_df 17 | 18 | # COMMAND ---------- 19 | 20 | def overwrite_partition(input_df, db_name, table_name, partition_column): 21 | output_df = re_arrange_partition_column(input_df, partition_column) 22 | spark.conf.set("spark.sql.sources.partitionOverwriteMode","dynamic") 23 | if (spark._jsparkSession.catalog().tableExists(f"{db_name}.{table_name}")): 24 | output_df.write.mode("overwrite").insertInto(f"{db_name}.{table_name}") 25 | else: 26 | output_df.write.mode("overwrite").partitionBy(partition_column).format("parquet").saveAsTable(f"{db_name}.{table_name}") 27 | 28 | # COMMAND ---------- 29 | 30 | def df_column_to_list(input_df, column_name): 31 | df_row_list = input_df.select(column_name) \ 32 | .distinct() \ 33 | .collect() 34 | 35 | column_value_list = [row[column_name] for row in df_row_list] 36 | return column_value_list 37 | 38 | # COMMAND ---------- 39 | 40 | def merge_delta_data(input_df, db_name, table_name, folder_path, merge_condition, partition_column): 41 | spark.conf.set("spark.databricks.optimizer.dynamicPartitionPruning","true") 42 | 43 | from delta.tables import DeltaTable 44 | if 
(spark._jsparkSession.catalog().tableExists(f"{db_name}.{table_name}")): 45 | deltaTable = DeltaTable.forPath(spark, f"{folder_path}/{table_name}") 46 | deltaTable.alias("tgt").merge( 47 | input_df.alias("src"), 48 | merge_condition) \ 49 | .whenMatchedUpdateAll()\ 50 | .whenNotMatchedInsertAll()\ 51 | .execute() 52 | else: 53 | input_df.write.mode("overwrite").partitionBy(partition_column).format("delta").saveAsTable(f"{db_name}.{table_name}") 54 | 55 | # COMMAND ---------- 56 | 57 | -------------------------------------------------------------------------------- /2. Includes_configs_and_commonfunctions/configuration.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | raw_folder_path = 'abfss://raw@erformula1dl.dfs.core.windows.net' 3 | processed_folder_path = 'abfss://processed@erformula1dl.dfs.core.windows.net' 4 | presentation_folder_path = 'abfss://presentation@erformula1dl.dfs.core.windows.net' 5 | 6 | # COMMAND ---------- 7 | 8 | -------------------------------------------------------------------------------- /3. 
Data Ingestion/0.ingest_all_files.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | v_result = dbutils.notebook.run("1.ingest_circuits_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 3 | 4 | # COMMAND ---------- 5 | 6 | v_result 7 | 8 | # COMMAND ---------- 9 | 10 | v_result = dbutils.notebook.run("2.ingest_races_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 11 | 12 | # COMMAND ---------- 13 | 14 | v_result 15 | 16 | # COMMAND ---------- 17 | 18 | v_result = dbutils.notebook.run("3.ingest_constructors_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 19 | 20 | # COMMAND ---------- 21 | 22 | v_result 23 | 24 | # COMMAND ---------- 25 | 26 | v_result = dbutils.notebook.run("4.ingest_drivers_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 27 | 28 | # COMMAND ---------- 29 | 30 | v_result 31 | 32 | # COMMAND ---------- 33 | 34 | v_result = dbutils.notebook.run("5.ingest_results_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 35 | 36 | # COMMAND ---------- 37 | 38 | v_result 39 | 40 | # COMMAND ---------- 41 | 42 | v_result = dbutils.notebook.run("6.ingest_pit_stops_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 43 | 44 | # COMMAND ---------- 45 | 46 | v_result 47 | 48 | # COMMAND ---------- 49 | 50 | v_result = dbutils.notebook.run("7.ingest_lap_times_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 51 | 52 | # COMMAND ---------- 53 | 54 | v_result 55 | 56 | # COMMAND ---------- 57 | 58 | v_result = dbutils.notebook.run("8.ingest_qualifying_file", 0, {"p_data_source": "Ergast API", "p_file_date": "2021-04-18"}) 59 | 60 | # COMMAND ---------- 61 | 62 | v_result 63 | 64 | # COMMAND ---------- 65 | 66 | -------------------------------------------------------------------------------- /3. 
Data Ingestion/1.ingest_circuits_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest circuits.csv file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the CSV file using the spark dataframe reader 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType 31 | 32 | # COMMAND ---------- 33 | 34 | circuits_schema = StructType(fields=[StructField("circuitId", IntegerType(), False), 35 | StructField("circuitRef", StringType(), True), 36 | StructField("name", StringType(), True), 37 | StructField("location", StringType(), True), 38 | StructField("country", StringType(), True), 39 | StructField("lat", DoubleType(), True), 40 | StructField("lng", DoubleType(), True), 41 | StructField("alt", IntegerType(), True), 42 | StructField("url", StringType(), True) 43 | ]) 44 | 45 | # COMMAND ---------- 46 | 47 | circuits_df = spark.read \ 48 | .option("header", True) \ 49 | .schema(circuits_schema) \ 50 | .csv(f"{raw_folder_path}/{v_file_date}/circuits.csv") 51 | 52 | # COMMAND ---------- 53 | 54 | # MAGIC %md 55 | # MAGIC ##### Step 2 - Select only the required columns 56 | 57 | # COMMAND ---------- 58 | 59 | from pyspark.sql.functions import col 60 | 61 | # COMMAND ---------- 62 | 63 | circuits_selected_df = circuits_df.select(col("circuitId"), col("circuitRef"), col("name"), col("location"), col("country"), col("lat"), 
col("lng"), col("alt")) 64 | 65 | # COMMAND ---------- 66 | 67 | # MAGIC %md 68 | # MAGIC ##### Step 3 - Rename the columns as required 69 | 70 | # COMMAND ---------- 71 | 72 | from pyspark.sql.functions import lit 73 | 74 | # COMMAND ---------- 75 | 76 | circuits_renamed_df = circuits_selected_df.withColumnRenamed("circuitId", "circuit_id") \ 77 | .withColumnRenamed("circuitRef", "circuit_ref") \ 78 | .withColumnRenamed("lat", "latitude") \ 79 | .withColumnRenamed("lng", "longitude") \ 80 | .withColumnRenamed("alt", "altitude") \ 81 | .withColumn("data_source", lit(v_data_source)) \ 82 | .withColumn("file_date", lit(v_file_date)) 83 | 84 | # COMMAND ---------- 85 | 86 | # MAGIC %md 87 | # MAGIC ##### Step 4 - Add ingestion date to the dataframe 88 | 89 | # COMMAND ---------- 90 | 91 | circuits_final_df = add_ingestion_date(circuits_renamed_df) 92 | 93 | # COMMAND ---------- 94 | 95 | # MAGIC %md 96 | # MAGIC ##### Step 5 - Write data to datalake as parquet 97 | 98 | # COMMAND ---------- 99 | 100 | circuits_final_df.write.mode("overwrite").format("delta").saveAsTable("f1_processed.circuits") 101 | 102 | # COMMAND ---------- 103 | 104 | # MAGIC %sql 105 | # MAGIC SELECT * FROM f1_processed.circuits; 106 | 107 | # COMMAND ---------- 108 | 109 | dbutils.notebook.exit("Success") -------------------------------------------------------------------------------- /3. 
Data Ingestion/2.ingest_races_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest races.csv file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the CSV file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType 31 | 32 | # COMMAND ---------- 33 | 34 | races_schema = StructType(fields=[StructField("raceId", IntegerType(), False), 35 | StructField("year", IntegerType(), True), 36 | StructField("round", IntegerType(), True), 37 | StructField("circuitId", IntegerType(), True), 38 | StructField("name", StringType(), True), 39 | StructField("date", DateType(), True), 40 | StructField("time", StringType(), True), 41 | StructField("url", StringType(), True) 42 | ]) 43 | 44 | # COMMAND ---------- 45 | 46 | races_df = spark.read \ 47 | .option("header", True) \ 48 | .schema(races_schema) \ 49 | .csv(f"{raw_folder_path}/{v_file_date}/races.csv") 50 | 51 | # COMMAND ---------- 52 | 53 | # MAGIC %md 54 | # MAGIC ##### Step 2 - Add ingestion date and race_timestamp to the dataframe 55 | 56 | # COMMAND ---------- 57 | 58 | from pyspark.sql.functions import to_timestamp, concat, col, lit 59 | 60 | # COMMAND ---------- 61 | 62 | races_with_timestamp_df = races_df.withColumn("race_timestamp", to_timestamp(concat(col('date'), lit(' '), col('time')), 'yyyy-MM-dd HH:mm:ss')) \ 63 | 
.withColumn("data_source", lit(v_data_source)) \ 64 | .withColumn("file_date", lit(v_file_date)) 65 | 66 | # COMMAND ---------- 67 | 68 | races_with_ingestion_date_df = add_ingestion_date(races_with_timestamp_df) 69 | 70 | # COMMAND ---------- 71 | 72 | # MAGIC %md 73 | # MAGIC ##### Step 3 - Select only the columns required & rename as required 74 | 75 | # COMMAND ---------- 76 | 77 | races_selected_df = races_with_ingestion_date_df.select(col('raceId').alias('race_id'), col('year').alias('race_year'), 78 | col('round'), col('circuitId').alias('circuit_id'),col('name'), col('ingestion_date'), col('race_timestamp')) 79 | 80 | # COMMAND ---------- 81 | 82 | # MAGIC %md 83 | # MAGIC ##### Write the output to processed container in parquet format 84 | 85 | # COMMAND ---------- 86 | 87 | races_selected_df.write.mode("overwrite").partitionBy('race_year').format("delta").saveAsTable("f1_processed.races") 88 | 89 | # COMMAND ---------- 90 | 91 | # MAGIC %sql 92 | # MAGIC SELECT * FROM f1_processed.races; 93 | 94 | # COMMAND ---------- 95 | 96 | dbutils.notebook.exit("Success") -------------------------------------------------------------------------------- /3. 
Data Ingestion/3.ingest_constructors_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest constructors.json file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the JSON file using the spark dataframe reader 27 | 28 | # COMMAND ---------- 29 | 30 | constructors_schema = "constructorId INT, constructorRef STRING, name STRING, nationality STRING, url STRING" 31 | 32 | # COMMAND ---------- 33 | 34 | constructor_df = spark.read \ 35 | .schema(constructors_schema) \ 36 | .json(f"{raw_folder_path}/{v_file_date}/constructors.json") 37 | 38 | # COMMAND ---------- 39 | 40 | # MAGIC %md 41 | # MAGIC ##### Step 2 - Drop unwanted columns from the dataframe 42 | 43 | # COMMAND ---------- 44 | 45 | from pyspark.sql.functions import col 46 | 47 | # COMMAND ---------- 48 | 49 | constructor_dropped_df = constructor_df.drop(col('url')) 50 | 51 | # COMMAND ---------- 52 | 53 | # MAGIC %md 54 | # MAGIC ##### Step 3 - Rename columns and add ingestion date 55 | 56 | # COMMAND ---------- 57 | 58 | from pyspark.sql.functions import lit 59 | 60 | # COMMAND ---------- 61 | 62 | constructor_renamed_df = constructor_dropped_df.withColumnRenamed("constructorId", "constructor_id") \ 63 | .withColumnRenamed("constructorRef", "constructor_ref") \ 64 | .withColumn("data_source", lit(v_data_source)) \ 65 | .withColumn("file_date", lit(v_file_date)) 66 | 67 | # COMMAND ---------- 68 | 69 | constructor_final_df = 
add_ingestion_date(constructor_renamed_df) 70 | 71 | # COMMAND ---------- 72 | 73 | # MAGIC %md 74 | # MAGIC ##### Step 4 Write output to parquet file 75 | 76 | # COMMAND ---------- 77 | 78 | constructor_final_df.write.mode("overwrite").format("delta").saveAsTable("f1_processed.constructors") 79 | 80 | # COMMAND ---------- 81 | 82 | # MAGIC %sql 83 | # MAGIC SELECT * FROM f1_processed.constructors; 84 | 85 | # COMMAND ---------- 86 | 87 | dbutils.notebook.exit("Success") -------------------------------------------------------------------------------- /3. Data Ingestion/4.ingest_drivers_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest drivers.json file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the JSON file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType 31 | 32 | # COMMAND ---------- 33 | 34 | name_schema = StructType(fields=[StructField("forename", StringType(), True), 35 | StructField("surname", StringType(), True) 36 | 37 | ]) 38 | 39 | # COMMAND ---------- 40 | 41 | drivers_schema = StructType(fields=[StructField("driverId", IntegerType(), False), 42 | StructField("driverRef", StringType(), True), 43 | StructField("number", IntegerType(), True), 44 | StructField("code", StringType(), True), 45 | StructField("name", name_schema), 46 | StructField("dob", 
DateType(), True), 47 | StructField("nationality", StringType(), True), 48 | StructField("url", StringType(), True) 49 | ]) 50 | 51 | # COMMAND ---------- 52 | 53 | drivers_df = spark.read \ 54 | .schema(drivers_schema) \ 55 | .json(f"{raw_folder_path}/{v_file_date}/drivers.json") 56 | 57 | # COMMAND ---------- 58 | 59 | # MAGIC %md 60 | # MAGIC ##### Step 2 - Rename columns and add new columns 61 | # MAGIC 1. driverId renamed to driver_id 62 | # MAGIC 1. driverRef renamed to driver_ref 63 | # MAGIC 1. ingestion date added 64 | # MAGIC 1. name added with concatenation of forename and surname 65 | 66 | # COMMAND ---------- 67 | 68 | from pyspark.sql.functions import col, concat, lit 69 | 70 | # COMMAND ---------- 71 | 72 | drivers_with_ingestion_date_df = add_ingestion_date(drivers_df) 73 | 74 | # COMMAND ---------- 75 | 76 | drivers_with_columns_df = drivers_with_ingestion_date_df.withColumnRenamed("driverId", "driver_id") \ 77 | .withColumnRenamed("driverRef", "driver_ref") \ 78 | .withColumn("name", concat(col("name.forename"), lit(" "), col("name.surname"))) \ 79 | .withColumn("data_source", lit(v_data_source)) \ 80 | .withColumn("file_date", lit(v_file_date)) 81 | 82 | # COMMAND ---------- 83 | 84 | # MAGIC %md 85 | # MAGIC ##### Step 3 - Drop the unwanted columns 86 | # MAGIC 1. name.forename 87 | # MAGIC 1. name.surname 88 | # MAGIC 1. 
url 89 | 90 | # COMMAND ---------- 91 | 92 | drivers_final_df = drivers_with_columns_df.drop(col("url")) 93 | 94 | # COMMAND ---------- 95 | 96 | # MAGIC %md 97 | # MAGIC ##### Step 4 - Write to output to processed container in parquet format 98 | 99 | # COMMAND ---------- 100 | 101 | drivers_final_df.write.mode("overwrite").format("delta").saveAsTable("f1_processed.drivers") 102 | 103 | # COMMAND ---------- 104 | 105 | # MAGIC %sql 106 | # MAGIC SELECT * FROM f1_processed.drivers 107 | 108 | # COMMAND ---------- 109 | 110 | dbutils.notebook.exit("Success") -------------------------------------------------------------------------------- /3. Data Ingestion/5.ingest_results_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest results.json file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-28") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the JSON file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType 31 | 32 | # COMMAND ---------- 33 | 34 | results_schema = StructType(fields=[StructField("resultId", IntegerType(), False), 35 | StructField("raceId", IntegerType(), True), 36 | StructField("driverId", IntegerType(), True), 37 | StructField("constructorId", IntegerType(), True), 38 | StructField("number", IntegerType(), True), 39 | StructField("grid", IntegerType(), True), 40 | StructField("position", IntegerType(), 
True), 41 | StructField("positionText", StringType(), True), 42 | StructField("positionOrder", IntegerType(), True), 43 | StructField("points", FloatType(), True), 44 | StructField("laps", IntegerType(), True), 45 | StructField("time", StringType(), True), 46 | StructField("milliseconds", IntegerType(), True), 47 | StructField("fastestLap", IntegerType(), True), 48 | StructField("rank", IntegerType(), True), 49 | StructField("fastestLapTime", StringType(), True), 50 | StructField("fastestLapSpeed", FloatType(), True), 51 | StructField("statusId", StringType(), True)]) 52 | 53 | # COMMAND ---------- 54 | 55 | results_df = spark.read \ 56 | .schema(results_schema) \ 57 | .json(f"{raw_folder_path}/{v_file_date}/results.json") 58 | 59 | # COMMAND ---------- 60 | 61 | # MAGIC %md 62 | # MAGIC ##### Step 2 - Rename columns and add new columns 63 | 64 | # COMMAND ---------- 65 | 66 | from pyspark.sql.functions import lit 67 | 68 | # COMMAND ---------- 69 | 70 | results_with_columns_df = results_df.withColumnRenamed("resultId", "result_id") \ 71 | .withColumnRenamed("raceId", "race_id") \ 72 | .withColumnRenamed("driverId", "driver_id") \ 73 | .withColumnRenamed("constructorId", "constructor_id") \ 74 | .withColumnRenamed("positionText", "position_text") \ 75 | .withColumnRenamed("positionOrder", "position_order") \ 76 | .withColumnRenamed("fastestLap", "fastest_lap") \ 77 | .withColumnRenamed("fastestLapTime", "fastest_lap_time") \ 78 | .withColumnRenamed("fastestLapSpeed", "fastest_lap_speed") \ 79 | .withColumn("data_source", lit(v_data_source)) \ 80 | .withColumn("file_date", lit(v_file_date)) 81 | 82 | # COMMAND ---------- 83 | 84 | results_with_ingestion_date_df = add_ingestion_date(results_with_columns_df) 85 | 86 | # COMMAND ---------- 87 | 88 | # MAGIC %md 89 | # MAGIC ##### Step 3 - Drop the unwanted column 90 | 91 | # COMMAND ---------- 92 | 93 | from pyspark.sql.functions import col 94 | 95 | # COMMAND ---------- 96 | 97 | results_final_df = 
results_with_ingestion_date_df.drop(col("statusId")) 98 | 99 | # COMMAND ---------- 100 | 101 | # MAGIC %md 102 | # MAGIC De-dupe the dataframe 103 | 104 | # COMMAND ---------- 105 | 106 | results_deduped_df = results_final_df.dropDuplicates(['race_id', 'driver_id']) 107 | 108 | # COMMAND ---------- 109 | 110 | # MAGIC %md 111 | # MAGIC ##### Step 4 - Write to output to processed container in parquet format 112 | 113 | # COMMAND ---------- 114 | 115 | # MAGIC %md 116 | # MAGIC Method 1 117 | 118 | # COMMAND ---------- 119 | 120 | # for race_id_list in results_final_df.select("race_id").distinct().collect(): 121 | # if (spark._jsparkSession.catalog().tableExists("f1_processed.results")): 122 | # spark.sql(f"ALTER TABLE f1_processed.results DROP IF EXISTS PARTITION (race_id = {race_id_list.race_id})") 123 | 124 | # COMMAND ---------- 125 | 126 | # results_final_df.write.mode("append").partitionBy('race_id').format("parquet").saveAsTable("f1_processed.results") 127 | 128 | # COMMAND ---------- 129 | 130 | # MAGIC %md 131 | # MAGIC Method 2 132 | 133 | # COMMAND ---------- 134 | 135 | # overwrite_partition(results_final_df, 'f1_processed', 'results', 'race_id') 136 | 137 | # COMMAND ---------- 138 | 139 | merge_condition = "tgt.result_id = src.result_id AND tgt.race_id = src.race_id" 140 | merge_delta_data(results_deduped_df, 'f1_processed', 'results', processed_folder_path, merge_condition, 'race_id') 141 | 142 | # COMMAND ---------- 143 | 144 | dbutils.notebook.exit("Success") 145 | 146 | # COMMAND ---------- 147 | 148 | # MAGIC %sql 149 | # MAGIC SELECT COUNT(1) 150 | # MAGIC FROM f1_processed.results; 151 | 152 | # COMMAND ---------- 153 | 154 | # MAGIC %sql 155 | # MAGIC SELECT race_id, driver_id, COUNT(1) 156 | # MAGIC FROM f1_processed.results 157 | # MAGIC GROUP BY race_id, driver_id 158 | # MAGIC HAVING COUNT(1) > 1 159 | # MAGIC ORDER BY race_id, driver_id DESC; 160 | 161 | # COMMAND ---------- 162 | 163 | # MAGIC %sql SELECT * FROM f1_processed.results WHERE 
race_id = 540 AND driver_id = 229; 164 | 165 | # COMMAND ---------- 166 | 167 | -------------------------------------------------------------------------------- /3. Data Ingestion/6.ingest_pit_stops_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest pit_stops.json file 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-28") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the JSON file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType 31 | 32 | # COMMAND ---------- 33 | 34 | pit_stops_schema = StructType(fields=[StructField("raceId", IntegerType(), False), 35 | StructField("driverId", IntegerType(), True), 36 | StructField("stop", StringType(), True), 37 | StructField("lap", IntegerType(), True), 38 | StructField("time", StringType(), True), 39 | StructField("duration", StringType(), True), 40 | StructField("milliseconds", IntegerType(), True) 41 | ]) 42 | 43 | # COMMAND ---------- 44 | 45 | pit_stops_df = spark.read \ 46 | .schema(pit_stops_schema) \ 47 | .option("multiLine", True) \ 48 | .json(f"{raw_folder_path}/{v_file_date}/pit_stops.json") 49 | 50 | # COMMAND ---------- 51 | 52 | # MAGIC %md 53 | # MAGIC ##### Step 2 - Rename columns and add new columns 54 | # MAGIC 1. Rename driverId and raceId 55 | # MAGIC 1. 
Add ingestion_date with current timestamp 56 | 57 | # COMMAND ---------- 58 | 59 | pit_stops_with_ingestion_date_df = add_ingestion_date(pit_stops_df) 60 | 61 | # COMMAND ---------- 62 | 63 | from pyspark.sql.functions import lit 64 | 65 | # COMMAND ---------- 66 | 67 | final_df = pit_stops_with_ingestion_date_df.withColumnRenamed("driverId", "driver_id") \ 68 | .withColumnRenamed("raceId", "race_id") \ 69 | .withColumn("ingestion_date", current_timestamp()) \ 70 | .withColumn("data_source", lit(v_data_source)) \ 71 | .withColumn("file_date", lit(v_file_date)) 72 | 73 | # COMMAND ---------- 74 | 75 | # MAGIC %md 76 | # MAGIC ##### Step 3 - Write to output to processed container in parquet format 77 | 78 | # COMMAND ---------- 79 | 80 | #overwrite_partition(final_df, 'f1_processed', 'pit_stops', 'race_id') 81 | 82 | # COMMAND ---------- 83 | 84 | merge_condition = "tgt.race_id = src.race_id AND tgt.driver_id = src.driver_id AND tgt.stop = src.stop AND tgt.race_id = src.race_id" 85 | merge_delta_data(final_df, 'f1_processed', 'pit_stops', processed_folder_path, merge_condition, 'race_id') 86 | 87 | # COMMAND ---------- 88 | 89 | dbutils.notebook.exit("Success") 90 | 91 | # COMMAND ---------- 92 | 93 | # MAGIC %sql 94 | # MAGIC SELECT * FROM f1_processed.pit_stops; 95 | 96 | # COMMAND ---------- 97 | 98 | -------------------------------------------------------------------------------- /3. 
Data Ingestion/7.ingest_lap_times_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest lap_times folder 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the CSV file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType 31 | 32 | # COMMAND ---------- 33 | 34 | lap_times_schema = StructType(fields=[StructField("raceId", IntegerType(), False), 35 | StructField("driverId", IntegerType(), True), 36 | StructField("lap", IntegerType(), True), 37 | StructField("position", IntegerType(), True), 38 | StructField("time", StringType(), True), 39 | StructField("milliseconds", IntegerType(), True) 40 | ]) 41 | 42 | # COMMAND ---------- 43 | 44 | lap_times_df = spark.read \ 45 | .schema(lap_times_schema) \ 46 | .csv(f"{raw_folder_path}/{v_file_date}/lap_times") 47 | 48 | # COMMAND ---------- 49 | 50 | # MAGIC %md 51 | # MAGIC ##### Step 2 - Rename columns and add new columns 52 | # MAGIC 1. Rename driverId and raceId 53 | # MAGIC 1. 
Add ingestion_date with current timestamp 54 | 55 | # COMMAND ---------- 56 | 57 | lap_times_with_ingestion_date_df = add_ingestion_date(lap_times_df) 58 | 59 | # COMMAND ---------- 60 | 61 | from pyspark.sql.functions import lit 62 | 63 | # COMMAND ---------- 64 | 65 | final_df = lap_times_with_ingestion_date_df.withColumnRenamed("driverId", "driver_id") \ 66 | .withColumnRenamed("raceId", "race_id") \ 67 | .withColumn("ingestion_date", current_timestamp()) \ 68 | .withColumn("data_source", lit(v_data_source)) \ 69 | .withColumn("file_date", lit(v_file_date)) 70 | 71 | # COMMAND ---------- 72 | 73 | # MAGIC %md 74 | # MAGIC ##### Step 3 - Write to output to processed container in parquet format 75 | 76 | # COMMAND ---------- 77 | 78 | #overwrite_partition(final_df, 'f1_processed', 'lap_times', 'race_id') 79 | 80 | # COMMAND ---------- 81 | 82 | merge_condition = "tgt.race_id = src.race_id AND tgt.driver_id = src.driver_id AND tgt.lap = src.lap AND tgt.race_id = src.race_id" 83 | merge_delta_data(final_df, 'f1_processed', 'lap_times', processed_folder_path, merge_condition, 'race_id') 84 | 85 | # COMMAND ---------- 86 | 87 | dbutils.notebook.exit("Success") -------------------------------------------------------------------------------- /3. 
Data Ingestion/8.ingest_qualifying_file.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ### Ingest qualifying json files 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_data_source", "") 8 | v_data_source = dbutils.widgets.get("p_data_source") 9 | 10 | # COMMAND ---------- 11 | 12 | dbutils.widgets.text("p_file_date", "2021-03-21") 13 | v_file_date = dbutils.widgets.get("p_file_date") 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run "../includes/configuration" 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %run "../includes/common_functions" 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %md 26 | # MAGIC ##### Step 1 - Read the JSON file using the spark dataframe reader API 27 | 28 | # COMMAND ---------- 29 | 30 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType 31 | 32 | # COMMAND ---------- 33 | 34 | qualifying_schema = StructType(fields=[StructField("qualifyId", IntegerType(), False), 35 | StructField("raceId", IntegerType(), True), 36 | StructField("driverId", IntegerType(), True), 37 | StructField("constructorId", IntegerType(), True), 38 | StructField("number", IntegerType(), True), 39 | StructField("position", IntegerType(), True), 40 | StructField("q1", StringType(), True), 41 | StructField("q2", StringType(), True), 42 | StructField("q3", StringType(), True), 43 | ]) 44 | 45 | # COMMAND ---------- 46 | 47 | qualifying_df = spark.read \ 48 | .schema(qualifying_schema) \ 49 | .option("multiLine", True) \ 50 | .json(f"{raw_folder_path}/{v_file_date}/qualifying") 51 | 52 | # COMMAND ---------- 53 | 54 | # MAGIC %md 55 | # MAGIC ##### Step 2 - Rename columns and add new columns 56 | # MAGIC 1. Rename qualifyingId, driverId, constructorId and raceId 57 | # MAGIC 1. 
Add ingestion_date with current timestamp 58 | 59 | # COMMAND ---------- 60 | 61 | qualifying_with_ingestion_date_df = add_ingestion_date(qualifying_df) 62 | 63 | # COMMAND ---------- 64 | 65 | from pyspark.sql.functions import lit 66 | 67 | # COMMAND ---------- 68 | 69 | final_df = qualifying_with_ingestion_date_df.withColumnRenamed("qualifyId", "qualify_id") \ 70 | .withColumnRenamed("driverId", "driver_id") \ 71 | .withColumnRenamed("raceId", "race_id") \ 72 | .withColumnRenamed("constructorId", "constructor_id") \ 73 | .withColumn("ingestion_date", current_timestamp()) \ 74 | .withColumn("data_source", lit(v_data_source)) \ 75 | .withColumn("file_date", lit(v_file_date)) 76 | 77 | # COMMAND ---------- 78 | 79 | # MAGIC %md 80 | # MAGIC ##### Step 3 - Write to output to processed container in parquet format 81 | 82 | # COMMAND ---------- 83 | 84 | #overwrite_partition(final_df, 'f1_processed', 'qualifying', 'race_id') 85 | 86 | # COMMAND ---------- 87 | 88 | merge_condition = "tgt.qualify_id = src.qualify_id AND tgt.race_id = src.race_id" 89 | merge_delta_data(final_df, 'f1_processed', 'qualifying', processed_folder_path, merge_condition, 'race_id') 90 | 91 | # COMMAND ---------- 92 | 93 | dbutils.notebook.exit("Success") 94 | 95 | # COMMAND ---------- 96 | 97 | -------------------------------------------------------------------------------- /3. Data Ingestion/9.create_processed_database.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | CREATE DATABASE IF NOT EXISTS f1_processed 3 | LOCATION "abfss://processed@erformula1dl.dfs.core.windows.net/" 4 | 5 | -- COMMAND ---------- 6 | 7 | DESC DATABASE f1_processed; 8 | 9 | -- COMMAND ---------- 10 | 11 | -------------------------------------------------------------------------------- /4. 
raw/1.create_raw_tables.sql:
--------------------------------------------------------------------------------
-- Databricks notebook source

CREATE DATABASE IF NOT EXISTS f1_raw;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC #### Create tables for CSV files

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create circuits table

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.circuits;
CREATE TABLE IF NOT EXISTS f1_raw.circuits(circuitId INT,
  circuitRef STRING,
  name STRING,
  location STRING,
  country STRING,
  lat DOUBLE,
  lng DOUBLE,
  alt INT,
  url STRING
)
USING csv
OPTIONS (path "abfss://raw@erformula1dl.dfs.core.windows.net/circuits.csv", header true)

-- COMMAND ----------

SELECT * FROM f1_raw.circuits;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create races table

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.races;
CREATE TABLE IF NOT EXISTS f1_raw.races(raceId INT,
  year INT,
  round INT,
  circuitId INT,
  name STRING,
  date DATE,
  time STRING,
  url STRING)
USING csv
OPTIONS (path "abfss://raw@erformula1dl.dfs.core.windows.net/races.csv", header true)

-- COMMAND ----------

SELECT * FROM f1_raw.races;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC #### Create tables for JSON files

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create constructors table
-- MAGIC * Single Line JSON
-- MAGIC * Simple structure

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.constructors;
CREATE TABLE IF NOT EXISTS f1_raw.constructors(
  constructorId INT,
  constructorRef STRING,
  name STRING,
  nationality STRING,
  url STRING)
USING json
OPTIONS(path "abfss://raw@erformula1dl.dfs.core.windows.net/constructors.json")

-- COMMAND ----------

SELECT * FROM f1_raw.constructors;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create drivers table
-- MAGIC * Single Line JSON
-- MAGIC * Complex structure

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.drivers;
CREATE TABLE IF NOT EXISTS f1_raw.drivers(
  driverId INT,
  driverRef STRING,
  number INT,
  code STRING,
  -- FIX: a bare STRUCT with no field list is not valid Spark SQL DDL; the
  -- drivers ingestion notebook reads name.forename and name.surname, so the
  -- nested fields are declared explicitly here.
  name STRUCT<forename: STRING, surname: STRING>,
  dob DATE,
  nationality STRING,
  url STRING)
USING json
OPTIONS (path "abfss://raw@erformula1dl.dfs.core.windows.net/drivers.json")

-- COMMAND ----------

SELECT * FROM f1_raw.drivers;

-- COMMAND ----------

-- MAGIC %md ##### Create results table
-- MAGIC * Single Line JSON
-- MAGIC * Simple structure

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.results;
CREATE TABLE IF NOT EXISTS f1_raw.results(
  resultId INT,
  raceId INT,
  driverId INT,
  constructorId INT,
  number INT,
  grid INT,
  position INT,
  positionText STRING,
  positionOrder INT,
  -- FIX: points can be fractional (half points for shortened races) and the
  -- results ingestion notebook reads this column as FloatType; INT would
  -- silently lose the fraction.
  points FLOAT,
  laps INT,
  time STRING,
  milliseconds INT,
  fastestLap INT,
  rank INT,
  fastestLapTime STRING,
  fastestLapSpeed FLOAT,
  statusId STRING)
USING json
OPTIONS(path "abfss://raw@erformula1dl.dfs.core.windows.net/results.json")

-- COMMAND ----------

SELECT * FROM f1_raw.results

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create pit stops table
-- MAGIC * Multi Line JSON
-- MAGIC * Simple structure

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.pit_stops;
CREATE TABLE IF NOT EXISTS f1_raw.pit_stops(
  driverId INT,
  duration STRING,
  lap INT,
  milliseconds INT,
  raceId INT,
  stop INT,
  time STRING)
USING json
OPTIONS(path "abfss://raw@erformula1dl.dfs.core.windows.net/pit_stops.json", multiLine true)

-- COMMAND ----------

SELECT * FROM f1_raw.pit_stops;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC #### Create tables for list of files

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create Lap Times Table
-- MAGIC * CSV file
-- MAGIC * Multiple files

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.lap_times;
CREATE TABLE IF NOT EXISTS f1_raw.lap_times(
  raceId INT,
  driverId INT,
  lap INT,
  position INT,
  time STRING,
  milliseconds INT
)
USING csv
OPTIONS (path "abfss://raw@erformula1dl.dfs.core.windows.net/lap_times")

-- COMMAND ----------

SELECT * FROM f1_raw.lap_times

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ##### Create Qualifying Table
-- MAGIC * JSON file
-- MAGIC * MultiLine JSON
-- MAGIC * Multiple files

-- COMMAND ----------

DROP TABLE IF EXISTS f1_raw.qualifying;
CREATE TABLE IF NOT EXISTS f1_raw.qualifying(
  constructorId INT,
  driverId INT,
  number INT,
  position INT,
  q1 STRING,
  q2 STRING,
  q3 STRING,
  qualifyId INT,
  raceId INT)
USING json
OPTIONS (path "abfss://raw@erformula1dl.dfs.core.windows.net/qualifying", multiLine true)

-- COMMAND ----------

SELECT * FROM f1_raw.qualifying

-- COMMAND ----------

DESC EXTENDED f1_raw.qualifying;

-- COMMAND ----------

--------------------------------------------------------------------------------
/5.
Data Transformation/0.create_presentation_database.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | CREATE DATABASE IF NOT EXISTS f1_presentation 3 | LOCATION "abfss://presentation@erformula1dl.dfs.core.windows.net/" 4 | 5 | -- COMMAND ---------- 6 | 7 | -------------------------------------------------------------------------------- /5. Data Transformation/1.race_results.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | dbutils.widgets.text("p_file_date", "2021-03-21") 3 | v_file_date = dbutils.widgets.get("p_file_date") 4 | 5 | # COMMAND ---------- 6 | 7 | # MAGIC %md 8 | # MAGIC ##### Read all the data as required 9 | 10 | # COMMAND ---------- 11 | 12 | # MAGIC %run "../includes/configuration" 13 | 14 | # COMMAND ---------- 15 | 16 | # MAGIC %run "../includes/common_functions" 17 | 18 | # COMMAND ---------- 19 | 20 | drivers_df = spark.read.format("delta").load(f"{processed_folder_path}/drivers") \ 21 | .withColumnRenamed("number", "driver_number") \ 22 | .withColumnRenamed("name", "driver_name") \ 23 | .withColumnRenamed("nationality", "driver_nationality") 24 | 25 | # COMMAND ---------- 26 | 27 | constructors_df = spark.read.format("delta").load(f"{processed_folder_path}/constructors") \ 28 | .withColumnRenamed("name", "team") 29 | 30 | # COMMAND ---------- 31 | 32 | circuits_df = spark.read.format("delta").load(f"{processed_folder_path}/circuits") \ 33 | .withColumnRenamed("location", "circuit_location") 34 | 35 | # COMMAND ---------- 36 | 37 | races_df = spark.read.format("delta").load(f"{processed_folder_path}/races") \ 38 | .withColumnRenamed("name", "race_name") \ 39 | .withColumnRenamed("race_timestamp", "race_date") 40 | 41 | # COMMAND ---------- 42 | 43 | results_df = spark.read.format("delta").load(f"{processed_folder_path}/results") \ 44 | .filter(f"file_date = '{v_file_date}'") \ 45 | 
.withColumnRenamed("time", "race_time") \ 46 | .withColumnRenamed("race_id", "result_race_id") \ 47 | .withColumnRenamed("file_date", "result_file_date") 48 | 49 | # COMMAND ---------- 50 | 51 | # MAGIC %md 52 | # MAGIC ##### Join circuits to races 53 | 54 | # COMMAND ---------- 55 | 56 | race_circuits_df = races_df.join(circuits_df, races_df.circuit_id == circuits_df.circuit_id, "inner") \ 57 | .select(races_df.race_id, races_df.race_year, races_df.race_name, races_df.race_date, circuits_df.circuit_location) 58 | 59 | # COMMAND ---------- 60 | 61 | # MAGIC %md 62 | # MAGIC ##### Join results to all other dataframes 63 | 64 | # COMMAND ---------- 65 | 66 | race_results_df = results_df.join(race_circuits_df, results_df.result_race_id == race_circuits_df.race_id) \ 67 | .join(drivers_df, results_df.driver_id == drivers_df.driver_id) \ 68 | .join(constructors_df, results_df.constructor_id == constructors_df.constructor_id) 69 | 70 | # COMMAND ---------- 71 | 72 | from pyspark.sql.functions import current_timestamp 73 | 74 | # COMMAND ---------- 75 | 76 | final_df = race_results_df.select("race_id", "race_year", "race_name", "race_date", "circuit_location", "driver_name", "driver_number", "driver_nationality", 77 | "team", "grid", "fastest_lap", "race_time", "points", "position", "result_file_date") \ 78 | .withColumn("created_date", current_timestamp()) \ 79 | .withColumnRenamed("result_file_date", "file_date") 80 | 81 | # COMMAND ---------- 82 | 83 | merge_condition = "tgt.driver_name = src.driver_name AND tgt.race_id = src.race_id" 84 | merge_delta_data(final_df, 'f1_presentation', 'race_results', presentation_folder_path, merge_condition, 'race_id') 85 | 86 | # COMMAND ---------- 87 | 88 | # MAGIC %sql 89 | # MAGIC SELECT * FROM f1_presentation.race_results; 90 | 91 | # COMMAND ---------- 92 | 93 | -------------------------------------------------------------------------------- /5. 
Data Transformation/2.driver_standings.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ##### Produce driver standings 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_file_date", "2021-03-28") 8 | v_file_date = dbutils.widgets.get("p_file_date") 9 | 10 | # COMMAND ---------- 11 | 12 | # MAGIC %run "../includes/common_functions" 13 | 14 | # COMMAND ---------- 15 | 16 | # MAGIC %run "../includes/configuration" 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %md 21 | # MAGIC Find race years for which the data is to be reprocessed 22 | 23 | # COMMAND ---------- 24 | 25 | race_results_df = spark.read.format("delta").load(f"{presentation_folder_path}/race_results") \ 26 | .filter(f"file_date = '{v_file_date}'") 27 | 28 | # COMMAND ---------- 29 | 30 | race_year_list = df_column_to_list(race_results_df, 'race_year') 31 | 32 | # COMMAND ---------- 33 | 34 | from pyspark.sql.functions import col 35 | 36 | race_results_df = spark.read.format("delta").load(f"{presentation_folder_path}/race_results") \ 37 | .filter(col("race_year").isin(race_year_list)) 38 | 39 | # COMMAND ---------- 40 | 41 | from pyspark.sql.functions import sum, when, count, col 42 | 43 | driver_standings_df = race_results_df \ 44 | .groupBy("race_year", "driver_name", "driver_nationality") \ 45 | .agg(sum("points").alias("total_points"), 46 | count(when(col("position") == 1, True)).alias("wins")) 47 | 48 | # COMMAND ---------- 49 | 50 | from pyspark.sql.window import Window 51 | from pyspark.sql.functions import desc, rank, asc 52 | 53 | driver_rank_spec = Window.partitionBy("race_year").orderBy(desc("total_points"), desc("wins")) 54 | final_df = driver_standings_df.withColumn("rank", rank().over(driver_rank_spec)) 55 | 56 | # COMMAND ---------- 57 | 58 | merge_condition = "tgt.driver_name = src.driver_name AND tgt.race_year = src.race_year" 59 | merge_delta_data(final_df, 'f1_presentation', 
'driver_standings', presentation_folder_path, merge_condition, 'race_year') 60 | 61 | # COMMAND ---------- 62 | 63 | # MAGIC %sql 64 | # MAGIC SELECT * FROM f1_presentation.driver_standings WHERE race_year = 2021; 65 | 66 | # COMMAND ---------- 67 | 68 | # MAGIC %sql 69 | # MAGIC SELECT race_year, COUNT(1) 70 | # MAGIC FROM f1_presentation.driver_standings 71 | # MAGIC GROUP BY race_year 72 | # MAGIC ORDER BY race_year DESC; 73 | 74 | # COMMAND ---------- 75 | 76 | -------------------------------------------------------------------------------- /5. Data Transformation/3.constructor_standings.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ##### Produce constructor standings 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("p_file_date", "2021-03-28") 8 | v_file_date = dbutils.widgets.get("p_file_date") 9 | 10 | # COMMAND ---------- 11 | 12 | # MAGIC %run "../includes/configuration" 13 | 14 | # COMMAND ---------- 15 | 16 | # MAGIC %run "../includes/common_functions" 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %md 21 | # MAGIC Find race years for which the data is to be reprocessed 22 | 23 | # COMMAND ---------- 24 | 25 | race_results_df = spark.read.format("delta").load(f"{presentation_folder_path}/race_results") \ 26 | .filter(f"file_date = '{v_file_date}'") 27 | 28 | # COMMAND ---------- 29 | 30 | race_year_list = df_column_to_list(race_results_df, 'race_year') 31 | 32 | # COMMAND ---------- 33 | 34 | from pyspark.sql.functions import col 35 | 36 | race_results_df = spark.read.format("delta").load(f"{presentation_folder_path}/race_results") \ 37 | .filter(col("race_year").isin(race_year_list)) 38 | 39 | # COMMAND ---------- 40 | 41 | from pyspark.sql.functions import sum, when, count, col 42 | 43 | constructor_standings_df = race_results_df \ 44 | .groupBy("race_year", "team") \ 45 | .agg(sum("points").alias("total_points"), 46 | 
count(when(col("position") == 1, True)).alias("wins")) 47 | 48 | # COMMAND ---------- 49 | 50 | from pyspark.sql.window import Window 51 | from pyspark.sql.functions import desc, rank, asc 52 | 53 | constructor_rank_spec = Window.partitionBy("race_year").orderBy(desc("total_points"), desc("wins")) 54 | final_df = constructor_standings_df.withColumn("rank", rank().over(constructor_rank_spec)) 55 | 56 | # COMMAND ---------- 57 | 58 | merge_condition = "tgt.team = src.team AND tgt.race_year = src.race_year" 59 | merge_delta_data(final_df, 'f1_presentation', 'constructor_standings', presentation_folder_path, merge_condition, 'race_year') 60 | 61 | # COMMAND ---------- 62 | 63 | # MAGIC %sql 64 | # MAGIC SELECT * FROM f1_presentation.constructor_standings WHERE race_year = 2021; 65 | 66 | # COMMAND ---------- 67 | 68 | # MAGIC %sql 69 | # MAGIC SELECT race_year, COUNT(1) 70 | # MAGIC FROM f1_presentation.constructor_standings 71 | # MAGIC GROUP BY race_year 72 | # MAGIC ORDER BY race_year DESC; -------------------------------------------------------------------------------- /5. 
Data Transformation/4.calculated_race_results.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | dbutils.widgets.text("p_file_date", "2021-03-21") 3 | v_file_date = dbutils.widgets.get("p_file_date") 4 | 5 | # COMMAND ---------- 6 | 7 | spark.sql(f""" 8 | CREATE TABLE IF NOT EXISTS f1_presentation.calculated_race_results 9 | ( 10 | race_year INT, 11 | team_name STRING, 12 | driver_id INT, 13 | driver_name STRING, 14 | race_id INT, 15 | position INT, 16 | points INT, 17 | calculated_points INT, 18 | created_date TIMESTAMP, 19 | updated_date TIMESTAMP 20 | ) 21 | USING DELTA 22 | """) 23 | 24 | # COMMAND ---------- 25 | 26 | spark.sql(f""" 27 | CREATE OR REPLACE TEMP VIEW race_result_updated 28 | AS 29 | SELECT races.race_year, 30 | constructors.name AS team_name, 31 | drivers.driver_id, 32 | drivers.name AS driver_name, 33 | races.race_id, 34 | results.position, 35 | results.points, 36 | 11 - results.position AS calculated_points 37 | FROM f1_processed.results 38 | JOIN f1_processed.drivers ON (results.driver_id = drivers.driver_id) 39 | JOIN f1_processed.constructors ON (results.constructor_id = constructors.constructor_id) 40 | JOIN f1_processed.races ON (results.race_id = races.race_id) 41 | WHERE results.position <= 10 42 | AND results.file_date = '{v_file_date}' 43 | """) 44 | 45 | # COMMAND ---------- 46 | 47 | spark.sql(f""" 48 | MERGE INTO f1_presentation.calculated_race_results tgt 49 | USING race_result_updated upd 50 | ON (tgt.driver_id = upd.driver_id AND tgt.race_id = upd.race_id) 51 | WHEN MATCHED THEN 52 | UPDATE SET tgt.position = upd.position, 53 | tgt.points = upd.points, 54 | tgt.calculated_points = upd.calculated_points, 55 | tgt.updated_date = current_timestamp 56 | WHEN NOT MATCHED 57 | THEN INSERT (race_year, team_name, driver_id, driver_name,race_id, position, points, calculated_points, created_date ) 58 | VALUES (race_year, team_name, driver_id, driver_name,race_id, 
position, points, calculated_points, current_timestamp) 59 | """) 60 | 61 | # COMMAND ---------- 62 | 63 | # MAGIC %sql 64 | # MAGIC SELECT COUNT(1) FROM race_result_updated; 65 | 66 | # COMMAND ---------- 67 | 68 | # MAGIC %sql 69 | # MAGIC SELECT COUNT(1) FROM f1_presentation.calculated_race_results; 70 | 71 | # COMMAND ---------- 72 | 73 | -------------------------------------------------------------------------------- /5. Data Transformation/calculated_race_results_sql.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | USE f1_processed; 3 | 4 | -- COMMAND ---------- 5 | 6 | CREATE TABLE f1_presentation.calculated_race_results_sql 7 | USING parquet 8 | AS 9 | SELECT races.race_year, 10 | constructors.name AS team_name, 11 | drivers.name AS driver_name, 12 | results.position, 13 | results.points, 14 | 11 - results.position As calculated_points 15 | FROM results 16 | JOIN f1_processed.drivers ON (results.driver_id = drivers.driver_id) 17 | JOIN f1_processed.constructors ON (results.constructor_id = constructors.constructor_id) 18 | JOIN f1_processed.races ON (results.race_id = races.race_id) 19 | WHERE results.position <= 10 20 | 21 | -- COMMAND ---------- 22 | 23 | SELECT * FROM f1_presentation.calculated_race_results_sql; -------------------------------------------------------------------------------- /6. 
Data Analysis and Visualization/1.find_dominant_drivers.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | SELECT driver_name, 3 | COUNT(1) AS total_races, 4 | SUM(calculated_points) AS total_points, 5 | AVG(calculated_points) AS avg_points 6 | FROM f1_presentation.calculated_race_results 7 | GROUP BY driver_name 8 | HAVING COUNT(1) >= 50 9 | ORDER BY avg_points DESC 10 | 11 | -- COMMAND ---------- 12 | 13 | SELECT driver_name, 14 | COUNT(1) AS total_races, 15 | SUM(calculated_points) AS total_points, 16 | AVG(calculated_points) AS avg_points 17 | FROM f1_presentation.calculated_race_results 18 | WHERE race_year BETWEEN 2011 AND 2020 19 | GROUP BY driver_name 20 | HAVING COUNT(1) >= 50 21 | ORDER BY avg_points DESC 22 | 23 | -- COMMAND ---------- 24 | 25 | SELECT driver_name, 26 | COUNT(1) AS total_races, 27 | SUM(calculated_points) AS total_points, 28 | AVG(calculated_points) AS avg_points 29 | FROM f1_presentation.calculated_race_results 30 | WHERE race_year BETWEEN 2001 AND 2010 31 | GROUP BY driver_name 32 | HAVING COUNT(1) >= 50 33 | ORDER BY avg_points DESC 34 | 35 | -- COMMAND ---------- 36 | 37 | -------------------------------------------------------------------------------- /6. 
Data Analysis and Visualization/2.find_dominant_teams.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | SELECT team_name, 3 | COUNT(1) AS total_races, 4 | SUM(calculated_points) AS total_points, 5 | AVG(calculated_points) AS avg_points 6 | FROM f1_presentation.calculated_race_results 7 | GROUP BY team_name 8 | HAVING COUNT(1) >= 100 9 | ORDER BY avg_points DESC 10 | 11 | -- COMMAND ---------- 12 | 13 | SELECT team_name, 14 | COUNT(1) AS total_races, 15 | SUM(calculated_points) AS total_points, 16 | AVG(calculated_points) AS avg_points 17 | FROM f1_presentation.calculated_race_results 18 | WHERE race_year BETWEEN 2011 AND 2020 19 | GROUP BY team_name 20 | HAVING COUNT(1) >= 100 21 | ORDER BY avg_points DESC 22 | 23 | -- COMMAND ---------- 24 | 25 | SELECT team_name, 26 | COUNT(1) AS total_races, 27 | SUM(calculated_points) AS total_points, 28 | AVG(calculated_points) AS avg_points 29 | FROM f1_presentation.calculated_race_results 30 | WHERE race_year BETWEEN 2001 AND 2010 31 | GROUP BY team_name 32 | HAVING COUNT(1) >= 100 33 | ORDER BY avg_points DESC 34 | 35 | -- COMMAND ---------- 36 | 37 | -------------------------------------------------------------------------------- /6. Data Analysis and Visualization/3.viz_dominant_drivers.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %python 3 | -- MAGIC html = """

Dominant Formula 1 Drivers of All Time

""" 4 | -- MAGIC displayHTML(html) 5 | 6 | -- COMMAND ---------- 7 | 8 | CREATE OR REPLACE TEMP VIEW v_dominant_drivers 9 | AS 10 | SELECT driver_name, 11 | COUNT(1) AS total_races, 12 | SUM(calculated_points) AS total_points, 13 | AVG(calculated_points) AS avg_points, 14 | RANK() OVER(ORDER BY AVG(calculated_points) DESC) driver_rank 15 | FROM f1_presentation.calculated_race_results 16 | GROUP BY driver_name 17 | HAVING COUNT(1) >= 50 18 | ORDER BY avg_points DESC 19 | 20 | -- COMMAND ---------- 21 | 22 | SELECT race_year, 23 | driver_name, 24 | COUNT(1) AS total_races, 25 | SUM(calculated_points) AS total_points, 26 | AVG(calculated_points) AS avg_points 27 | FROM f1_presentation.calculated_race_results 28 | WHERE driver_name IN (SELECT driver_name FROM v_dominant_drivers WHERE driver_rank <= 10) 29 | GROUP BY race_year, driver_name 30 | ORDER BY race_year, avg_points DESC 31 | 32 | -- COMMAND ---------- 33 | 34 | SELECT race_year, 35 | driver_name, 36 | COUNT(1) AS total_races, 37 | SUM(calculated_points) AS total_points, 38 | AVG(calculated_points) AS avg_points 39 | FROM f1_presentation.calculated_race_results 40 | WHERE driver_name IN (SELECT driver_name FROM v_dominant_drivers WHERE driver_rank <= 10) 41 | GROUP BY race_year, driver_name 42 | ORDER BY race_year, avg_points DESC 43 | 44 | -- COMMAND ---------- 45 | 46 | SELECT race_year, 47 | driver_name, 48 | COUNT(1) AS total_races, 49 | SUM(calculated_points) AS total_points, 50 | AVG(calculated_points) AS avg_points 51 | FROM f1_presentation.calculated_race_results 52 | WHERE driver_name IN (SELECT driver_name FROM v_dominant_drivers WHERE driver_rank <= 10) 53 | GROUP BY race_year, driver_name 54 | ORDER BY race_year, avg_points DESC 55 | 56 | -- COMMAND ---------- 57 | 58 | -------------------------------------------------------------------------------- /6. 
Data Analysis and Visualization/4.viz_dominant_teams.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %python 3 | -- MAGIC html = """

Dominant Formula 1 Teams of All Time

""" 4 | -- MAGIC displayHTML(html) 5 | 6 | -- COMMAND ---------- 7 | 8 | CREATE OR REPLACE TEMP VIEW v_dominant_teams 9 | AS 10 | SELECT team_name, 11 | COUNT(1) AS total_races, 12 | SUM(calculated_points) AS total_points, 13 | AVG(calculated_points) AS avg_points, 14 | RANK() OVER(ORDER BY AVG(calculated_points) DESC) team_rank 15 | FROM f1_presentation.calculated_race_results 16 | GROUP BY team_name 17 | HAVING COUNT(1) >= 100 18 | ORDER BY avg_points DESC 19 | 20 | -- COMMAND ---------- 21 | 22 | SELECT * FROM v_dominant_teams; 23 | 24 | -- COMMAND ---------- 25 | 26 | SELECT race_year, 27 | team_name, 28 | COUNT(1) AS total_races, 29 | SUM(calculated_points) AS total_points, 30 | AVG(calculated_points) AS avg_points 31 | FROM f1_presentation.calculated_race_results 32 | WHERE team_name IN (SELECT team_name FROM v_dominant_teams WHERE team_rank <= 5) 33 | GROUP BY race_year, team_name 34 | ORDER BY race_year, avg_points DESC 35 | 36 | -- COMMAND ---------- 37 | 38 | SELECT race_year, 39 | team_name, 40 | COUNT(1) AS total_races, 41 | SUM(calculated_points) AS total_points, 42 | AVG(calculated_points) AS avg_points 43 | FROM f1_presentation.calculated_race_results 44 | WHERE team_name IN (SELECT team_name FROM v_dominant_teams WHERE team_rank <= 5) 45 | GROUP BY race_year, team_name 46 | ORDER BY race_year, avg_points DESC 47 | 48 | -- COMMAND ---------- 49 | 50 | -------------------------------------------------------------------------------- /7. Power Bi Reports/412.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/412.png -------------------------------------------------------------------------------- /7. 
Power Bi Reports/413.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/413.png -------------------------------------------------------------------------------- /7. Power Bi Reports/414.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/414.png -------------------------------------------------------------------------------- /7. Power Bi Reports/415.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/415.png -------------------------------------------------------------------------------- /7. Power Bi Reports/416.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/416.png -------------------------------------------------------------------------------- /7. Power Bi Reports/423.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/423.png -------------------------------------------------------------------------------- /7. 
Power Bi Reports/424.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/424.png -------------------------------------------------------------------------------- /7. Power Bi Reports/425.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. Power Bi Reports/425.png -------------------------------------------------------------------------------- /7. Power Bi Reports/426.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/7. 
Power Bi Reports/426.png -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-21/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Incremental_load_data/2021-03-21/.DS_Store -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-21/circuits.csv: -------------------------------------------------------------------------------- 1 | circuitId,circuitRef,name,location,country,lat,lng,alt,url 2 | 1,"albert_park","Albert Park Grand Prix Circuit","Melbourne","Australia",-37.8497,144.968,10,"http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit" 3 | 2,"sepang","Sepang International Circuit","Kuala Lumpur","Malaysia",2.76083,101.738,18,"http://en.wikipedia.org/wiki/Sepang_International_Circuit" 4 | 3,"bahrain","Bahrain International Circuit","Sakhir","Bahrain",26.0325,50.5106,7,"http://en.wikipedia.org/wiki/Bahrain_International_Circuit" 5 | 4,"catalunya","Circuit de Barcelona-Catalunya","Montmeló","Spain",41.57,2.26111,109,"http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya" 6 | 5,"istanbul","Istanbul Park","Istanbul","Turkey",40.9517,29.405,130,"http://en.wikipedia.org/wiki/Istanbul_Park" 7 | 6,"monaco","Circuit de Monaco","Monte-Carlo","Monaco",43.7347,7.42056,7,"http://en.wikipedia.org/wiki/Circuit_de_Monaco" 8 | 7,"villeneuve","Circuit Gilles Villeneuve","Montreal","Canada",45.5,-73.5228,13,"http://en.wikipedia.org/wiki/Circuit_Gilles_Villeneuve" 9 | 8,"magny_cours","Circuit de Nevers Magny-Cours","Magny Cours","France",46.8642,3.16361,228,"http://en.wikipedia.org/wiki/Circuit_de_Nevers_Magny-Cours" 10 | 9,"silverstone","Silverstone Circuit","Silverstone","UK",52.0786,-1.01694,153,"http://en.wikipedia.org/wiki/Silverstone_Circuit" 11 | 
10,"hockenheimring","Hockenheimring","Hockenheim","Germany",49.3278,8.56583,103,"http://en.wikipedia.org/wiki/Hockenheimring" 12 | 11,"hungaroring","Hungaroring","Budapest","Hungary",47.5789,19.2486,264,"http://en.wikipedia.org/wiki/Hungaroring" 13 | 12,"valencia","Valencia Street Circuit","Valencia","Spain",39.4589,-0.331667,4,"http://en.wikipedia.org/wiki/Valencia_Street_Circuit" 14 | 13,"spa","Circuit de Spa-Francorchamps","Spa","Belgium",50.4372,5.97139,401,"http://en.wikipedia.org/wiki/Circuit_de_Spa-Francorchamps" 15 | 14,"monza","Autodromo Nazionale di Monza","Monza","Italy",45.6156,9.28111,162,"http://en.wikipedia.org/wiki/Autodromo_Nazionale_Monza" 16 | 15,"marina_bay","Marina Bay Street Circuit","Marina Bay","Singapore",1.2914,103.864,18,"http://en.wikipedia.org/wiki/Marina_Bay_Street_Circuit" 17 | 16,"fuji","Fuji Speedway","Oyama","Japan",35.3717,138.927,583,"http://en.wikipedia.org/wiki/Fuji_Speedway" 18 | 17,"shanghai","Shanghai International Circuit","Shanghai","China",31.3389,121.22,5,"http://en.wikipedia.org/wiki/Shanghai_International_Circuit" 19 | 18,"interlagos","Autódromo José Carlos Pace","São Paulo","Brazil",-23.7036,-46.6997,785,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jos%C3%A9_Carlos_Pace" 20 | 19,"indianapolis","Indianapolis Motor Speedway","Indianapolis","USA",39.795,-86.2347,223,"http://en.wikipedia.org/wiki/Indianapolis_Motor_Speedway" 21 | 20,"nurburgring","Nürburgring","Nürburg","Germany",50.3356,6.9475,578,"http://en.wikipedia.org/wiki/N%C3%BCrburgring" 22 | 21,"imola","Autodromo Enzo e Dino Ferrari","Imola","Italy",44.3439,11.7167,37,"http://en.wikipedia.org/wiki/Autodromo_Enzo_e_Dino_Ferrari" 23 | 22,"suzuka","Suzuka Circuit","Suzuka","Japan",34.8431,136.541,45,"http://en.wikipedia.org/wiki/Suzuka_Circuit" 24 | 23,"osterreichring","A1-Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/A1-Ring" 25 | 24,"yas_marina","Yas Marina Circuit","Abu 
Dhabi","UAE",24.4672,54.6031,3,"http://en.wikipedia.org/wiki/Yas_Marina_Circuit" 26 | 25,"galvez","Autódromo Juan y Oscar Gálvez","Buenos Aires","Argentina",-34.6943,-58.4593,8,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Oscar_Alfredo_G%C3%A1lvez" 27 | 26,"jerez","Circuito de Jerez","Jerez de la Frontera","Spain",36.7083,-6.03417,37,"http://en.wikipedia.org/wiki/Circuito_Permanente_de_Jerez" 28 | 27,"estoril","Autódromo do Estoril","Estoril","Portugal",38.7506,-9.39417,130,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_do_Estoril" 29 | 28,"okayama","Okayama International Circuit","Okayama","Japan",34.915,134.221,266,"http://en.wikipedia.org/wiki/TI_Circuit" 30 | 29,"adelaide","Adelaide Street Circuit","Adelaide","Australia",-34.9272,138.617,58,"http://en.wikipedia.org/wiki/Adelaide_Street_Circuit" 31 | 30,"kyalami","Kyalami","Midrand","South Africa",-25.9894,28.0767,1460,"http://en.wikipedia.org/wiki/Kyalami" 32 | 31,"donington","Donington Park","Castle Donington","UK",52.8306,-1.37528,88,"http://en.wikipedia.org/wiki/Donington_Park" 33 | 32,"rodriguez","Autódromo Hermanos Rodríguez","Mexico City","Mexico",19.4042,-99.0907,2227,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Hermanos_Rodr%C3%ADguez" 34 | 33,"phoenix","Phoenix street circuit","Phoenix","USA",33.4479,-112.075,345,"http://en.wikipedia.org/wiki/Phoenix_street_circuit" 35 | 34,"ricard","Circuit Paul Ricard","Le Castellet","France",43.2506,5.79167,432,"http://en.wikipedia.org/wiki/Paul_Ricard_Circuit" 36 | 35,"yeongam","Korean International Circuit","Yeongam County","Korea",34.7333,126.417,0,"http://en.wikipedia.org/wiki/Korean_International_Circuit" 37 | 36,"jacarepagua","Autódromo Internacional Nelson Piquet","Rio de Janeiro","Brazil",-22.9756,-43.395,1126,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Internacional_Nelson_Piquet" 38 | 37,"detroit","Detroit Street Circuit","Detroit","USA",42.3298,-83.0401,177,"http://en.wikipedia.org/wiki/Detroit_street_circuit" 39 | 38,"brands_hatch","Brands 
Hatch","Kent","UK",51.3569,0.263056,145,"http://en.wikipedia.org/wiki/Brands_Hatch" 40 | 39,"zandvoort","Circuit Park Zandvoort","Zandvoort","Netherlands",52.3888,4.54092,6,"http://en.wikipedia.org/wiki/Circuit_Zandvoort" 41 | 40,"zolder","Zolder","Heusden-Zolder","Belgium",50.9894,5.25694,36,"http://en.wikipedia.org/wiki/Zolder" 42 | 41,"dijon","Dijon-Prenois","Dijon","France",47.3625,4.89913,484,"http://en.wikipedia.org/wiki/Dijon-Prenois" 43 | 42,"dallas","Fair Park","Dallas","USA",32.7774,-96.7587,139,"http://en.wikipedia.org/wiki/Fair_Park" 44 | 43,"long_beach","Long Beach","California","USA",33.7651,-118.189,12,"http://en.wikipedia.org/wiki/Long_Beach,_California" 45 | 44,"las_vegas","Las Vegas Street Circuit","Nevada","USA",36.1162,-115.174,639,"http://en.wikipedia.org/wiki/Las_Vegas,_Nevada" 46 | 45,"jarama","Jarama","Madrid","Spain",40.6171,-3.58558,609,"http://en.wikipedia.org/wiki/Circuito_Permanente_Del_Jarama" 47 | 46,"watkins_glen","Watkins Glen","New York State","USA",42.3369,-76.9272,485,"http://en.wikipedia.org/wiki/Watkins_Glen_International" 48 | 47,"anderstorp","Scandinavian Raceway","Anderstorp","Sweden",57.2653,13.6042,153,"http://en.wikipedia.org/wiki/Scandinavian_Raceway" 49 | 48,"mosport","Mosport International Raceway","Ontario","Canada",44.0481,-78.6756,332,"http://en.wikipedia.org/wiki/Mosport" 50 | 49,"montjuic","Montjuïc","Barcelona","Spain",41.3664,2.15167,79,"http://en.wikipedia.org/wiki/Montju%C3%AFc_circuit" 51 | 50,"nivelles","Nivelles-Baulers","Brussels","Belgium",50.6211,4.32694,139,"http://en.wikipedia.org/wiki/Nivelles-Baulers" 52 | 51,"charade","Charade Circuit","Clermont-Ferrand","France",45.7472,3.03889,790,"http://en.wikipedia.org/wiki/Charade_Circuit" 53 | 52,"tremblant","Circuit Mont-Tremblant","Quebec","Canada",46.1877,-74.6099,214,"http://en.wikipedia.org/wiki/Circuit_Mont-Tremblant" 54 | 53,"essarts","Rouen-Les-Essarts","Rouen","France",49.3306,1.00458,81,"http://en.wikipedia.org/wiki/Rouen-Les-Essarts" 55 | 
54,"lemans","Le Mans","Le Mans","France",47.95,0.224231,67,"http://en.wikipedia.org/wiki/Circuit_de_la_Sarthe#Bugatti_Circuit" 56 | 55,"reims","Reims-Gueux","Reims","France",49.2542,3.93083,88,"http://en.wikipedia.org/wiki/Reims-Gueux" 57 | 56,"george","Prince George Circuit","Eastern Cape Province","South Africa",-33.0486,27.8736,15,"http://en.wikipedia.org/wiki/Prince_George_Circuit" 58 | 57,"zeltweg","Zeltweg","Styria","Austria",47.2039,14.7478,676,"http://en.wikipedia.org/wiki/Zeltweg_Airfield" 59 | 58,"aintree","Aintree","Liverpool","UK",53.4769,-2.94056,20,"http://en.wikipedia.org/wiki/Aintree_Motor_Racing_Circuit" 60 | 59,"boavista","Circuito da Boavista","Oporto","Portugal",41.1705,-8.67325,28,"http://en.wikipedia.org/wiki/Circuito_da_Boavista" 61 | 60,"riverside","Riverside International Raceway","California","USA",33.937,-117.273,470,"http://en.wikipedia.org/wiki/Riverside_International_Raceway" 62 | 61,"avus","AVUS","Berlin","Germany",52.4806,13.2514,53,"http://en.wikipedia.org/wiki/AVUS" 63 | 62,"monsanto","Monsanto Park Circuit","Lisbon","Portugal",38.7197,-9.20306,158,"http://en.wikipedia.org/wiki/Monsanto_Park_Circuit" 64 | 63,"sebring","Sebring International Raceway","Florida","USA",27.4547,-81.3483,18,"http://en.wikipedia.org/wiki/Sebring_Raceway" 65 | 64,"ain-diab","Ain Diab","Casablanca","Morocco",33.5786,-7.6875,19,"http://en.wikipedia.org/wiki/Ain-Diab_Circuit" 66 | 65,"pescara","Pescara Circuit","Pescara","Italy",42.475,14.1508,129,"http://en.wikipedia.org/wiki/Pescara_Circuit" 67 | 66,"bremgarten","Circuit Bremgarten","Bern","Switzerland",46.9589,7.40194,551,"http://en.wikipedia.org/wiki/Circuit_Bremgarten" 68 | 67,"pedralbes","Circuit de Pedralbes","Barcelona","Spain",41.3903,2.11667,85,"http://en.wikipedia.org/wiki/Pedralbes_Circuit" 69 | 68,"buddh","Buddh International Circuit","Uttar Pradesh","India",28.3487,77.5331,194,"http://en.wikipedia.org/wiki/Buddh_International_Circuit" 70 | 69,"americas","Circuit of the 
Americas","Austin","USA",30.1328,-97.6411,161,"http://en.wikipedia.org/wiki/Circuit_of_the_Americas" 71 | 70,"red_bull_ring","Red Bull Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/Red_Bull_Ring" 72 | 71,"sochi","Sochi Autodrom","Sochi","Russia",43.4057,39.9578,2,"http://en.wikipedia.org/wiki/Sochi_Autodrom" 73 | 72,"port_imperial","Port Imperial Street Circuit","New Jersey","USA",40.7769,-74.0111,4,"http://en.wikipedia.org/wiki/Port_Imperial_Street_Circuit" 74 | 73,"BAK","Baku City Circuit","Baku","Azerbaijan",40.3725,49.8533,-7,"http://en.wikipedia.org/wiki/Baku_City_Circuit" 75 | 74,"hanoi","Hanoi Street Circuit","Hanoi","Vietnam",21.0166,105.766,9,"http://en.wikipedia.org/wiki/Hanoi_Street_Circuit" 76 | 75,"portimao","Autódromo Internacional do Algarve","Portimão","Portugal",37.227,-8.6267,108,"http://en.wikipedia.org/wiki/Algarve_International_Circuit" 77 | 76,"mugello","Autodromo Internazionale del Mugello","Mugello","Italy",43.9975,11.3719,255,"http://en.wikipedia.org/wiki/Mugello_Circuit" 78 | 77,"jeddah","Jeddah Street Circuit","Jeddah","Saudi Arabia",21.5433,39.1728,15,"http://en.wikipedia.org/wiki/Jeddah_Street_Circuit" 79 | -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-21/constructors.json: -------------------------------------------------------------------------------- 1 | {"constructorId":1,"constructorRef":"mclaren","name":"McLaren","nationality":"British","url":"http://en.wikipedia.org/wiki/McLaren"} 2 | {"constructorId":2,"constructorRef":"bmw_sauber","name":"BMW Sauber","nationality":"German","url":"http://en.wikipedia.org/wiki/BMW_Sauber"} 3 | {"constructorId":3,"constructorRef":"williams","name":"Williams","nationality":"British","url":"http://en.wikipedia.org/wiki/Williams_Grand_Prix_Engineering"} 4 | {"constructorId":4,"constructorRef":"renault","name":"Renault","nationality":"French","url":"http://en.wikipedia.org/wiki/Renault_in_Formula_One"} 5 
| {"constructorId":5,"constructorRef":"toro_rosso","name":"Toro Rosso","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso"} 6 | {"constructorId":6,"constructorRef":"ferrari","name":"Ferrari","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Scuderia_Ferrari"} 7 | {"constructorId":7,"constructorRef":"toyota","name":"Toyota","nationality":"Japanese","url":"http://en.wikipedia.org/wiki/Toyota_Racing"} 8 | {"constructorId":8,"constructorRef":"super_aguri","name":"Super Aguri","nationality":"Japanese","url":"http://en.wikipedia.org/wiki/Super_Aguri_F1"} 9 | {"constructorId":9,"constructorRef":"red_bull","name":"Red Bull","nationality":"Austrian","url":"http://en.wikipedia.org/wiki/Red_Bull_Racing"} 10 | {"constructorId":10,"constructorRef":"force_india","name":"Force India","nationality":"Indian","url":"http://en.wikipedia.org/wiki/Racing_Point_Force_India"} 11 | {"constructorId":11,"constructorRef":"honda","name":"Honda","nationality":"Japanese","url":"http://en.wikipedia.org/wiki/Honda_Racing_F1"} 12 | {"constructorId":12,"constructorRef":"spyker","name":"Spyker","nationality":"Dutch","url":"http://en.wikipedia.org/wiki/Spyker_F1"} 13 | {"constructorId":13,"constructorRef":"mf1","name":"MF1","nationality":"Russian","url":"http://en.wikipedia.org/wiki/Midland_F1_Racing"} 14 | {"constructorId":14,"constructorRef":"spyker_mf1","name":"Spyker MF1","nationality":"Dutch","url":"http://en.wikipedia.org/wiki/Midland_F1_Racing"} 15 | {"constructorId":15,"constructorRef":"sauber","name":"Sauber","nationality":"Swiss","url":"http://en.wikipedia.org/wiki/Sauber"} 16 | {"constructorId":16,"constructorRef":"bar","name":"BAR","nationality":"British","url":"http://en.wikipedia.org/wiki/British_American_Racing"} 17 | {"constructorId":17,"constructorRef":"jordan","name":"Jordan","nationality":"Irish","url":"http://en.wikipedia.org/wiki/Jordan_Grand_Prix"} 18 | 
{"constructorId":18,"constructorRef":"minardi","name":"Minardi","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Minardi"} 19 | {"constructorId":19,"constructorRef":"jaguar","name":"Jaguar","nationality":"British","url":"http://en.wikipedia.org/wiki/Jaguar_Racing"} 20 | {"constructorId":20,"constructorRef":"prost","name":"Prost","nationality":"French","url":"http://en.wikipedia.org/wiki/Prost_Grand_Prix"} 21 | {"constructorId":21,"constructorRef":"arrows","name":"Arrows","nationality":"British","url":"http://en.wikipedia.org/wiki/Arrows_Grand_Prix_International"} 22 | {"constructorId":22,"constructorRef":"benetton","name":"Benetton","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Benetton_Formula"} 23 | {"constructorId":23,"constructorRef":"brawn","name":"Brawn","nationality":"British","url":"http://en.wikipedia.org/wiki/Brawn_GP"} 24 | {"constructorId":24,"constructorRef":"stewart","name":"Stewart","nationality":"British","url":"http://en.wikipedia.org/wiki/Stewart_Grand_Prix"} 25 | {"constructorId":25,"constructorRef":"tyrrell","name":"Tyrrell","nationality":"British","url":"http://en.wikipedia.org/wiki/Tyrrell_Racing"} 26 | {"constructorId":26,"constructorRef":"lola","name":"Lola","nationality":"British","url":"http://en.wikipedia.org/wiki/MasterCard_Lola"} 27 | {"constructorId":27,"constructorRef":"ligier","name":"Ligier","nationality":"French","url":"http://en.wikipedia.org/wiki/Ligier"} 28 | {"constructorId":28,"constructorRef":"forti","name":"Forti","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Forti"} 29 | {"constructorId":29,"constructorRef":"footwork","name":"Footwork","nationality":"British","url":"http://en.wikipedia.org/wiki/Footwork_Arrows"} 30 | {"constructorId":30,"constructorRef":"pacific","name":"Pacific","nationality":"British","url":"http://en.wikipedia.org/wiki/Pacific_Racing"} 31 | 
{"constructorId":31,"constructorRef":"simtek","name":"Simtek","nationality":"British","url":"http://en.wikipedia.org/wiki/Simtek"} 32 | {"constructorId":32,"constructorRef":"team_lotus","name":"Team Lotus","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 33 | {"constructorId":33,"constructorRef":"larrousse","name":"Larrousse","nationality":"French","url":"http://en.wikipedia.org/wiki/Larrousse"} 34 | {"constructorId":34,"constructorRef":"brabham","name":"Brabham","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 35 | {"constructorId":35,"constructorRef":"dallara","name":"Dallara","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Dallara"} 36 | {"constructorId":36,"constructorRef":"fondmetal","name":"Fondmetal","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Fondmetal"} 37 | {"constructorId":37,"constructorRef":"march","name":"March","nationality":"British","url":"http://en.wikipedia.org/wiki/March_Engineering"} 38 | {"constructorId":38,"constructorRef":"moda","name":"Andrea Moda","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Andrea_Moda_Formula"} 39 | {"constructorId":39,"constructorRef":"ags","name":"AGS","nationality":"French","url":"http://en.wikipedia.org/wiki/Automobiles_Gonfaronnaises_Sportives"} 40 | {"constructorId":40,"constructorRef":"lambo","name":"Lambo","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Modena_(racing_team)"} 41 | {"constructorId":41,"constructorRef":"leyton","name":"Leyton House","nationality":"British","url":"http://en.wikipedia.org/wiki/Leyton_House"} 42 | {"constructorId":42,"constructorRef":"coloni","name":"Coloni","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Enzo_Coloni_Racing_Car_Systems"} 43 | {"constructorId":44,"constructorRef":"eurobrun","name":"Euro Brun","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Euro_Brun"} 44 | 
{"constructorId":45,"constructorRef":"osella","name":"Osella","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Osella"} 45 | {"constructorId":46,"constructorRef":"onyx","name":"Onyx","nationality":"British","url":"http://en.wikipedia.org/wiki/Onyx_(racing_team)"} 46 | {"constructorId":47,"constructorRef":"life","name":"Life","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Life_(Racing_Team)"} 47 | {"constructorId":48,"constructorRef":"rial","name":"Rial","nationality":"German","url":"http://en.wikipedia.org/wiki/Rial_%28racing_team%29"} 48 | {"constructorId":49,"constructorRef":"zakspeed","name":"Zakspeed","nationality":"German","url":"http://en.wikipedia.org/wiki/Zakspeed"} 49 | {"constructorId":50,"constructorRef":"ram","name":"RAM","nationality":"British","url":"http://en.wikipedia.org/wiki/RAM_Racing"} 50 | {"constructorId":51,"constructorRef":"alfa","name":"Alfa Romeo","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Alfa_Romeo_in_Formula_One"} 51 | {"constructorId":52,"constructorRef":"spirit","name":"Spirit","nationality":"British","url":"http://en.wikipedia.org/wiki/Spirit_(racing_team)"} 52 | {"constructorId":53,"constructorRef":"toleman","name":"Toleman","nationality":"British","url":"http://en.wikipedia.org/wiki/Toleman"} 53 | {"constructorId":54,"constructorRef":"ats","name":"ATS","nationality":"Italian","url":"http://en.wikipedia.org/wiki/ATS_(wheels)"} 54 | {"constructorId":55,"constructorRef":"theodore","name":"Theodore","nationality":"Hong Kong","url":"http://en.wikipedia.org/wiki/Theodore_Racing"} 55 | {"constructorId":56,"constructorRef":"fittipaldi","name":"Fittipaldi","nationality":"Brazilian","url":"http://en.wikipedia.org/wiki/Fittipaldi_%28constructor%29"} 56 | {"constructorId":57,"constructorRef":"ensign","name":"Ensign","nationality":"British","url":"http://en.wikipedia.org/wiki/Ensign_%28racing_team%29"} 57 | 
{"constructorId":58,"constructorRef":"shadow","name":"Shadow","nationality":"British","url":"http://en.wikipedia.org/wiki/Shadow_Racing_Cars"} 58 | {"constructorId":59,"constructorRef":"wolf","name":"Wolf","nationality":"Canadian","url":"http://en.wikipedia.org/wiki/Walter_Wolf_Racing"} 59 | {"constructorId":60,"constructorRef":"merzario","name":"Merzario","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Merzario"} 60 | {"constructorId":61,"constructorRef":"kauhsen","name":"Kauhsen","nationality":"German","url":"http://en.wikipedia.org/wiki/Kauhsen"} 61 | {"constructorId":62,"constructorRef":"rebaque","name":"Rebaque","nationality":"Mexican","url":"http://en.wikipedia.org/wiki/Rebaque"} 62 | {"constructorId":63,"constructorRef":"surtees","name":"Surtees","nationality":"British","url":"http://en.wikipedia.org/wiki/Surtees"} 63 | {"constructorId":64,"constructorRef":"hesketh","name":"Hesketh","nationality":"British","url":"http://en.wikipedia.org/wiki/Hesketh_Racing"} 64 | {"constructorId":65,"constructorRef":"martini","name":"Martini","nationality":"French","url":"http://en.wikipedia.org/wiki/Martini_(cars)"} 65 | {"constructorId":66,"constructorRef":"brm","name":"BRM","nationality":"British","url":"http://en.wikipedia.org/wiki/BRM"} 66 | {"constructorId":67,"constructorRef":"penske","name":"Penske","nationality":"American","url":"http://en.wikipedia.org/wiki/Penske_Racing"} 67 | {"constructorId":68,"constructorRef":"lec","name":"LEC","nationality":"British","url":"http://en.wikipedia.org/wiki/LEC_(Formula_One)"} 68 | {"constructorId":69,"constructorRef":"mcguire","name":"McGuire","nationality":"Australian","url":"http://en.wikipedia.org/wiki/McGuire_(Formula_One)"} 69 | {"constructorId":70,"constructorRef":"boro","name":"Boro","nationality":"Dutch","url":"http://en.wikipedia.org/wiki/Boro_(Formula_One)"} 70 | {"constructorId":71,"constructorRef":"apollon","name":"Apollon","nationality":"Swiss","url":"http://en.wikipedia.org/wiki/Apollon_(Formula_One)"} 
71 | {"constructorId":72,"constructorRef":"kojima","name":"Kojima","nationality":"Japanese","url":"http://en.wikipedia.org/wiki/Kojima_Engineering"} 72 | {"constructorId":73,"constructorRef":"parnelli","name":"Parnelli","nationality":"American","url":"http://en.wikipedia.org/wiki/Parnelli"} 73 | {"constructorId":74,"constructorRef":"maki","name":"Maki","nationality":"Japanese","url":"http://en.wikipedia.org/wiki/Maki_(cars)"} 74 | {"constructorId":75,"constructorRef":"hill","name":"Embassy Hill","nationality":"British","url":"http://en.wikipedia.org/wiki/Hill_(constructor)"} 75 | {"constructorId":76,"constructorRef":"lyncar","name":"Lyncar","nationality":"British","url":"http://en.wikipedia.org/wiki/Lyncar"} 76 | {"constructorId":77,"constructorRef":"trojan","name":"Trojan","nationality":"British","url":"http://en.wikipedia.org/wiki/Trojan_(Racing_team)"} 77 | {"constructorId":78,"constructorRef":"amon","name":"Amon","nationality":"New Zealand","url":"http://en.wikipedia.org/wiki/Amon_(Formula_One_team)"} 78 | {"constructorId":79,"constructorRef":"token","name":"Token","nationality":"British","url":"http://en.wikipedia.org/wiki/Token_(Racing_team)"} 79 | {"constructorId":80,"constructorRef":"iso_marlboro","name":"Iso Marlboro","nationality":"British","url":"http://en.wikipedia.org/wiki/Iso_Marlboro"} 80 | {"constructorId":81,"constructorRef":"tecno","name":"Tecno","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Tecno"} 81 | {"constructorId":82,"constructorRef":"matra","name":"Matra","nationality":"French","url":"http://en.wikipedia.org/wiki/Matra"} 82 | {"constructorId":83,"constructorRef":"politoys","name":"Politoys","nationality":"British","url":"http://en.wikipedia.org/wiki/Frank_Williams_Racing_Cars"} 83 | {"constructorId":84,"constructorRef":"connew","name":"Connew","nationality":"British","url":"http://en.wikipedia.org/wiki/Connew"} 84 | 
{"constructorId":85,"constructorRef":"bellasi","name":"Bellasi","nationality":"Swiss","url":"http://en.wikipedia.org/wiki/Bellasi"} 85 | {"constructorId":86,"constructorRef":"tomaso","name":"De Tomaso","nationality":"Italian","url":"http://en.wikipedia.org/wiki/De_Tomaso"} 86 | {"constructorId":87,"constructorRef":"cooper","name":"Cooper","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 87 | {"constructorId":88,"constructorRef":"eagle","name":"Eagle","nationality":"American","url":"http://en.wikipedia.org/wiki/Anglo_American_Racers"} 88 | {"constructorId":89,"constructorRef":"lds","name":"LDS","nationality":"South African","url":"http://en.wikipedia.org/wiki/LDS_(automobile)"} 89 | {"constructorId":90,"constructorRef":"protos","name":"Protos","nationality":"British","url":"http://en.wikipedia.org/wiki/Protos_(constructor)"} 90 | {"constructorId":91,"constructorRef":"shannon","name":"Shannon","nationality":"British","url":"http://en.wikipedia.org/wiki/Shannon_(Formula_One)"} 91 | {"constructorId":92,"constructorRef":"scirocco","name":"Scirocco","nationality":"British","url":"http://en.wikipedia.org/wiki/Scirocco-Powell"} 92 | {"constructorId":93,"constructorRef":"re","name":"RE","nationality":"Rhodesian","url":"http://en.wikipedia.org/wiki/RE_%28automobile%29"} 93 | {"constructorId":94,"constructorRef":"brp","name":"BRP","nationality":"British","url":"http://en.wikipedia.org/wiki/British_Racing_Partnership"} 94 | {"constructorId":95,"constructorRef":"porsche","name":"Porsche","nationality":"German","url":"http://en.wikipedia.org/wiki/Porsche_in_Formula_One"} 95 | {"constructorId":96,"constructorRef":"derrington","name":"Derrington","nationality":"British","url":"http://en.wikipedia.org/wiki/Derrington-Francis"} 96 | {"constructorId":97,"constructorRef":"gilby","name":"Gilby","nationality":"British","url":"http://en.wikipedia.org/wiki/Gilby"} 97 | 
{"constructorId":98,"constructorRef":"stebro","name":"Stebro","nationality":"Canadian","url":"http://en.wikipedia.org/wiki/Stebro"} 98 | {"constructorId":99,"constructorRef":"emeryson","name":"Emeryson","nationality":"British","url":"http://en.wikipedia.org/wiki/Emeryson"} 99 | {"constructorId":100,"constructorRef":"enb","name":"ENB","nationality":"Belgium","url":"http://en.wikipedia.org/wiki/Ecurie_Nationale_Belge"} 100 | {"constructorId":101,"constructorRef":"jbw","name":"JBW","nationality":"British","url":"http://en.wikipedia.org/wiki/JBW"} 101 | {"constructorId":102,"constructorRef":"ferguson","name":"Ferguson","nationality":"British","url":"http://en.wikipedia.org/wiki/Ferguson_Research_Ltd."} 102 | {"constructorId":103,"constructorRef":"mbm","name":"MBM","nationality":"Swiss","url":"http://en.wikipedia.org/wiki/Monteverdi_Basel_Motors"} 103 | {"constructorId":104,"constructorRef":"behra-porsche","name":"Behra-Porsche","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Behra-Porsche"} 104 | {"constructorId":105,"constructorRef":"maserati","name":"Maserati","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Maserati"} 105 | {"constructorId":106,"constructorRef":"scarab","name":"Scarab","nationality":"American","url":"http://en.wikipedia.org/wiki/Scarab_(constructor)"} 106 | {"constructorId":107,"constructorRef":"watson","name":"Watson","nationality":"American","url":"http://en.wikipedia.org/wiki/A.J._Watson"} 107 | {"constructorId":108,"constructorRef":"epperly","name":"Epperly","nationality":"American","url":"http://en.wikipedia.org/wiki/Epperly"} 108 | {"constructorId":109,"constructorRef":"phillips","name":"Phillips","nationality":"American","url":"http://en.wikipedia.org/wiki/Phillips_(constructor)"} 109 | {"constructorId":110,"constructorRef":"lesovsky","name":"Lesovsky","nationality":"American","url":"http://en.wikipedia.org/wiki/Lesovsky"} 110 | 
{"constructorId":111,"constructorRef":"trevis","name":"Trevis","nationality":"American","url":"http://en.wikipedia.org/wiki/Trevis"} 111 | {"constructorId":112,"constructorRef":"meskowski","name":"Meskowski","nationality":"American","url":"http://en.wikipedia.org/wiki/Meskowski"} 112 | {"constructorId":113,"constructorRef":"kurtis_kraft","name":"Kurtis Kraft","nationality":"American","url":"http://en.wikipedia.org/wiki/Kurtis_Kraft"} 113 | {"constructorId":114,"constructorRef":"kuzma","name":"Kuzma","nationality":"American","url":"http://en.wikipedia.org/wiki/Kuzma_(constructor)"} 114 | {"constructorId":115,"constructorRef":"vhristensen","name":"Christensen","nationality":"American","url":"http://en.wikipedia.org/wiki/Christensen_(constructor)"} 115 | {"constructorId":116,"constructorRef":"ewing","name":"Ewing","nationality":"American","url":"http://en.wikipedia.org/wiki/Ewing_(constructor)"} 116 | {"constructorId":117,"constructorRef":"aston_martin","name":"Aston Martin","nationality":"British","url":"http://en.wikipedia.org/wiki/Aston_Martin_in_Formula_One"} 117 | {"constructorId":118,"constructorRef":"vanwall","name":"Vanwall","nationality":"British","url":"http://en.wikipedia.org/wiki/Vanwall"} 118 | {"constructorId":119,"constructorRef":"moore","name":"Moore","nationality":"American","url":"http://en.wikipedia.org/wiki/Moore_(constructor)"} 119 | {"constructorId":120,"constructorRef":"dunn","name":"Dunn","nationality":"American","url":"http://en.wikipedia.org/wiki/Dunn_Engineering"} 120 | {"constructorId":121,"constructorRef":"elder","name":"Elder","nationality":"American","url":"http://en.wikipedia.org/wiki/Elder_(constructor)"} 121 | {"constructorId":122,"constructorRef":"sutton","name":"Sutton","nationality":"American","url":"http://en.wikipedia.org/wiki/Sutton_(constructor)"} 122 | {"constructorId":123,"constructorRef":"fry","name":"Fry","nationality":"British","url":"http://en.wikipedia.org/wiki/Fry_(racing_team)"} 123 | 
{"constructorId":124,"constructorRef":"tec-mec","name":"Tec-Mec","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Tec-Mec"} 124 | {"constructorId":125,"constructorRef":"connaught","name":"Connaught","nationality":"British","url":"http://en.wikipedia.org/wiki/Connaught_Engineering"} 125 | {"constructorId":126,"constructorRef":"alta","name":"Alta","nationality":"British","url":"http://en.wikipedia.org/wiki/Alta_auto_racing_team"} 126 | {"constructorId":127,"constructorRef":"osca","name":"OSCA","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Officine_Specializate_Costruzione_Automobili"} 127 | {"constructorId":128,"constructorRef":"gordini","name":"Gordini","nationality":"French","url":"http://en.wikipedia.org/wiki/Gordini"} 128 | {"constructorId":129,"constructorRef":"stevens","name":"Stevens","nationality":"American","url":"http://en.wikipedia.org/wiki/Stevens_(constructor)"} 129 | {"constructorId":130,"constructorRef":"bugatti","name":"Bugatti","nationality":"French","url":"http://en.wikipedia.org/wiki/Bugatti"} 130 | {"constructorId":131,"constructorRef":"mercedes","name":"Mercedes","nationality":"German","url":"http://en.wikipedia.org/wiki/Mercedes-Benz_in_Formula_One"} 131 | {"constructorId":132,"constructorRef":"lancia","name":"Lancia","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Lancia_in_Formula_One"} 132 | {"constructorId":133,"constructorRef":"hwm","name":"HWM","nationality":"British","url":"http://en.wikipedia.org/wiki/Hersham_and_Walton_Motors"} 133 | {"constructorId":134,"constructorRef":"schroeder","name":"Schroeder","nationality":"American","url":"http://en.wikipedia.org/wiki/Schroeder_(constructor)"} 134 | {"constructorId":135,"constructorRef":"pawl","name":"Pawl","nationality":"American","url":"http://en.wikipedia.org/wiki/Pawl_(constructor)"} 135 | {"constructorId":136,"constructorRef":"pankratz","name":"Pankratz","nationality":"American","url":"http://en.wikipedia.org/wiki/Pankratz"} 136 | 
{"constructorId":137,"constructorRef":"arzani-volpini","name":"Arzani-Volpini","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Arzani-Volpini"} 137 | {"constructorId":138,"constructorRef":"nichels","name":"Nichels","nationality":"American","url":"http://en.wikipedia.org/wiki/Nichels"} 138 | {"constructorId":139,"constructorRef":"bromme","name":"Bromme","nationality":"American","url":"http://en.wikipedia.org/wiki/Bromme"} 139 | {"constructorId":140,"constructorRef":"klenk","name":"Klenk","nationality":"German","url":"http://en.wikipedia.org/wiki/Klenk"} 140 | {"constructorId":141,"constructorRef":"simca","name":"Simca","nationality":"French","url":"http://en.wikipedia.org/wiki/Simca"} 141 | {"constructorId":142,"constructorRef":"turner","name":"Turner","nationality":"American","url":"http://en.wikipedia.org/wiki/Turner_(constructor)"} 142 | {"constructorId":143,"constructorRef":"del_roy","name":"Del Roy","nationality":"American","url":"http://en.wikipedia.org/wiki/Del_Roy"} 143 | {"constructorId":144,"constructorRef":"veritas","name":"Veritas","nationality":"German","url":"http://en.wikipedia.org/wiki/Veritas_(constructor)"} 144 | {"constructorId":145,"constructorRef":"bmw","name":"BMW","nationality":"German","url":"http://en.wikipedia.org/wiki/BMW"} 145 | {"constructorId":146,"constructorRef":"emw","name":"EMW","nationality":"East German","url":"http://en.wikipedia.org/wiki/Eisenacher_Motorenwerk"} 146 | {"constructorId":147,"constructorRef":"afm","name":"AFM","nationality":"German","url":"http://en.wikipedia.org/wiki/Alex_von_Falkenhausen_Motorenbau"} 147 | {"constructorId":148,"constructorRef":"frazer_nash","name":"Frazer Nash","nationality":"British","url":"http://en.wikipedia.org/wiki/Frazer_Nash"} 148 | {"constructorId":149,"constructorRef":"sherman","name":"Sherman","nationality":"American","url":"http://en.wikipedia.org/wiki/Sherman_(constructor)"} 149 | 
{"constructorId":150,"constructorRef":"deidt","name":"Deidt","nationality":"American","url":"http://en.wikipedia.org/wiki/Deidt"} 150 | {"constructorId":151,"constructorRef":"era","name":"ERA","nationality":"British","url":"http://en.wikipedia.org/wiki/English_Racing_Automobiles"} 151 | {"constructorId":152,"constructorRef":"butterworth","name":"Aston Butterworth","nationality":"British","url":"http://en.wikipedia.org/wiki/Aston_Butterworth"} 152 | {"constructorId":153,"constructorRef":"cisitalia","name":"Cisitalia","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Cisitalia"} 153 | {"constructorId":154,"constructorRef":"lago","name":"Talbot-Lago","nationality":"French","url":"http://en.wikipedia.org/wiki/Talbot-Lago"} 154 | {"constructorId":155,"constructorRef":"hall","name":"Hall","nationality":"American","url":"http://en.wikipedia.org/wiki/Hall_(constructor)"} 155 | {"constructorId":156,"constructorRef":"marchese","name":"Marchese","nationality":"American","url":"http://en.wikipedia.org/wiki/Marchese_(constructor)"} 156 | {"constructorId":157,"constructorRef":"langley","name":"Langley","nationality":"American","url":"http://en.wikipedia.org/wiki/Langley_(constructor)"} 157 | {"constructorId":158,"constructorRef":"rae","name":"Rae","nationality":"American","url":"http://en.wikipedia.org/wiki/Rae_(motorsport)"} 158 | {"constructorId":159,"constructorRef":"olson","name":"Olson","nationality":"American","url":"http://en.wikipedia.org/wiki/Olson_(constructor)"} 159 | {"constructorId":160,"constructorRef":"wetteroth","name":"Wetteroth","nationality":"American","url":"http://en.wikipedia.org/wiki/Wetteroth"} 160 | {"constructorId":161,"constructorRef":"adams","name":"Adams","nationality":"American","url":"http://en.wikipedia.org/wiki/Adams_(constructor)"} 161 | {"constructorId":162,"constructorRef":"snowberger","name":"Snowberger","nationality":"American","url":"http://en.wikipedia.org/wiki/Snowberger"} 162 | 
{"constructorId":163,"constructorRef":"milano","name":"Milano","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Scuderia_Milano"} 163 | {"constructorId":164,"constructorRef":"hrt","name":"HRT","nationality":"Spanish","url":"http://en.wikipedia.org/wiki/Hispania_Racing"} 164 | {"constructorId":167,"constructorRef":"cooper-maserati","name":"Cooper-Maserati","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 165 | {"constructorId":166,"constructorRef":"virgin","name":"Virgin","nationality":"British","url":"http://en.wikipedia.org/wiki/Virgin_Racing"} 166 | {"constructorId":168,"constructorRef":"cooper-osca","name":"Cooper-OSCA","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 167 | {"constructorId":169,"constructorRef":"cooper-borgward","name":"Cooper-Borgward","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 168 | {"constructorId":170,"constructorRef":"cooper-climax","name":"Cooper-Climax","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 169 | {"constructorId":171,"constructorRef":"cooper-castellotti","name":"Cooper-Castellotti","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 170 | {"constructorId":172,"constructorRef":"lotus-climax","name":"Lotus-Climax","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 171 | {"constructorId":173,"constructorRef":"lotus-maserati","name":"Lotus-Maserati","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 172 | {"constructorId":174,"constructorRef":"de_tomaso-osca","name":"De Tomaso-Osca","nationality":"Italian","url":"http://en.wikipedia.org/wiki/De_Tomaso"} 173 | {"constructorId":175,"constructorRef":"de_tomaso-alfa_romeo","name":"De Tomaso-Alfa Romeo","nationality":"Italian","url":"http://en.wikipedia.org/wiki/De_Tomaso"} 174 | 
{"constructorId":176,"constructorRef":"lotus-brm","name":"Lotus-BRM","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 175 | {"constructorId":177,"constructorRef":"lotus-borgward","name":"Lotus-Borgward","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 176 | {"constructorId":178,"constructorRef":"cooper-alfa_romeo","name":"Cooper-Alfa Romeo","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 177 | {"constructorId":179,"constructorRef":"de_tomaso-ferrari","name":"De Tomaso-Ferrari","nationality":"Italian","url":"http://en.wikipedia.org/wiki/De_Tomaso"} 178 | {"constructorId":180,"constructorRef":"lotus-ford","name":"Lotus-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 179 | {"constructorId":181,"constructorRef":"brabham-brm","name":"Brabham-BRM","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 180 | {"constructorId":182,"constructorRef":"brabham-ford","name":"Brabham-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 181 | {"constructorId":183,"constructorRef":"brabham-climax","name":"Brabham-Climax","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 182 | {"constructorId":184,"constructorRef":"lds-climax","name":"LDS-Climax","nationality":"South African","url":"http://en.wikipedia.org/wiki/LDS_(automobile)"} 183 | {"constructorId":185,"constructorRef":"lds-alfa_romeo","name":"LDS-Alfa Romeo","nationality":"South African","url":"http://en.wikipedia.org/wiki/LDS_(automobile)"} 184 | {"constructorId":186,"constructorRef":"cooper-ford","name":"Cooper-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 185 | {"constructorId":187,"constructorRef":"mclaren-ford","name":"McLaren-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_McLaren"} 186 | 
{"constructorId":188,"constructorRef":"mclaren-seren","name":"McLaren-Serenissima","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_McLaren"} 187 | {"constructorId":189,"constructorRef":"eagle-climax","name":"Eagle-Climax","nationality":"American","url":"http://en.wikipedia.org/wiki/Anglo_American_Racers"} 188 | {"constructorId":190,"constructorRef":"eagle-weslake","name":"Eagle-Weslake","nationality":"American","url":"http://en.wikipedia.org/wiki/Anglo_American_Racers"} 189 | {"constructorId":191,"constructorRef":"brabham-repco","name":"Brabham-Repco","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 190 | {"constructorId":192,"constructorRef":"cooper-ferrari","name":"Cooper-Ferrari","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 191 | {"constructorId":193,"constructorRef":"cooper-ats","name":"Cooper-ATS","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 192 | {"constructorId":194,"constructorRef":"mclaren-brm","name":"McLaren-BRM","nationality":"British","url":"http://en.wikipedia.org/wiki/McLaren_(racing)"} 193 | {"constructorId":195,"constructorRef":"cooper-brm","name":"Cooper-BRM","nationality":"British","url":"http://en.wikipedia.org/wiki/Cooper_Car_Company"} 194 | {"constructorId":196,"constructorRef":"matra-ford","name":"Matra-Ford","nationality":"French","url":"http://en.wikipedia.org/wiki/Matra"} 195 | {"constructorId":197,"constructorRef":"brm-ford","name":"BRM-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/BRM"} 196 | {"constructorId":198,"constructorRef":"mclaren-alfa_romeo","name":"McLaren-Alfa Romeo","nationality":"British","url":"http://en.wikipedia.org/wiki/McLaren_(racing)"} 197 | {"constructorId":199,"constructorRef":"march-alfa_romeo","name":"March-Alfa Romeo","nationality":"British","url":"http://en.wikipedia.org/wiki/March_Engineering"} 198 | 
{"constructorId":200,"constructorRef":"march-ford","name":"March-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/March_Engineering"} 199 | {"constructorId":201,"constructorRef":"lotus-pw","name":"Lotus-Pratt & Whitney","nationality":"British","url":"http://en.wikipedia.org/wiki/Team_Lotus"} 200 | {"constructorId":202,"constructorRef":"shadow-ford","name":"Shadow-Ford","nationality":"British","url":"http://en.wikipedia.org/wiki/Shadow_Racing_Cars"} 201 | {"constructorId":203,"constructorRef":"shadow-matra","name":"Shadow-Matra","nationality":"British","url":"http://en.wikipedia.org/wiki/Shadow_Racing_Cars"} 202 | {"constructorId":204,"constructorRef":"brabham-alfa_romeo","name":"Brabham-Alfa Romeo","nationality":"British","url":"http://en.wikipedia.org/wiki/Brabham"} 203 | {"constructorId":205,"constructorRef":"lotus_racing","name":"Lotus","nationality":"Malaysian","url":"http://en.wikipedia.org/wiki/Lotus_Racing"} 204 | {"constructorId":206,"constructorRef":"marussia","name":"Marussia","nationality":"Russian","url":"http://en.wikipedia.org/wiki/Marussia_F1"} 205 | {"constructorId":207,"constructorRef":"caterham","name":"Caterham","nationality":"Malaysian","url":"http://en.wikipedia.org/wiki/Caterham_F1"} 206 | {"constructorId":208,"constructorRef":"lotus_f1","name":"Lotus F1","nationality":"British","url":"http://en.wikipedia.org/wiki/Lotus_F1"} 207 | {"constructorId":209,"constructorRef":"manor","name":"Manor Marussia","nationality":"British","url":"http://en.wikipedia.org/wiki/Manor_Motorsport"} 208 | {"constructorId":210,"constructorRef":"haas","name":"Haas F1 Team","nationality":"American","url":"http://en.wikipedia.org/wiki/Haas_F1_Team"} 209 | {"constructorId":211,"constructorRef":"racing_point","name":"Racing Point","nationality":"British","url":"http://en.wikipedia.org/wiki/Racing_Point_F1_Team"} 210 | 
{"constructorId":213,"constructorRef":"alphatauri","name":"AlphaTauri","nationality":"Italian","url":"http://en.wikipedia.org/wiki/Scuderia_AlphaTauri"} 211 | {"constructorId":214,"constructorRef":"alpine","name":"Alpine F1 Team","nationality":"French","url":"http://en.wikipedia.org/wiki/Alpine_F1_Team"} -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-21/lap_times/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Incremental_load_data/2021-03-21/lap_times/.DS_Store -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-21/qualifying/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Incremental_load_data/2021-03-21/qualifying/.DS_Store -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-28/circuits.csv: -------------------------------------------------------------------------------- 1 | circuitId,circuitRef,name,location,country,lat,lng,alt,url 2 | 1,"albert_park","Albert Park Grand Prix Circuit","Melbourne","Australia",-37.8497,144.968,10,"http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit" 3 | 2,"sepang","Sepang International Circuit","Kuala Lumpur","Malaysia",2.76083,101.738,18,"http://en.wikipedia.org/wiki/Sepang_International_Circuit" 4 | 3,"bahrain","Bahrain International Circuit","Sakhir","Bahrain",26.0325,50.5106,7,"http://en.wikipedia.org/wiki/Bahrain_International_Circuit" 5 | 4,"catalunya","Circuit de 
Barcelona-Catalunya","Montmeló","Spain",41.57,2.26111,109,"http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya" 6 | 5,"istanbul","Istanbul Park","Istanbul","Turkey",40.9517,29.405,130,"http://en.wikipedia.org/wiki/Istanbul_Park" 7 | 6,"monaco","Circuit de Monaco","Monte-Carlo","Monaco",43.7347,7.42056,7,"http://en.wikipedia.org/wiki/Circuit_de_Monaco" 8 | 7,"villeneuve","Circuit Gilles Villeneuve","Montreal","Canada",45.5,-73.5228,13,"http://en.wikipedia.org/wiki/Circuit_Gilles_Villeneuve" 9 | 8,"magny_cours","Circuit de Nevers Magny-Cours","Magny Cours","France",46.8642,3.16361,228,"http://en.wikipedia.org/wiki/Circuit_de_Nevers_Magny-Cours" 10 | 9,"silverstone","Silverstone Circuit","Silverstone","UK",52.0786,-1.01694,153,"http://en.wikipedia.org/wiki/Silverstone_Circuit" 11 | 10,"hockenheimring","Hockenheimring","Hockenheim","Germany",49.3278,8.56583,103,"http://en.wikipedia.org/wiki/Hockenheimring" 12 | 11,"hungaroring","Hungaroring","Budapest","Hungary",47.5789,19.2486,264,"http://en.wikipedia.org/wiki/Hungaroring" 13 | 12,"valencia","Valencia Street Circuit","Valencia","Spain",39.4589,-0.331667,4,"http://en.wikipedia.org/wiki/Valencia_Street_Circuit" 14 | 13,"spa","Circuit de Spa-Francorchamps","Spa","Belgium",50.4372,5.97139,401,"http://en.wikipedia.org/wiki/Circuit_de_Spa-Francorchamps" 15 | 14,"monza","Autodromo Nazionale di Monza","Monza","Italy",45.6156,9.28111,162,"http://en.wikipedia.org/wiki/Autodromo_Nazionale_Monza" 16 | 15,"marina_bay","Marina Bay Street Circuit","Marina Bay","Singapore",1.2914,103.864,18,"http://en.wikipedia.org/wiki/Marina_Bay_Street_Circuit" 17 | 16,"fuji","Fuji Speedway","Oyama","Japan",35.3717,138.927,583,"http://en.wikipedia.org/wiki/Fuji_Speedway" 18 | 17,"shanghai","Shanghai International Circuit","Shanghai","China",31.3389,121.22,5,"http://en.wikipedia.org/wiki/Shanghai_International_Circuit" 19 | 18,"interlagos","Autódromo José Carlos Pace","São 
Paulo","Brazil",-23.7036,-46.6997,785,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jos%C3%A9_Carlos_Pace" 20 | 19,"indianapolis","Indianapolis Motor Speedway","Indianapolis","USA",39.795,-86.2347,223,"http://en.wikipedia.org/wiki/Indianapolis_Motor_Speedway" 21 | 20,"nurburgring","Nürburgring","Nürburg","Germany",50.3356,6.9475,578,"http://en.wikipedia.org/wiki/N%C3%BCrburgring" 22 | 21,"imola","Autodromo Enzo e Dino Ferrari","Imola","Italy",44.3439,11.7167,37,"http://en.wikipedia.org/wiki/Autodromo_Enzo_e_Dino_Ferrari" 23 | 22,"suzuka","Suzuka Circuit","Suzuka","Japan",34.8431,136.541,45,"http://en.wikipedia.org/wiki/Suzuka_Circuit" 24 | 23,"osterreichring","A1-Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/A1-Ring" 25 | 24,"yas_marina","Yas Marina Circuit","Abu Dhabi","UAE",24.4672,54.6031,3,"http://en.wikipedia.org/wiki/Yas_Marina_Circuit" 26 | 25,"galvez","Autódromo Juan y Oscar Gálvez","Buenos Aires","Argentina",-34.6943,-58.4593,8,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Oscar_Alfredo_G%C3%A1lvez" 27 | 26,"jerez","Circuito de Jerez","Jerez de la Frontera","Spain",36.7083,-6.03417,37,"http://en.wikipedia.org/wiki/Circuito_Permanente_de_Jerez" 28 | 27,"estoril","Autódromo do Estoril","Estoril","Portugal",38.7506,-9.39417,130,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_do_Estoril" 29 | 28,"okayama","Okayama International Circuit","Okayama","Japan",34.915,134.221,266,"http://en.wikipedia.org/wiki/TI_Circuit" 30 | 29,"adelaide","Adelaide Street Circuit","Adelaide","Australia",-34.9272,138.617,58,"http://en.wikipedia.org/wiki/Adelaide_Street_Circuit" 31 | 30,"kyalami","Kyalami","Midrand","South Africa",-25.9894,28.0767,1460,"http://en.wikipedia.org/wiki/Kyalami" 32 | 31,"donington","Donington Park","Castle Donington","UK",52.8306,-1.37528,88,"http://en.wikipedia.org/wiki/Donington_Park" 33 | 32,"rodriguez","Autódromo Hermanos Rodríguez","Mexico 
City","Mexico",19.4042,-99.0907,2227,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Hermanos_Rodr%C3%ADguez" 34 | 33,"phoenix","Phoenix street circuit","Phoenix","USA",33.4479,-112.075,345,"http://en.wikipedia.org/wiki/Phoenix_street_circuit" 35 | 34,"ricard","Circuit Paul Ricard","Le Castellet","France",43.2506,5.79167,432,"http://en.wikipedia.org/wiki/Paul_Ricard_Circuit" 36 | 35,"yeongam","Korean International Circuit","Yeongam County","Korea",34.7333,126.417,0,"http://en.wikipedia.org/wiki/Korean_International_Circuit" 37 | 36,"jacarepagua","Autódromo Internacional Nelson Piquet","Rio de Janeiro","Brazil",-22.9756,-43.395,1126,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Internacional_Nelson_Piquet" 38 | 37,"detroit","Detroit Street Circuit","Detroit","USA",42.3298,-83.0401,177,"http://en.wikipedia.org/wiki/Detroit_street_circuit" 39 | 38,"brands_hatch","Brands Hatch","Kent","UK",51.3569,0.263056,145,"http://en.wikipedia.org/wiki/Brands_Hatch" 40 | 39,"zandvoort","Circuit Park Zandvoort","Zandvoort","Netherlands",52.3888,4.54092,6,"http://en.wikipedia.org/wiki/Circuit_Zandvoort" 41 | 40,"zolder","Zolder","Heusden-Zolder","Belgium",50.9894,5.25694,36,"http://en.wikipedia.org/wiki/Zolder" 42 | 41,"dijon","Dijon-Prenois","Dijon","France",47.3625,4.89913,484,"http://en.wikipedia.org/wiki/Dijon-Prenois" 43 | 42,"dallas","Fair Park","Dallas","USA",32.7774,-96.7587,139,"http://en.wikipedia.org/wiki/Fair_Park" 44 | 43,"long_beach","Long Beach","California","USA",33.7651,-118.189,12,"http://en.wikipedia.org/wiki/Long_Beach,_California" 45 | 44,"las_vegas","Las Vegas Street Circuit","Nevada","USA",36.1162,-115.174,639,"http://en.wikipedia.org/wiki/Las_Vegas,_Nevada" 46 | 45,"jarama","Jarama","Madrid","Spain",40.6171,-3.58558,609,"http://en.wikipedia.org/wiki/Circuito_Permanente_Del_Jarama" 47 | 46,"watkins_glen","Watkins Glen","New York State","USA",42.3369,-76.9272,485,"http://en.wikipedia.org/wiki/Watkins_Glen_International" 48 | 47,"anderstorp","Scandinavian 
Raceway","Anderstorp","Sweden",57.2653,13.6042,153,"http://en.wikipedia.org/wiki/Scandinavian_Raceway" 49 | 48,"mosport","Mosport International Raceway","Ontario","Canada",44.0481,-78.6756,332,"http://en.wikipedia.org/wiki/Mosport" 50 | 49,"montjuic","Montjuïc","Barcelona","Spain",41.3664,2.15167,79,"http://en.wikipedia.org/wiki/Montju%C3%AFc_circuit" 51 | 50,"nivelles","Nivelles-Baulers","Brussels","Belgium",50.6211,4.32694,139,"http://en.wikipedia.org/wiki/Nivelles-Baulers" 52 | 51,"charade","Charade Circuit","Clermont-Ferrand","France",45.7472,3.03889,790,"http://en.wikipedia.org/wiki/Charade_Circuit" 53 | 52,"tremblant","Circuit Mont-Tremblant","Quebec","Canada",46.1877,-74.6099,214,"http://en.wikipedia.org/wiki/Circuit_Mont-Tremblant" 54 | 53,"essarts","Rouen-Les-Essarts","Rouen","France",49.3306,1.00458,81,"http://en.wikipedia.org/wiki/Rouen-Les-Essarts" 55 | 54,"lemans","Le Mans","Le Mans","France",47.95,0.224231,67,"http://en.wikipedia.org/wiki/Circuit_de_la_Sarthe#Bugatti_Circuit" 56 | 55,"reims","Reims-Gueux","Reims","France",49.2542,3.93083,88,"http://en.wikipedia.org/wiki/Reims-Gueux" 57 | 56,"george","Prince George Circuit","Eastern Cape Province","South Africa",-33.0486,27.8736,15,"http://en.wikipedia.org/wiki/Prince_George_Circuit" 58 | 57,"zeltweg","Zeltweg","Styria","Austria",47.2039,14.7478,676,"http://en.wikipedia.org/wiki/Zeltweg_Airfield" 59 | 58,"aintree","Aintree","Liverpool","UK",53.4769,-2.94056,20,"http://en.wikipedia.org/wiki/Aintree_Motor_Racing_Circuit" 60 | 59,"boavista","Circuito da Boavista","Oporto","Portugal",41.1705,-8.67325,28,"http://en.wikipedia.org/wiki/Circuito_da_Boavista" 61 | 60,"riverside","Riverside International Raceway","California","USA",33.937,-117.273,470,"http://en.wikipedia.org/wiki/Riverside_International_Raceway" 62 | 61,"avus","AVUS","Berlin","Germany",52.4806,13.2514,53,"http://en.wikipedia.org/wiki/AVUS" 63 | 62,"monsanto","Monsanto Park 
Circuit","Lisbon","Portugal",38.7197,-9.20306,158,"http://en.wikipedia.org/wiki/Monsanto_Park_Circuit" 64 | 63,"sebring","Sebring International Raceway","Florida","USA",27.4547,-81.3483,18,"http://en.wikipedia.org/wiki/Sebring_Raceway" 65 | 64,"ain-diab","Ain Diab","Casablanca","Morocco",33.5786,-7.6875,19,"http://en.wikipedia.org/wiki/Ain-Diab_Circuit" 66 | 65,"pescara","Pescara Circuit","Pescara","Italy",42.475,14.1508,129,"http://en.wikipedia.org/wiki/Pescara_Circuit" 67 | 66,"bremgarten","Circuit Bremgarten","Bern","Switzerland",46.9589,7.40194,551,"http://en.wikipedia.org/wiki/Circuit_Bremgarten" 68 | 67,"pedralbes","Circuit de Pedralbes","Barcelona","Spain",41.3903,2.11667,85,"http://en.wikipedia.org/wiki/Pedralbes_Circuit" 69 | 68,"buddh","Buddh International Circuit","Uttar Pradesh","India",28.3487,77.5331,194,"http://en.wikipedia.org/wiki/Buddh_International_Circuit" 70 | 69,"americas","Circuit of the Americas","Austin","USA",30.1328,-97.6411,161,"http://en.wikipedia.org/wiki/Circuit_of_the_Americas" 71 | 70,"red_bull_ring","Red Bull Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/Red_Bull_Ring" 72 | 71,"sochi","Sochi Autodrom","Sochi","Russia",43.4057,39.9578,2,"http://en.wikipedia.org/wiki/Sochi_Autodrom" 73 | 72,"port_imperial","Port Imperial Street Circuit","New Jersey","USA",40.7769,-74.0111,4,"http://en.wikipedia.org/wiki/Port_Imperial_Street_Circuit" 74 | 73,"BAK","Baku City Circuit","Baku","Azerbaijan",40.3725,49.8533,-7,"http://en.wikipedia.org/wiki/Baku_City_Circuit" 75 | 74,"hanoi","Hanoi Street Circuit","Hanoi","Vietnam",21.0166,105.766,9,"http://en.wikipedia.org/wiki/Hanoi_Street_Circuit" 76 | 75,"portimao","Autódromo Internacional do Algarve","Portimão","Portugal",37.227,-8.6267,108,"http://en.wikipedia.org/wiki/Algarve_International_Circuit" 77 | 76,"mugello","Autodromo Internazionale del Mugello","Mugello","Italy",43.9975,11.3719,255,"http://en.wikipedia.org/wiki/Mugello_Circuit" 78 | 77,"jeddah","Jeddah Street 
Circuit","Jeddah","Saudi Arabia",21.5433,39.1728,15,"http://en.wikipedia.org/wiki/Jeddah_Street_Circuit" 79 | -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-28/pit_stops.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "raceId": 1052, 4 | "driverId": 815, 5 | "stop": 1, 6 | "lap": 2, 7 | "time": "18:11:56", 8 | "duration": 23.993, 9 | "milliseconds": 23993 10 | }, 11 | { 12 | "raceId": 1052, 13 | "driverId": 842, 14 | "stop": 1, 15 | "lap": 4, 16 | "time": "18:16:14", 17 | "duration": 38.338, 18 | "milliseconds": 38338 19 | }, 20 | { 21 | "raceId": 1052, 22 | "driverId": 4, 23 | "stop": 1, 24 | "lap": 11, 25 | "time": "18:27:32", 26 | "duration": 24.373, 27 | "milliseconds": 24373 28 | }, 29 | { 30 | "raceId": 1052, 31 | "driverId": 846, 32 | "stop": 1, 33 | "lap": 12, 34 | "time": "18:29:05", 35 | "duration": 24.899, 36 | "milliseconds": 24899 37 | }, 38 | { 39 | "raceId": 1052, 40 | "driverId": 844, 41 | "stop": 1, 42 | "lap": 12, 43 | "time": "18:29:06", 44 | "duration": 24.925, 45 | "milliseconds": 24925 46 | }, 47 | { 48 | "raceId": 1052, 49 | "driverId": 840, 50 | "stop": 1, 51 | "lap": 12, 52 | "time": "18:29:09", 53 | "duration": 24.884, 54 | "milliseconds": 24884 55 | }, 56 | { 57 | "raceId": 1052, 58 | "driverId": 841, 59 | "stop": 1, 60 | "lap": 12, 61 | "time": "18:29:14", 62 | "duration": 31.998, 63 | "milliseconds": 31998 64 | }, 65 | { 66 | "raceId": 1052, 67 | "driverId": 1, 68 | "stop": 1, 69 | "lap": 13, 70 | "time": "18:30:29", 71 | "duration": 24.839, 72 | "milliseconds": 24839 73 | }, 74 | { 75 | "raceId": 1052, 76 | "driverId": 817, 77 | "stop": 1, 78 | "lap": 13, 79 | "time": "18:30:45", 80 | "duration": 24.688, 81 | "milliseconds": 24688 82 | }, 83 | { 84 | "raceId": 1052, 85 | "driverId": 8, 86 | "stop": 1, 87 | "lap": 13, 88 | "time": "18:30:52", 89 | "duration": 24.107, 90 | "milliseconds": 24107 91 | }, 92 | { 93 | 
"raceId": 1052, 94 | "driverId": 839, 95 | "stop": 1, 96 | "lap": 13, 97 | "time": "18:30:55", 98 | "duration": 25.226, 99 | "milliseconds": 25226 100 | }, 101 | { 102 | "raceId": 1052, 103 | "driverId": 847, 104 | "stop": 1, 105 | "lap": 13, 106 | "time": "18:31:02", 107 | "duration": 24.621, 108 | "milliseconds": 24621 109 | }, 110 | { 111 | "raceId": 1052, 112 | "driverId": 849, 113 | "stop": 1, 114 | "lap": 14, 115 | "time": "18:32:44", 116 | "duration": 26.046, 117 | "milliseconds": 26046 118 | }, 119 | { 120 | "raceId": 1052, 121 | "driverId": 854, 122 | "stop": 1, 123 | "lap": 14, 124 | "time": "18:32:58", 125 | "duration": 25.798, 126 | "milliseconds": 25798 127 | }, 128 | { 129 | "raceId": 1052, 130 | "driverId": 832, 131 | "stop": 1, 132 | "lap": 15, 133 | "time": "18:34:04", 134 | "duration": 24.353, 135 | "milliseconds": 24353 136 | }, 137 | { 138 | "raceId": 1052, 139 | "driverId": 852, 140 | "stop": 1, 141 | "lap": 15, 142 | "time": "18:34:12", 143 | "duration": 25.046, 144 | "milliseconds": 25046 145 | }, 146 | { 147 | "raceId": 1052, 148 | "driverId": 822, 149 | "stop": 1, 150 | "lap": 16, 151 | "time": "18:35:24", 152 | "duration": 24.262, 153 | "milliseconds": 24262 154 | }, 155 | { 156 | "raceId": 1052, 157 | "driverId": 830, 158 | "stop": 1, 159 | "lap": 17, 160 | "time": "18:36:54", 161 | "duration": 24.767, 162 | "milliseconds": 24767 163 | }, 164 | { 165 | "raceId": 1052, 166 | "driverId": 815, 167 | "stop": 2, 168 | "lap": 19, 169 | "time": "18:40:35", 170 | "duration": 24.105, 171 | "milliseconds": 24105 172 | }, 173 | { 174 | "raceId": 1052, 175 | "driverId": 842, 176 | "stop": 2, 177 | "lap": 19, 178 | "time": "18:41:17", 179 | "duration": 24.317, 180 | "milliseconds": 24317 181 | }, 182 | { 183 | "raceId": 1052, 184 | "driverId": 20, 185 | "stop": 1, 186 | "lap": 24, 187 | "time": "18:49:08", 188 | "duration": 24.626, 189 | "milliseconds": 24626 190 | }, 191 | { 192 | "raceId": 1052, 193 | "driverId": 1, 194 | "stop": 2, 195 | "lap": 28, 
196 | "time": "18:54:40", 197 | "duration": 24.076, 198 | "milliseconds": 24076 199 | }, 200 | { 201 | "raceId": 1052, 202 | "driverId": 840, 203 | "stop": 2, 204 | "lap": 28, 205 | "time": "18:55:23", 206 | "duration": 25.525, 207 | "milliseconds": 25525 208 | }, 209 | { 210 | "raceId": 1052, 211 | "driverId": 8, 212 | "stop": 2, 213 | "lap": 29, 214 | "time": "18:57:06", 215 | "duration": 24.046, 216 | "milliseconds": 24046 217 | }, 218 | { 219 | "raceId": 1052, 220 | "driverId": 4, 221 | "stop": 2, 222 | "lap": 29, 223 | "time": "18:57:13", 224 | "duration": 24.775, 225 | "milliseconds": 24775 226 | }, 227 | { 228 | "raceId": 1052, 229 | "driverId": 822, 230 | "stop": 2, 231 | "lap": 30, 232 | "time": "18:57:58", 233 | "duration": 32.897, 234 | "milliseconds": 32897 235 | }, 236 | { 237 | "raceId": 1052, 238 | "driverId": 841, 239 | "stop": 2, 240 | "lap": 30, 241 | "time": "18:58:53", 242 | "duration": 24.223, 243 | "milliseconds": 24223 244 | }, 245 | { 246 | "raceId": 1052, 247 | "driverId": 839, 248 | "stop": 2, 249 | "lap": 31, 250 | "time": "19:00:29", 251 | "duration": 24.471, 252 | "milliseconds": 24471 253 | }, 254 | { 255 | "raceId": 1052, 256 | "driverId": 844, 257 | "stop": 2, 258 | "lap": 32, 259 | "time": "19:01:40", 260 | "duration": 24.176, 261 | "milliseconds": 24176 262 | }, 263 | { 264 | "raceId": 1052, 265 | "driverId": 817, 266 | "stop": 2, 267 | "lap": 32, 268 | "time": "19:01:44", 269 | "duration": 24.655, 270 | "milliseconds": 24655 271 | }, 272 | { 273 | "raceId": 1052, 274 | "driverId": 849, 275 | "stop": 2, 276 | "lap": 32, 277 | "time": "19:02:32", 278 | "duration": 23.983, 279 | "milliseconds": 23983 280 | }, 281 | { 282 | "raceId": 1052, 283 | "driverId": 846, 284 | "stop": 2, 285 | "lap": 33, 286 | "time": "19:03:11", 287 | "duration": 25.64, 288 | "milliseconds": 25640 289 | }, 290 | { 291 | "raceId": 1052, 292 | "driverId": 852, 293 | "stop": 2, 294 | "lap": 33, 295 | "time": "19:03:36", 296 | "duration": 24.328, 297 | 
"milliseconds": 24328 298 | }, 299 | { 300 | "raceId": 1052, 301 | "driverId": 854, 302 | "stop": 2, 303 | "lap": 33, 304 | "time": "19:04:27", 305 | "duration": 25.343, 306 | "milliseconds": 25343 307 | }, 308 | { 309 | "raceId": 1052, 310 | "driverId": 847, 311 | "stop": 2, 312 | "lap": 36, 313 | "time": "19:08:48", 314 | "duration": 24.248, 315 | "milliseconds": 24248 316 | }, 317 | { 318 | "raceId": 1052, 319 | "driverId": 832, 320 | "stop": 2, 321 | "lap": 37, 322 | "time": "19:09:53", 323 | "duration": 24.341, 324 | "milliseconds": 24341 325 | }, 326 | { 327 | "raceId": 1052, 328 | "driverId": 815, 329 | "stop": 3, 330 | "lap": 38, 331 | "time": "19:11:20", 332 | "duration": 24.191, 333 | "milliseconds": 24191 334 | }, 335 | { 336 | "raceId": 1052, 337 | "driverId": 830, 338 | "stop": 2, 339 | "lap": 39, 340 | "time": "19:12:13", 341 | "duration": 23.848, 342 | "milliseconds": 23848 343 | }, 344 | { 345 | "raceId": 1052, 346 | "driverId": 842, 347 | "stop": 3, 348 | "lap": 39, 349 | "time": "19:13:55", 350 | "duration": 24.983, 351 | "milliseconds": 24983 352 | }, 353 | { 354 | "raceId": 1052, 355 | "driverId": 822, 356 | "stop": 3, 357 | "lap": 54, 358 | "time": "19:36:21", 359 | "duration": 24.566, 360 | "milliseconds": 24566 361 | } 362 | ] -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-28/qualifying/qualifying_split_1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "qualifyId": 8735, 4 | "raceId": 1052, 5 | "driverId": 830, 6 | "constructorId": 9, 7 | "number": 33, 8 | "position": 1, 9 | "q1": "1:30.499", 10 | "q2": "1:30.318", 11 | "q3": "1:28.997" 12 | }, 13 | { 14 | "qualifyId": 8736, 15 | "raceId": 1052, 16 | "driverId": 1, 17 | "constructorId": 131, 18 | "number": 44, 19 | "position": 2, 20 | "q1": "1:30.617", 21 | "q2": "1:30.085", 22 | "q3": "1:29.385" 23 | }, 24 | { 25 | "qualifyId": 8737, 26 | "raceId": 1052, 27 | "driverId": 
822, 28 | "constructorId": 131, 29 | "number": 77, 30 | "position": 3, 31 | "q1": "1:31.200", 32 | "q2": "1:30.186", 33 | "q3": "1:29.586" 34 | }, 35 | { 36 | "qualifyId": 8738, 37 | "raceId": 1052, 38 | "driverId": 844, 39 | "constructorId": 6, 40 | "number": 16, 41 | "position": 4, 42 | "q1": "1:30.691", 43 | "q2": "1:30.010", 44 | "q3": "1:29.678" 45 | }, 46 | { 47 | "qualifyId": 8739, 48 | "raceId": 1052, 49 | "driverId": 842, 50 | "constructorId": 213, 51 | "number": 10, 52 | "position": 5, 53 | "q1": "1:30.848", 54 | "q2": "1:30.513", 55 | "q3": "1:29.809" 56 | }, 57 | { 58 | "qualifyId": 8740, 59 | "raceId": 1052, 60 | "driverId": 817, 61 | "constructorId": 1, 62 | "number": 3, 63 | "position": 6, 64 | "q1": "1:30.795", 65 | "q2": "1:30.222", 66 | "q3": "1:29.927" 67 | }, 68 | { 69 | "qualifyId": 8741, 70 | "raceId": 1052, 71 | "driverId": 846, 72 | "constructorId": 1, 73 | "number": 4, 74 | "position": 7, 75 | "q1": "1:30.902", 76 | "q2": "1:30.099", 77 | "q3": "1:29.974" 78 | }, 79 | { 80 | "qualifyId": 8742, 81 | "raceId": 1052, 82 | "driverId": 832, 83 | "constructorId": 6, 84 | "number": 55, 85 | "position": 8, 86 | "q1": "1:31.653", 87 | "q2": "1:30.009", 88 | "q3": "1:30.215" 89 | }, 90 | { 91 | "qualifyId": 8743, 92 | "raceId": 1052, 93 | "driverId": 4, 94 | "constructorId": 214, 95 | "number": 14, 96 | "position": 9, 97 | "q1": "1:30.863", 98 | "q2": "1:30.595", 99 | "q3": "1:30.249" 100 | }, 101 | { 102 | "qualifyId": 8744, 103 | "raceId": 1052, 104 | "driverId": 840, 105 | "constructorId": 117, 106 | "number": 18, 107 | "position": 10, 108 | "q1": "1:31.261", 109 | "q2": "1:30.624", 110 | "q3": "1:30.601" 111 | }, 112 | { 113 | "qualifyId": 8745, 114 | "raceId": 1052, 115 | "driverId": 815, 116 | "constructorId": 9, 117 | "number": 11, 118 | "position": 11, 119 | "q1": "1:31.165", 120 | "q2": "1:30.659", 121 | "q3": "\\N" 122 | }, 123 | { 124 | "qualifyId": 8746, 125 | "raceId": 1052, 126 | "driverId": 841, 127 | "constructorId": 51, 128 | 
"number": 99, 129 | "position": 12, 130 | "q1": "1:30.998", 131 | "q2": "1:30.708", 132 | "q3": "\\N" 133 | }, 134 | { 135 | "qualifyId": 8747, 136 | "raceId": 1052, 137 | "driverId": 852, 138 | "constructorId": 213, 139 | "number": 22, 140 | "position": 13, 141 | "q1": "1:30.607", 142 | "q2": "1:31.203", 143 | "q3": "\\N" 144 | }, 145 | { 146 | "qualifyId": 8748, 147 | "raceId": 1052, 148 | "driverId": 8, 149 | "constructorId": 51, 150 | "number": 7, 151 | "position": 14, 152 | "q1": "1:31.547", 153 | "q2": "1:31.238", 154 | "q3": "\\N" 155 | }, 156 | { 157 | "qualifyId": 8749, 158 | "raceId": 1052, 159 | "driverId": 847, 160 | "constructorId": 3, 161 | "number": 63, 162 | "position": 15, 163 | "q1": "1:31.316", 164 | "q2": "1:33.430", 165 | "q3": "\\N" 166 | }, 167 | { 168 | "qualifyId": 8750, 169 | "raceId": 1052, 170 | "driverId": 839, 171 | "constructorId": 214, 172 | "number": 31, 173 | "position": 16, 174 | "q1": "1:31.724", 175 | "q2": "\\N", 176 | "q3": "\\N" 177 | }, 178 | { 179 | "qualifyId": 8751, 180 | "raceId": 1052, 181 | "driverId": 849, 182 | "constructorId": 3, 183 | "number": 6, 184 | "position": 17, 185 | "q1": "1:31.936", 186 | "q2": "\\N", 187 | "q3": "\\N" 188 | }, 189 | { 190 | "qualifyId": 8752, 191 | "raceId": 1052, 192 | "driverId": 20, 193 | "constructorId": 117, 194 | "number": 5, 195 | "position": 18, 196 | "q1": "1:32.056", 197 | "q2": "\\N", 198 | "q3": "\\N" 199 | }, 200 | { 201 | "qualifyId": 8753, 202 | "raceId": 1052, 203 | "driverId": 854, 204 | "constructorId": 210, 205 | "number": 47, 206 | "position": 19, 207 | "q1": "1:32.449", 208 | "q2": "\\N", 209 | "q3": "\\N" 210 | }, 211 | { 212 | "qualifyId": 8754, 213 | "raceId": 1052, 214 | "driverId": 853, 215 | "constructorId": 210, 216 | "number": 9, 217 | "position": 20, 218 | "q1": "1:33.273", 219 | "q2": "\\N", 220 | "q3": "\\N" 221 | } 222 | ] -------------------------------------------------------------------------------- /Incremental_load_data/2021-03-28/results.json: 
-------------------------------------------------------------------------------- 1 | {"resultId":24966,"raceId":1052,"driverId":1,"constructorId":131,"number":44,"grid":2,"position":1,"positionText":1,"positionOrder":1,"points":25,"laps":56,"time":"1:32:03.897","milliseconds":5523897,"fastestLap":44,"rank":4,"fastestLapTime":"1:34.015","fastestLapSpeed":207.235,"statusId":1} 2 | {"resultId":24967,"raceId":1052,"driverId":830,"constructorId":9,"number":33,"grid":1,"position":2,"positionText":2,"positionOrder":2,"points":18,"laps":56,"time":"+0.745","milliseconds":5524642,"fastestLap":41,"rank":2,"fastestLapTime":"1:33.228","fastestLapSpeed":208.984,"statusId":1} 3 | {"resultId":24968,"raceId":1052,"driverId":822,"constructorId":131,"number":77,"grid":3,"position":3,"positionText":3,"positionOrder":3,"points":16,"laps":56,"time":"+37.383","milliseconds":5561280,"fastestLap":56,"rank":1,"fastestLapTime":"1:32.090","fastestLapSpeed":211.566,"statusId":1} 4 | {"resultId":24969,"raceId":1052,"driverId":846,"constructorId":1,"number":4,"grid":7,"position":4,"positionText":4,"positionOrder":4,"points":12,"laps":56,"time":"+46.466","milliseconds":5570363,"fastestLap":38,"rank":6,"fastestLapTime":"1:34.396","fastestLapSpeed":206.398,"statusId":1} 5 | {"resultId":24970,"raceId":1052,"driverId":815,"constructorId":9,"number":11,"grid":0,"position":5,"positionText":5,"positionOrder":5,"points":10,"laps":56,"time":"+52.047","milliseconds":5575944,"fastestLap":44,"rank":3,"fastestLapTime":"1:33.970","fastestLapSpeed":207.334,"statusId":1} 6 | {"resultId":24971,"raceId":1052,"driverId":844,"constructorId":6,"number":16,"grid":4,"position":6,"positionText":6,"positionOrder":6,"points":8,"laps":56,"time":"+59.090","milliseconds":5582987,"fastestLap":39,"rank":11,"fastestLapTime":"1:34.988","fastestLapSpeed":205.112,"statusId":1} 7 | 
{"resultId":24972,"raceId":1052,"driverId":817,"constructorId":1,"number":3,"grid":6,"position":7,"positionText":7,"positionOrder":7,"points":6,"laps":56,"time":"+66.004","milliseconds":5589901,"fastestLap":36,"rank":10,"fastestLapTime":"1:34.932","fastestLapSpeed":205.233,"statusId":1} 8 | {"resultId":24973,"raceId":1052,"driverId":832,"constructorId":6,"number":55,"grid":8,"position":8,"positionText":8,"positionOrder":8,"points":4,"laps":56,"time":"+67.100","milliseconds":5590997,"fastestLap":48,"rank":7,"fastestLapTime":"1:34.509","fastestLapSpeed":206.151,"statusId":1} 9 | {"resultId":24974,"raceId":1052,"driverId":852,"constructorId":213,"number":22,"grid":13,"position":9,"positionText":9,"positionOrder":9,"points":2,"laps":56,"time":"+85.692","milliseconds":5609589,"fastestLap":38,"rank":8,"fastestLapTime":"1:34.761","fastestLapSpeed":205.603,"statusId":1} 10 | {"resultId":24975,"raceId":1052,"driverId":840,"constructorId":117,"number":18,"grid":10,"position":10,"positionText":10,"positionOrder":10,"points":1,"laps":56,"time":"+86.713","milliseconds":5610610,"fastestLap":31,"rank":9,"fastestLapTime":"1:34.865","fastestLapSpeed":205.378,"statusId":1} 11 | {"resultId":24976,"raceId":1052,"driverId":8,"constructorId":51,"number":7,"grid":14,"position":11,"positionText":11,"positionOrder":11,"points":0,"laps":56,"time":"+88.864","milliseconds":5612761,"fastestLap":45,"rank":14,"fastestLapTime":"1:35.192","fastestLapSpeed":204.672,"statusId":1} 12 | {"resultId":24977,"raceId":1052,"driverId":841,"constructorId":51,"number":99,"grid":12,"position":12,"positionText":12,"positionOrder":12,"points":0,"laps":55,"time":"\\N","milliseconds":"\\N","fastestLap":32,"rank":13,"fastestLapTime":"1:35.122","fastestLapSpeed":204.823,"statusId":11} 13 | 
{"resultId":24978,"raceId":1052,"driverId":839,"constructorId":214,"number":31,"grid":16,"position":13,"positionText":13,"positionOrder":13,"points":0,"laps":55,"time":"\\N","milliseconds":"\\N","fastestLap":33,"rank":15,"fastestLapTime":"1:35.250","fastestLapSpeed":204.548,"statusId":11} 14 | {"resultId":24979,"raceId":1052,"driverId":847,"constructorId":3,"number":63,"grid":15,"position":14,"positionText":14,"positionOrder":14,"points":0,"laps":55,"time":"\\N","milliseconds":"\\N","fastestLap":40,"rank":12,"fastestLapTime":"1:35.036","fastestLapSpeed":205.008,"statusId":11} 15 | {"resultId":24980,"raceId":1052,"driverId":20,"constructorId":117,"number":5,"grid":20,"position":15,"positionText":15,"positionOrder":15,"points":0,"laps":55,"time":"\\N","milliseconds":"\\N","fastestLap":26,"rank":16,"fastestLapTime":"1:35.566","fastestLapSpeed":203.871,"statusId":11} 16 | {"resultId":24981,"raceId":1052,"driverId":854,"constructorId":210,"number":47,"grid":18,"position":16,"positionText":16,"positionOrder":16,"points":0,"laps":55,"time":"\\N","milliseconds":"\\N","fastestLap":38,"rank":18,"fastestLapTime":"1:36.134","fastestLapSpeed":202.667,"statusId":11} 17 | {"resultId":24982,"raceId":1052,"driverId":842,"constructorId":213,"number":10,"grid":5,"position":17,"positionText":17,"positionOrder":17,"points":0,"laps":52,"time":"\\N","milliseconds":"\\N","fastestLap":48,"rank":5,"fastestLapTime":"1:34.090","fastestLapSpeed":207.069,"statusId":31} 18 | {"resultId":24983,"raceId":1052,"driverId":849,"constructorId":3,"number":6,"grid":17,"position":18,"positionText":18,"positionOrder":18,"points":0,"laps":51,"time":"\\N","milliseconds":"\\N","fastestLap":16,"rank":19,"fastestLapTime":"1:36.602","fastestLapSpeed":201.685,"statusId":31} 19 | 
{"resultId":24984,"raceId":1052,"driverId":4,"constructorId":214,"number":14,"grid":9,"position":"\\N","positionText":"R","positionOrder":19,"points":0,"laps":32,"time":"\\N","milliseconds":"\\N","fastestLap":31,"rank":17,"fastestLapTime":"1:36.063","fastestLapSpeed":202.816,"statusId":23} 20 | {"resultId":24985,"raceId":1052,"driverId":853,"constructorId":210,"number":9,"grid":19,"position":"\\N","positionText":"R","positionOrder":20,"points":0,"laps":0,"time":"\\N","milliseconds":"\\N","fastestLap":"\\N","rank":0,"fastestLapTime":"\\N","fastestLapSpeed":"\\N","statusId":3} -------------------------------------------------------------------------------- /Incremental_load_data/2021-04-18/circuits.csv: -------------------------------------------------------------------------------- 1 | circuitId,circuitRef,name,location,country,lat,lng,alt,url 2 | 1,"albert_park","Albert Park Grand Prix Circuit","Melbourne","Australia",-37.8497,144.968,10,"http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit" 3 | 2,"sepang","Sepang International Circuit","Kuala Lumpur","Malaysia",2.76083,101.738,18,"http://en.wikipedia.org/wiki/Sepang_International_Circuit" 4 | 3,"bahrain","Bahrain International Circuit","Sakhir","Bahrain",26.0325,50.5106,7,"http://en.wikipedia.org/wiki/Bahrain_International_Circuit" 5 | 4,"catalunya","Circuit de Barcelona-Catalunya","Montmeló","Spain",41.57,2.26111,109,"http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya" 6 | 5,"istanbul","Istanbul Park","Istanbul","Turkey",40.9517,29.405,130,"http://en.wikipedia.org/wiki/Istanbul_Park" 7 | 6,"monaco","Circuit de Monaco","Monte-Carlo","Monaco",43.7347,7.42056,7,"http://en.wikipedia.org/wiki/Circuit_de_Monaco" 8 | 7,"villeneuve","Circuit Gilles Villeneuve","Montreal","Canada",45.5,-73.5228,13,"http://en.wikipedia.org/wiki/Circuit_Gilles_Villeneuve" 9 | 8,"magny_cours","Circuit de Nevers Magny-Cours","Magny Cours","France",46.8642,3.16361,228,"http://en.wikipedia.org/wiki/Circuit_de_Nevers_Magny-Cours" 
10 | 9,"silverstone","Silverstone Circuit","Silverstone","UK",52.0786,-1.01694,153,"http://en.wikipedia.org/wiki/Silverstone_Circuit" 11 | 10,"hockenheimring","Hockenheimring","Hockenheim","Germany",49.3278,8.56583,103,"http://en.wikipedia.org/wiki/Hockenheimring" 12 | 11,"hungaroring","Hungaroring","Budapest","Hungary",47.5789,19.2486,264,"http://en.wikipedia.org/wiki/Hungaroring" 13 | 12,"valencia","Valencia Street Circuit","Valencia","Spain",39.4589,-0.331667,4,"http://en.wikipedia.org/wiki/Valencia_Street_Circuit" 14 | 13,"spa","Circuit de Spa-Francorchamps","Spa","Belgium",50.4372,5.97139,401,"http://en.wikipedia.org/wiki/Circuit_de_Spa-Francorchamps" 15 | 14,"monza","Autodromo Nazionale di Monza","Monza","Italy",45.6156,9.28111,162,"http://en.wikipedia.org/wiki/Autodromo_Nazionale_Monza" 16 | 15,"marina_bay","Marina Bay Street Circuit","Marina Bay","Singapore",1.2914,103.864,18,"http://en.wikipedia.org/wiki/Marina_Bay_Street_Circuit" 17 | 16,"fuji","Fuji Speedway","Oyama","Japan",35.3717,138.927,583,"http://en.wikipedia.org/wiki/Fuji_Speedway" 18 | 17,"shanghai","Shanghai International Circuit","Shanghai","China",31.3389,121.22,5,"http://en.wikipedia.org/wiki/Shanghai_International_Circuit" 19 | 18,"interlagos","Autódromo José Carlos Pace","São Paulo","Brazil",-23.7036,-46.6997,785,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jos%C3%A9_Carlos_Pace" 20 | 19,"indianapolis","Indianapolis Motor Speedway","Indianapolis","USA",39.795,-86.2347,223,"http://en.wikipedia.org/wiki/Indianapolis_Motor_Speedway" 21 | 20,"nurburgring","Nürburgring","Nürburg","Germany",50.3356,6.9475,578,"http://en.wikipedia.org/wiki/N%C3%BCrburgring" 22 | 21,"imola","Autodromo Enzo e Dino Ferrari","Imola","Italy",44.3439,11.7167,37,"http://en.wikipedia.org/wiki/Autodromo_Enzo_e_Dino_Ferrari" 23 | 22,"suzuka","Suzuka Circuit","Suzuka","Japan",34.8431,136.541,45,"http://en.wikipedia.org/wiki/Suzuka_Circuit" 24 | 
23,"osterreichring","A1-Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/A1-Ring" 25 | 24,"yas_marina","Yas Marina Circuit","Abu Dhabi","UAE",24.4672,54.6031,3,"http://en.wikipedia.org/wiki/Yas_Marina_Circuit" 26 | 25,"galvez","Autódromo Juan y Oscar Gálvez","Buenos Aires","Argentina",-34.6943,-58.4593,8,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Oscar_Alfredo_G%C3%A1lvez" 27 | 26,"jerez","Circuito de Jerez","Jerez de la Frontera","Spain",36.7083,-6.03417,37,"http://en.wikipedia.org/wiki/Circuito_Permanente_de_Jerez" 28 | 27,"estoril","Autódromo do Estoril","Estoril","Portugal",38.7506,-9.39417,130,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_do_Estoril" 29 | 28,"okayama","Okayama International Circuit","Okayama","Japan",34.915,134.221,266,"http://en.wikipedia.org/wiki/TI_Circuit" 30 | 29,"adelaide","Adelaide Street Circuit","Adelaide","Australia",-34.9272,138.617,58,"http://en.wikipedia.org/wiki/Adelaide_Street_Circuit" 31 | 30,"kyalami","Kyalami","Midrand","South Africa",-25.9894,28.0767,1460,"http://en.wikipedia.org/wiki/Kyalami" 32 | 31,"donington","Donington Park","Castle Donington","UK",52.8306,-1.37528,88,"http://en.wikipedia.org/wiki/Donington_Park" 33 | 32,"rodriguez","Autódromo Hermanos Rodríguez","Mexico City","Mexico",19.4042,-99.0907,2227,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Hermanos_Rodr%C3%ADguez" 34 | 33,"phoenix","Phoenix street circuit","Phoenix","USA",33.4479,-112.075,345,"http://en.wikipedia.org/wiki/Phoenix_street_circuit" 35 | 34,"ricard","Circuit Paul Ricard","Le Castellet","France",43.2506,5.79167,432,"http://en.wikipedia.org/wiki/Paul_Ricard_Circuit" 36 | 35,"yeongam","Korean International Circuit","Yeongam County","Korea",34.7333,126.417,0,"http://en.wikipedia.org/wiki/Korean_International_Circuit" 37 | 36,"jacarepagua","Autódromo Internacional Nelson Piquet","Rio de Janeiro","Brazil",-22.9756,-43.395,1126,"http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Internacional_Nelson_Piquet" 38 | 
37,"detroit","Detroit Street Circuit","Detroit","USA",42.3298,-83.0401,177,"http://en.wikipedia.org/wiki/Detroit_street_circuit" 39 | 38,"brands_hatch","Brands Hatch","Kent","UK",51.3569,0.263056,145,"http://en.wikipedia.org/wiki/Brands_Hatch" 40 | 39,"zandvoort","Circuit Park Zandvoort","Zandvoort","Netherlands",52.3888,4.54092,6,"http://en.wikipedia.org/wiki/Circuit_Zandvoort" 41 | 40,"zolder","Zolder","Heusden-Zolder","Belgium",50.9894,5.25694,36,"http://en.wikipedia.org/wiki/Zolder" 42 | 41,"dijon","Dijon-Prenois","Dijon","France",47.3625,4.89913,484,"http://en.wikipedia.org/wiki/Dijon-Prenois" 43 | 42,"dallas","Fair Park","Dallas","USA",32.7774,-96.7587,139,"http://en.wikipedia.org/wiki/Fair_Park" 44 | 43,"long_beach","Long Beach","California","USA",33.7651,-118.189,12,"http://en.wikipedia.org/wiki/Long_Beach,_California" 45 | 44,"las_vegas","Las Vegas Street Circuit","Nevada","USA",36.1162,-115.174,639,"http://en.wikipedia.org/wiki/Las_Vegas,_Nevada" 46 | 45,"jarama","Jarama","Madrid","Spain",40.6171,-3.58558,609,"http://en.wikipedia.org/wiki/Circuito_Permanente_Del_Jarama" 47 | 46,"watkins_glen","Watkins Glen","New York State","USA",42.3369,-76.9272,485,"http://en.wikipedia.org/wiki/Watkins_Glen_International" 48 | 47,"anderstorp","Scandinavian Raceway","Anderstorp","Sweden",57.2653,13.6042,153,"http://en.wikipedia.org/wiki/Scandinavian_Raceway" 49 | 48,"mosport","Mosport International Raceway","Ontario","Canada",44.0481,-78.6756,332,"http://en.wikipedia.org/wiki/Mosport" 50 | 49,"montjuic","Montjuïc","Barcelona","Spain",41.3664,2.15167,79,"http://en.wikipedia.org/wiki/Montju%C3%AFc_circuit" 51 | 50,"nivelles","Nivelles-Baulers","Brussels","Belgium",50.6211,4.32694,139,"http://en.wikipedia.org/wiki/Nivelles-Baulers" 52 | 51,"charade","Charade Circuit","Clermont-Ferrand","France",45.7472,3.03889,790,"http://en.wikipedia.org/wiki/Charade_Circuit" 53 | 52,"tremblant","Circuit 
Mont-Tremblant","Quebec","Canada",46.1877,-74.6099,214,"http://en.wikipedia.org/wiki/Circuit_Mont-Tremblant" 54 | 53,"essarts","Rouen-Les-Essarts","Rouen","France",49.3306,1.00458,81,"http://en.wikipedia.org/wiki/Rouen-Les-Essarts" 55 | 54,"lemans","Le Mans","Le Mans","France",47.95,0.224231,67,"http://en.wikipedia.org/wiki/Circuit_de_la_Sarthe#Bugatti_Circuit" 56 | 55,"reims","Reims-Gueux","Reims","France",49.2542,3.93083,88,"http://en.wikipedia.org/wiki/Reims-Gueux" 57 | 56,"george","Prince George Circuit","Eastern Cape Province","South Africa",-33.0486,27.8736,15,"http://en.wikipedia.org/wiki/Prince_George_Circuit" 58 | 57,"zeltweg","Zeltweg","Styria","Austria",47.2039,14.7478,676,"http://en.wikipedia.org/wiki/Zeltweg_Airfield" 59 | 58,"aintree","Aintree","Liverpool","UK",53.4769,-2.94056,20,"http://en.wikipedia.org/wiki/Aintree_Motor_Racing_Circuit" 60 | 59,"boavista","Circuito da Boavista","Oporto","Portugal",41.1705,-8.67325,28,"http://en.wikipedia.org/wiki/Circuito_da_Boavista" 61 | 60,"riverside","Riverside International Raceway","California","USA",33.937,-117.273,470,"http://en.wikipedia.org/wiki/Riverside_International_Raceway" 62 | 61,"avus","AVUS","Berlin","Germany",52.4806,13.2514,53,"http://en.wikipedia.org/wiki/AVUS" 63 | 62,"monsanto","Monsanto Park Circuit","Lisbon","Portugal",38.7197,-9.20306,158,"http://en.wikipedia.org/wiki/Monsanto_Park_Circuit" 64 | 63,"sebring","Sebring International Raceway","Florida","USA",27.4547,-81.3483,18,"http://en.wikipedia.org/wiki/Sebring_Raceway" 65 | 64,"ain-diab","Ain Diab","Casablanca","Morocco",33.5786,-7.6875,19,"http://en.wikipedia.org/wiki/Ain-Diab_Circuit" 66 | 65,"pescara","Pescara Circuit","Pescara","Italy",42.475,14.1508,129,"http://en.wikipedia.org/wiki/Pescara_Circuit" 67 | 66,"bremgarten","Circuit Bremgarten","Bern","Switzerland",46.9589,7.40194,551,"http://en.wikipedia.org/wiki/Circuit_Bremgarten" 68 | 67,"pedralbes","Circuit de 
Pedralbes","Barcelona","Spain",41.3903,2.11667,85,"http://en.wikipedia.org/wiki/Pedralbes_Circuit" 69 | 68,"buddh","Buddh International Circuit","Uttar Pradesh","India",28.3487,77.5331,194,"http://en.wikipedia.org/wiki/Buddh_International_Circuit" 70 | 69,"americas","Circuit of the Americas","Austin","USA",30.1328,-97.6411,161,"http://en.wikipedia.org/wiki/Circuit_of_the_Americas" 71 | 70,"red_bull_ring","Red Bull Ring","Spielburg","Austria",47.2197,14.7647,678,"http://en.wikipedia.org/wiki/Red_Bull_Ring" 72 | 71,"sochi","Sochi Autodrom","Sochi","Russia",43.4057,39.9578,2,"http://en.wikipedia.org/wiki/Sochi_Autodrom" 73 | 72,"port_imperial","Port Imperial Street Circuit","New Jersey","USA",40.7769,-74.0111,4,"http://en.wikipedia.org/wiki/Port_Imperial_Street_Circuit" 74 | 73,"BAK","Baku City Circuit","Baku","Azerbaijan",40.3725,49.8533,-7,"http://en.wikipedia.org/wiki/Baku_City_Circuit" 75 | 74,"hanoi","Hanoi Street Circuit","Hanoi","Vietnam",21.0166,105.766,9,"http://en.wikipedia.org/wiki/Hanoi_Street_Circuit" 76 | 75,"portimao","Autódromo Internacional do Algarve","Portimão","Portugal",37.227,-8.6267,108,"http://en.wikipedia.org/wiki/Algarve_International_Circuit" 77 | 76,"mugello","Autodromo Internazionale del Mugello","Mugello","Italy",43.9975,11.3719,255,"http://en.wikipedia.org/wiki/Mugello_Circuit" 78 | 77,"jeddah","Jeddah Street Circuit","Jeddah","Saudi Arabia",21.5433,39.1728,15,"http://en.wikipedia.org/wiki/Jeddah_Street_Circuit" 79 | -------------------------------------------------------------------------------- /Incremental_load_data/2021-04-18/pit_stops.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "raceId": 1053, 4 | "driverId": 839, 5 | "stop": 1, 6 | "lap": 1, 7 | "time": "15:05:16", 8 | "duration": 30.866, 9 | "milliseconds": 30866 10 | }, 11 | { 12 | "raceId": 1053, 13 | "driverId": 20, 14 | "stop": 1, 15 | "lap": 3, 16 | "time": "15:10:09", 17 | "duration": 32.024, 18 | "milliseconds": 
32024 19 | }, 20 | { 21 | "raceId": 1053, 22 | "driverId": 854, 23 | "stop": 1, 24 | "lap": 5, 25 | "time": "15:15:11", 26 | "duration": 51.007, 27 | "milliseconds": 51007 28 | }, 29 | { 30 | "raceId": 1053, 31 | "driverId": 853, 32 | "stop": 1, 33 | "lap": 12, 34 | "time": "15:27:20", 35 | "duration": 31.168, 36 | "milliseconds": 31168 37 | }, 38 | { 39 | "raceId": 1053, 40 | "driverId": 842, 41 | "stop": 1, 42 | "lap": 14, 43 | "time": "15:30:10", 44 | "duration": 31.068, 45 | "milliseconds": 31068 46 | }, 47 | { 48 | "raceId": 1053, 49 | "driverId": 20, 50 | "stop": 2, 51 | "lap": 20, 52 | "time": "15:39:11", 53 | "duration": 31.184, 54 | "milliseconds": 31184 55 | }, 56 | { 57 | "raceId": 1053, 58 | "driverId": 854, 59 | "stop": 2, 60 | "lap": 21, 61 | "time": "15:41:24", 62 | "duration": 32.479, 63 | "milliseconds": 32479 64 | }, 65 | { 66 | "raceId": 1053, 67 | "driverId": 20, 68 | "stop": 3, 69 | "lap": 22, 70 | "time": "15:42:52", 71 | "duration": 39.502, 72 | "milliseconds": 39502 73 | }, 74 | { 75 | "raceId": 1053, 76 | "driverId": 853, 77 | "stop": 2, 78 | "lap": 23, 79 | "time": "15:45:20", 80 | "duration": 31.5, 81 | "milliseconds": 31500 82 | }, 83 | { 84 | "raceId": 1053, 85 | "driverId": 852, 86 | "stop": 1, 87 | "lap": 25, 88 | "time": "15:46:39", 89 | "duration": 30.696, 90 | "milliseconds": 30696 91 | }, 92 | { 93 | "raceId": 1053, 94 | "driverId": 847, 95 | "stop": 1, 96 | "lap": 26, 97 | "time": "15:47:53", 98 | "duration": 29.983, 99 | "milliseconds": 29983 100 | }, 101 | { 102 | "raceId": 1053, 103 | "driverId": 8, 104 | "stop": 1, 105 | "lap": 26, 106 | "time": "15:47:55", 107 | "duration": 30.28, 108 | "milliseconds": 30280 109 | }, 110 | { 111 | "raceId": 1053, 112 | "driverId": 830, 113 | "stop": 1, 114 | "lap": 27, 115 | "time": "15:47:59", 116 | "duration": 29.809, 117 | "milliseconds": 29809 118 | }, 119 | { 120 | "raceId": 1053, 121 | "driverId": 842, 122 | "stop": 2, 123 | "lap": 26, 124 | "time": "15:48:45", 125 | "duration": 
32.277, 126 | "milliseconds": 32277 127 | }, 128 | { 129 | "raceId": 1053, 130 | "driverId": 832, 131 | "stop": 1, 132 | "lap": 27, 133 | "time": "15:48:51", 134 | "duration": 30.856, 135 | "milliseconds": 30856 136 | }, 137 | { 138 | "raceId": 1053, 139 | "driverId": 817, 140 | "stop": 1, 141 | "lap": 27, 142 | "time": "15:48:57", 143 | "duration": 34.34, 144 | "milliseconds": 34340 145 | }, 146 | { 147 | "raceId": 1053, 148 | "driverId": 840, 149 | "stop": 1, 150 | "lap": 27, 151 | "time": "15:49:16", 152 | "duration": 31.138, 153 | "milliseconds": 31138 154 | }, 155 | { 156 | "raceId": 1053, 157 | "driverId": 1, 158 | "stop": 1, 159 | "lap": 28, 160 | "time": "15:49:27", 161 | "duration": 32.112, 162 | "milliseconds": 32112 163 | }, 164 | { 165 | "raceId": 1053, 166 | "driverId": 841, 167 | "stop": 1, 168 | "lap": 27, 169 | "time": "15:49:32", 170 | "duration": 32.299, 171 | "milliseconds": 32299 172 | }, 173 | { 174 | "raceId": 1053, 175 | "driverId": 839, 176 | "stop": 2, 177 | "lap": 27, 178 | "time": "15:49:42", 179 | "duration": 30.652, 180 | "milliseconds": 30652 181 | }, 182 | { 183 | "raceId": 1053, 184 | "driverId": 844, 185 | "stop": 1, 186 | "lap": 28, 187 | "time": "15:49:43", 188 | "duration": 31.002, 189 | "milliseconds": 31002 190 | }, 191 | { 192 | "raceId": 1053, 193 | "driverId": 815, 194 | "stop": 1, 195 | "lap": 28, 196 | "time": "15:49:58", 197 | "duration": 44.608, 198 | "milliseconds": 44608 199 | }, 200 | { 201 | "raceId": 1053, 202 | "driverId": 846, 203 | "stop": 1, 204 | "lap": 28, 205 | "time": "15:50:11", 206 | "duration": 30.654, 207 | "milliseconds": 30654 208 | }, 209 | { 210 | "raceId": 1053, 211 | "driverId": 822, 212 | "stop": 1, 213 | "lap": 28, 214 | "time": "15:50:46", 215 | "duration": 30.167, 216 | "milliseconds": 30167 217 | }, 218 | { 219 | "raceId": 1053, 220 | "driverId": 4, 221 | "stop": 1, 222 | "lap": 28, 223 | "time": "15:51:18", 224 | "duration": 30.864, 225 | "milliseconds": 30864 226 | }, 227 | { 228 | "raceId": 
1053, 229 | "driverId": 1, 230 | "stop": 2, 231 | "lap": 31, 232 | "time": "15:55:30", 233 | "duration": 38.198, 234 | "milliseconds": 38198 235 | }, 236 | { 237 | "raceId": 1053, 238 | "driverId": 839, 239 | "stop": 3, 240 | "lap": 31, 241 | "time": "15:56:23", 242 | "duration": 30.701, 243 | "milliseconds": 30701 244 | }, 245 | { 246 | "raceId": 1053, 247 | "driverId": 830, 248 | "stop": 2, 249 | "lap": 33, 250 | "time": "15:58:11", 251 | "duration": "27:01.361", 252 | "milliseconds": 1621361 253 | }, 254 | { 255 | "raceId": 1053, 256 | "driverId": 840, 257 | "stop": 2, 258 | "lap": 32, 259 | "time": "15:58:13", 260 | "duration": "24:52.090", 261 | "milliseconds": 1492090 262 | }, 263 | { 264 | "raceId": 1053, 265 | "driverId": 8, 266 | "stop": 2, 267 | "lap": 32, 268 | "time": "15:58:16", 269 | "duration": "24:52.612", 270 | "milliseconds": 1492612 271 | }, 272 | { 273 | "raceId": 1053, 274 | "driverId": 1, 275 | "stop": 3, 276 | "lap": 32, 277 | "time": "15:58:17", 278 | "duration": "24:54.731", 279 | "milliseconds": 1494731 280 | }, 281 | { 282 | "raceId": 1053, 283 | "driverId": 854, 284 | "stop": 3, 285 | "lap": 31, 286 | "time": "15:58:20", 287 | "duration": "25:21.462", 288 | "milliseconds": 1521462 289 | }, 290 | { 291 | "raceId": 1053, 292 | "driverId": 852, 293 | "stop": 2, 294 | "lap": 32, 295 | "time": "15:58:24", 296 | "duration": "24:51.384", 297 | "milliseconds": 1491384 298 | }, 299 | { 300 | "raceId": 1053, 301 | "driverId": 841, 302 | "stop": 2, 303 | "lap": 32, 304 | "time": "15:58:26", 305 | "duration": "24:56.541", 306 | "milliseconds": 1496541 307 | }, 308 | { 309 | "raceId": 1053, 310 | "driverId": 844, 311 | "stop": 2, 312 | "lap": 33, 313 | "time": "15:58:28", 314 | "duration": "26:49.484", 315 | "milliseconds": 1609484 316 | }, 317 | { 318 | "raceId": 1053, 319 | "driverId": 4, 320 | "stop": 2, 321 | "lap": 32, 322 | "time": "15:58:37", 323 | "duration": "24:48.372", 324 | "milliseconds": 1488372 325 | }, 326 | { 327 | "raceId": 1053, 
328 | "driverId": 839, 329 | "stop": 4, 330 | "lap": 32, 331 | "time": "15:58:48", 332 | "duration": "24:38.974", 333 | "milliseconds": 1478974 334 | }, 335 | { 336 | "raceId": 1053, 337 | "driverId": 853, 338 | "stop": 3, 339 | "lap": 31, 340 | "time": "15:58:57", 341 | "duration": "24:46.154", 342 | "milliseconds": 1486154 343 | }, 344 | { 345 | "raceId": 1053, 346 | "driverId": 846, 347 | "stop": 2, 348 | "lap": 33, 349 | "time": "15:59:06", 350 | "duration": "26:24.953", 351 | "milliseconds": 1584953 352 | }, 353 | { 354 | "raceId": 1053, 355 | "driverId": 815, 356 | "stop": 2, 357 | "lap": 33, 358 | "time": "15:59:11", 359 | "duration": "26:21.158", 360 | "milliseconds": 1581158 361 | }, 362 | { 363 | "raceId": 1053, 364 | "driverId": 832, 365 | "stop": 2, 366 | "lap": 33, 367 | "time": "15:59:14", 368 | "duration": "26:22.211", 369 | "milliseconds": 1582211 370 | }, 371 | { 372 | "raceId": 1053, 373 | "driverId": 842, 374 | "stop": 3, 375 | "lap": 32, 376 | "time": "15:59:18", 377 | "duration": "24:13.539", 378 | "milliseconds": 1453539 379 | }, 380 | { 381 | "raceId": 1053, 382 | "driverId": 20, 383 | "stop": 4, 384 | "lap": 32, 385 | "time": "15:59:21", 386 | "duration": "24:12.323", 387 | "milliseconds": 1452323 388 | }, 389 | { 390 | "raceId": 1053, 391 | "driverId": 817, 392 | "stop": 2, 393 | "lap": 33, 394 | "time": "15:59:25", 395 | "duration": "26:14.827", 396 | "milliseconds": 1574827 397 | }, 398 | { 399 | "raceId": 1053, 400 | "driverId": 840, 401 | "stop": 3, 402 | "lap": 33, 403 | "time": "16:24:36", 404 | "duration": "1:05.390", 405 | "milliseconds": 65390 406 | }, 407 | { 408 | "raceId": 1053, 409 | "driverId": 8, 410 | "stop": 3, 411 | "lap": 33, 412 | "time": "16:24:42", 413 | "duration": "1:02.892", 414 | "milliseconds": 62892 415 | }, 416 | { 417 | "raceId": 1053, 418 | "driverId": 1, 419 | "stop": 4, 420 | "lap": 33, 421 | "time": "16:24:48", 422 | "duration": "1:05.870", 423 | "milliseconds": 65870 424 | }, 425 | { 426 | "raceId": 1053, 
427 | "driverId": 852, 428 | "stop": 3, 429 | "lap": 33, 430 | "time": "16:24:52", 431 | "duration": "1:03.280", 432 | "milliseconds": 63280 433 | }, 434 | { 435 | "raceId": 1053, 436 | "driverId": 841, 437 | "stop": 3, 438 | "lap": 33, 439 | "time": "16:24:58", 440 | "duration": "1:00.172", 441 | "milliseconds": 60172 442 | }, 443 | { 444 | "raceId": 1053, 445 | "driverId": 4, 446 | "stop": 3, 447 | "lap": 33, 448 | "time": "16:25:04", 449 | "duration": 57.601, 450 | "milliseconds": 57601 451 | }, 452 | { 453 | "raceId": 1053, 454 | "driverId": 839, 455 | "stop": 5, 456 | "lap": 33, 457 | "time": "16:25:06", 458 | "duration": 56.733, 459 | "milliseconds": 56733 460 | }, 461 | { 462 | "raceId": 1053, 463 | "driverId": 842, 464 | "stop": 4, 465 | "lap": 33, 466 | "time": "16:25:09", 467 | "duration": 56.083, 468 | "milliseconds": 56083 469 | }, 470 | { 471 | "raceId": 1053, 472 | "driverId": 20, 473 | "stop": 5, 474 | "lap": 33, 475 | "time": "16:25:15", 476 | "duration": 52.043, 477 | "milliseconds": 52043 478 | }, 479 | { 480 | "raceId": 1053, 481 | "driverId": 854, 482 | "stop": 4, 483 | "lap": 32, 484 | "time": "16:25:16", 485 | "duration": 51.222, 486 | "milliseconds": 51222 487 | }, 488 | { 489 | "raceId": 1053, 490 | "driverId": 853, 491 | "stop": 4, 492 | "lap": 32, 493 | "time": "16:25:22", 494 | "duration": 49.729, 495 | "milliseconds": 49729 496 | }, 497 | { 498 | "raceId": 1053, 499 | "driverId": 841, 500 | "stop": 4, 501 | "lap": 40, 502 | "time": "16:36:13", 503 | "duration": 29.742, 504 | "milliseconds": 29742 505 | } 506 | ] -------------------------------------------------------------------------------- /Incremental_load_data/2021-04-18/qualifying/qualifying_split_1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "qualifyId": 8755, 4 | "raceId": 1053, 5 | "driverId": 1, 6 | "constructorId": 131, 7 | "number": 44, 8 | "position": 1, 9 | "q1": "1:14.823", 10 | "q2": "1:14.817", 11 | "q3": 
"1:14.411" 12 | }, 13 | { 14 | "qualifyId": 8756, 15 | "raceId": 1053, 16 | "driverId": 815, 17 | "constructorId": 9, 18 | "number": 11, 19 | "position": 2, 20 | "q1": "1:15.395", 21 | "q2": "1:14.716", 22 | "q3": "1:14.446" 23 | }, 24 | { 25 | "qualifyId": 8757, 26 | "raceId": 1053, 27 | "driverId": 830, 28 | "constructorId": 9, 29 | "number": 33, 30 | "position": 3, 31 | "q1": "1:15.109", 32 | "q2": "1:14.884", 33 | "q3": "1:14.498" 34 | }, 35 | { 36 | "qualifyId": 8758, 37 | "raceId": 1053, 38 | "driverId": 844, 39 | "constructorId": 6, 40 | "number": 16, 41 | "position": 4, 42 | "q1": "1:15.413", 43 | "q2": "1:14.808", 44 | "q3": "1:14.740" 45 | }, 46 | { 47 | "qualifyId": 8759, 48 | "raceId": 1053, 49 | "driverId": 842, 50 | "constructorId": 213, 51 | "number": 10, 52 | "position": 5, 53 | "q1": "1:15.548", 54 | "q2": "1:14.927", 55 | "q3": "1:14.790" 56 | }, 57 | { 58 | "qualifyId": 8760, 59 | "raceId": 1053, 60 | "driverId": 817, 61 | "constructorId": 1, 62 | "number": 3, 63 | "position": 6, 64 | "q1": "1:15.669", 65 | "q2": "1:15.033", 66 | "q3": "1:14.826" 67 | }, 68 | { 69 | "qualifyId": 8761, 70 | "raceId": 1053, 71 | "driverId": 846, 72 | "constructorId": 1, 73 | "number": 4, 74 | "position": 7, 75 | "q1": "1:15.009", 76 | "q2": "1:14.718", 77 | "q3": "1:14.875" 78 | }, 79 | { 80 | "qualifyId": 8762, 81 | "raceId": 1053, 82 | "driverId": 822, 83 | "constructorId": 131, 84 | "number": 77, 85 | "position": 8, 86 | "q1": "1:14.672", 87 | "q2": "1:14.905", 88 | "q3": "1:14.898" 89 | }, 90 | { 91 | "qualifyId": 8763, 92 | "raceId": 1053, 93 | "driverId": 839, 94 | "constructorId": 214, 95 | "number": 31, 96 | "position": 9, 97 | "q1": "1:15.385", 98 | "q2": "1:15.117", 99 | "q3": "1:15.210" 100 | }, 101 | { 102 | "qualifyId": 8764, 103 | "raceId": 1053, 104 | "driverId": 840, 105 | "constructorId": 117, 106 | "number": 18, 107 | "position": 10, 108 | "q1": "1:15.522", 109 | "q2": "1:15.138", 110 | "q3": "\\N" 111 | }, 112 | { 113 | "qualifyId": 8765, 114 | 
"raceId": 1053, 115 | "driverId": 832, 116 | "constructorId": 6, 117 | "number": 55, 118 | "position": 11, 119 | "q1": "1:15.406", 120 | "q2": "1:15.199", 121 | "q3": "\\N" 122 | }, 123 | { 124 | "qualifyId": 8766, 125 | "raceId": 1053, 126 | "driverId": 847, 127 | "constructorId": 3, 128 | "number": 63, 129 | "position": 12, 130 | "q1": "1:15.826", 131 | "q2": "1:15.261", 132 | "q3": "\\N" 133 | }, 134 | { 135 | "qualifyId": 8767, 136 | "raceId": 1053, 137 | "driverId": 20, 138 | "constructorId": 117, 139 | "number": 5, 140 | "position": 13, 141 | "q1": "1:15.459", 142 | "q2": "1:15.394", 143 | "q3": "\\N" 144 | }, 145 | { 146 | "qualifyId": 8768, 147 | "raceId": 1053, 148 | "driverId": 849, 149 | "constructorId": 3, 150 | "number": 6, 151 | "position": 14, 152 | "q1": "1:15.653", 153 | "q2": "1:15.593", 154 | "q3": "\\N" 155 | }, 156 | { 157 | "qualifyId": 8769, 158 | "raceId": 1053, 159 | "driverId": 4, 160 | "constructorId": 214, 161 | "number": 14, 162 | "position": 15, 163 | "q1": "1:15.832", 164 | "q2": "1:15.593", 165 | "q3": "\\N" 166 | }, 167 | { 168 | "qualifyId": 8770, 169 | "raceId": 1053, 170 | "driverId": 8, 171 | "constructorId": 51, 172 | "number": 7, 173 | "position": 16, 174 | "q1": "1:15.974", 175 | "q2": "\\N", 176 | "q3": "\\N" 177 | }, 178 | { 179 | "qualifyId": 8771, 180 | "raceId": 1053, 181 | "driverId": 841, 182 | "constructorId": 51, 183 | "number": 99, 184 | "position": 17, 185 | "q1": "1:16.122", 186 | "q2": "\\N", 187 | "q3": "\\N" 188 | }, 189 | { 190 | "qualifyId": 8772, 191 | "raceId": 1053, 192 | "driverId": 854, 193 | "constructorId": 210, 194 | "number": 47, 195 | "position": 18, 196 | "q1": "1:16.279", 197 | "q2": "\\N", 198 | "q3": "\\N" 199 | }, 200 | { 201 | "qualifyId": 8773, 202 | "raceId": 1053, 203 | "driverId": 853, 204 | "constructorId": 210, 205 | "number": 9, 206 | "position": 19, 207 | "q1": "1:16.797", 208 | "q2": "\\N", 209 | "q3": "\\N" 210 | }, 211 | { 212 | "qualifyId": 8774, 213 | "raceId": 1053, 214 | 
"driverId": 852, 215 | "constructorId": 213, 216 | "number": 22, 217 | "position": 20, 218 | "q1": "\\N", 219 | "q2": "\\N", 220 | "q3": "\\N" 221 | } 222 | ] -------------------------------------------------------------------------------- /Incremental_load_data/2021-04-18/results.json: -------------------------------------------------------------------------------- 1 | {"resultId":24986,"raceId":1053,"driverId":830,"constructorId":9,"number":33,"grid":3,"position":1,"positionText":1,"positionOrder":1,"points":25,"laps":63,"time":"2:02:34.598","milliseconds":7354598,"fastestLap":60,"rank":2,"fastestLapTime":"1:17.524","fastestLapSpeed":227.96,"statusId":1} 2 | {"resultId":24987,"raceId":1053,"driverId":1,"constructorId":131,"number":44,"grid":1,"position":2,"positionText":2,"positionOrder":2,"points":19,"laps":63,"time":"+22.000","milliseconds":7376598,"fastestLap":60,"rank":1,"fastestLapTime":"1:16.702","fastestLapSpeed":230.403,"statusId":1} 3 | {"resultId":24988,"raceId":1053,"driverId":846,"constructorId":1,"number":4,"grid":7,"position":3,"positionText":3,"positionOrder":3,"points":15,"laps":63,"time":"+23.702","milliseconds":7378300,"fastestLap":63,"rank":3,"fastestLapTime":"1:18.259","fastestLapSpeed":225.819,"statusId":1} 4 | {"resultId":24989,"raceId":1053,"driverId":844,"constructorId":6,"number":16,"grid":4,"position":4,"positionText":4,"positionOrder":4,"points":12,"laps":63,"time":"+25.579","milliseconds":7380177,"fastestLap":60,"rank":6,"fastestLapTime":"1:18.379","fastestLapSpeed":225.473,"statusId":1} 5 | {"resultId":24990,"raceId":1053,"driverId":832,"constructorId":6,"number":55,"grid":11,"position":5,"positionText":5,"positionOrder":5,"points":10,"laps":63,"time":"+27.036","milliseconds":7381634,"fastestLap":60,"rank":7,"fastestLapTime":"1:18.490","fastestLapSpeed":225.154,"statusId":1} 6 | 
{"resultId":24991,"raceId":1053,"driverId":817,"constructorId":1,"number":3,"grid":6,"position":6,"positionText":6,"positionOrder":6,"points":8,"laps":63,"time":"+51.220","milliseconds":7405818,"fastestLap":54,"rank":12,"fastestLapTime":"1:19.341","fastestLapSpeed":222.739,"statusId":1} 7 | {"resultId":24992,"raceId":1053,"driverId":842,"constructorId":213,"number":10,"grid":5,"position":7,"positionText":7,"positionOrder":7,"points":6,"laps":63,"time":"+52.818","milliseconds":7407416,"fastestLap":52,"rank":9,"fastestLapTime":"1:18.994","fastestLapSpeed":223.718,"statusId":1} 8 | {"resultId":24993,"raceId":1053,"driverId":840,"constructorId":117,"number":18,"grid":10,"position":8,"positionText":8,"positionOrder":8,"points":4,"laps":63,"time":"+56.909","milliseconds":7411507,"fastestLap":59,"rank":8,"fastestLapTime":"1:18.782","fastestLapSpeed":224.32,"statusId":1} 9 | {"resultId":24994,"raceId":1053,"driverId":839,"constructorId":214,"number":31,"grid":9,"position":9,"positionText":9,"positionOrder":9,"points":2,"laps":63,"time":"+65.704","milliseconds":7420302,"fastestLap":62,"rank":15,"fastestLapTime":"1:19.422","fastestLapSpeed":222.512,"statusId":1} 10 | {"resultId":24995,"raceId":1053,"driverId":4,"constructorId":214,"number":14,"grid":15,"position":10,"positionText":10,"positionOrder":10,"points":1,"laps":63,"time":"+66.561","milliseconds":7421159,"fastestLap":62,"rank":14,"fastestLapTime":"1:19.417","fastestLapSpeed":222.526,"statusId":1} 11 | {"resultId":24996,"raceId":1053,"driverId":815,"constructorId":9,"number":11,"grid":2,"position":11,"positionText":11,"positionOrder":11,"points":0,"laps":63,"time":"+67.151","milliseconds":7421749,"fastestLap":62,"rank":13,"fastestLapTime":"1:19.396","fastestLapSpeed":222.585,"statusId":1} 12 | 
{"resultId":24997,"raceId":1053,"driverId":852,"constructorId":213,"number":22,"grid":20,"position":12,"positionText":12,"positionOrder":12,"points":0,"laps":63,"time":"+73.184","milliseconds":7427782,"fastestLap":49,"rank":4,"fastestLapTime":"1:18.334","fastestLapSpeed":225.603,"statusId":1} 13 | {"resultId":24998,"raceId":1053,"driverId":8,"constructorId":51,"number":7,"grid":16,"position":13,"positionText":13,"positionOrder":13,"points":0,"laps":63,"time":"+94.773","milliseconds":7449371,"fastestLap":62,"rank":5,"fastestLapTime":"1:18.353","fastestLapSpeed":225.548,"statusId":1} 14 | {"resultId":24999,"raceId":1053,"driverId":841,"constructorId":51,"number":99,"grid":17,"position":14,"positionText":14,"positionOrder":14,"points":0,"laps":62,"time":"\\N","milliseconds":"\\N","fastestLap":57,"rank":16,"fastestLapTime":"1:19.470","fastestLapSpeed":222.378,"statusId":11} 15 | {"resultId":25000,"raceId":1053,"driverId":20,"constructorId":117,"number":5,"grid":0,"position":15,"positionText":15,"positionOrder":15,"points":0,"laps":61,"time":"\\N","milliseconds":"\\N","fastestLap":59,"rank":10,"fastestLapTime":"1:19.074","fastestLapSpeed":223.491,"statusId":6} 16 | {"resultId":25001,"raceId":1053,"driverId":854,"constructorId":210,"number":47,"grid":18,"position":16,"positionText":16,"positionOrder":16,"points":0,"laps":61,"time":"\\N","milliseconds":"\\N","fastestLap":58,"rank":11,"fastestLapTime":"1:19.193","fastestLapSpeed":223.156,"statusId":12} 17 | {"resultId":25002,"raceId":1053,"driverId":853,"constructorId":210,"number":9,"grid":19,"position":17,"positionText":17,"positionOrder":17,"points":0,"laps":61,"time":"\\N","milliseconds":"\\N","fastestLap":55,"rank":17,"fastestLapTime":"1:20.402","fastestLapSpeed":219.8,"statusId":12} 18 | 
{"resultId":25003,"raceId":1053,"driverId":822,"constructorId":131,"number":77,"grid":8,"position":"\\N","positionText":"R","positionOrder":18,"points":0,"laps":30,"time":"\\N","milliseconds":"\\N","fastestLap":30,"rank":19,"fastestLapTime":"1:28.485","fastestLapSpeed":199.721,"statusId":4} 19 | {"resultId":25004,"raceId":1053,"driverId":847,"constructorId":3,"number":63,"grid":12,"position":"\\N","positionText":"R","positionOrder":19,"points":0,"laps":30,"time":"\\N","milliseconds":"\\N","fastestLap":28,"rank":18,"fastestLapTime":"1:26.543","fastestLapSpeed":204.203,"statusId":4} 20 | {"resultId":25005,"raceId":1053,"driverId":849,"constructorId":3,"number":6,"grid":14,"position":"\\N","positionText":"R","positionOrder":20,"points":0,"laps":0,"time":"\\N","milliseconds":"\\N","fastestLap":"\\N","rank":0,"fastestLapTime":"\\N","fastestLapSpeed":"\\N","statusId":4} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Formula-1-Data-Engineering-Project-Using-Azure-Databricks 2 |

Project Overview:

3 | This project aims to provide a data analysis solution for Formula-1 race results using Azure Databricks. This is an ETL pipeline to ingest Formula 1 motor racing data, transform and load it into our data warehouse for reporting and analysis purposes. The data is sourced from ergast.com, a website dedicated to Formula 1 statistics, and is stored in Azure Datalake Gen2 storage. Data transformation and analysis were performed using Azure Databricks. The entire process is orchestrated using Azure Data Factory. 4 | 5 |

Formula 1 Overview

6 | Formula 1 (F1) is the top tier of single-seater auto racing worldwide, governed by the FIA. It features high-tech, powerful cars with hybrid engines. A season takes place once a year, and each race happens over a weekend (Friday to Sunday). Each race is conducted at a different circuit. 10 teams/constructors participate, with two drivers assigned to each team. The season includes 20-23 races (Grands Prix) held in various countries. Safety is a priority with strict regulations and constant advancements. Pit stops for tire changes and adjustments are common. A qualifying round is conducted on Saturday to decide the grid positions of drivers for the Sunday race. Each race contains 50-70 laps. Pit stops are available to change tires or make repairs. Race results include driver standings and constructor standings. The driver that tops the driver standings becomes the drivers' champion and the team that tops the constructor standings becomes the constructors' champion. 7 | 8 |

Architecture diagram

9 | 10 | 11 | 12 | # ER Diagram: 13 | 14 | The structure of the database is shown in the following ER Diagram and explained in the [Database User Guide](http://ergast.com/docs/f1db_user_guide.txt) 15 | ![ERDiagram](http://ergast.com/images/ergast_db.png) 16 | 17 | ## How it works: 18 |

Source Data Files

19 | We are referring to open-source data from the website Ergast Developer API. Data is available from 1950 to 2022. 20 | 21 | | File Name | File Type | 22 | | ------------- | ------------- | 23 | | Circuits | CSV | 24 | | Races | CSV | 25 | | Constructors | Single Line JSON | 26 | | Drivers | Single Line Nested JSON | 27 | | Results | Single Line JSON | 28 | | PitStops | Multi Line JSON | 29 | | LapTimes | Split CSV Files | 30 | | Qualifying | Split Multi Line JSON Files | 31 | 32 | #### Execution Overview: 33 | - Azure Data Factory (ADF) is responsible for the execution of Azure Databricks notebooks as well as monitoring them. We import data from Ergast API to Azure Data Lake Storage Gen2 (ADLS). The raw data is stored in the container at **Bronze zone** (landing zone). 34 | - Data in the Bronze zone is ingested using Azure Databricks notebook. The data is transformed into delta tables using upsert functionality. ADF then uploads the data to ADLS **Silver zone** (standardization zone). 35 | - Ingested data in **Silver zone** is transformed using Azure Databricks SQL notebook. Tables are joined and aggregated for analytical and visualization purposes. The output is loaded to the **Gold zone** (analytical zone). 36 | 37 | #### ETL pipeline: 38 | ETL flow comprises two parts: 39 | - Ingestion: Process data from **Bronze zone** to **Silver zone** 40 | - Transformation: Process data from **Silver zone** to **Gold zone** 41 | 42 | In the first pipeline, data stored in JSON and CSV format is read using Apache Spark with minimal transformation saved into a delta table. The transformation includes dropping columns, renaming headers, applying schema, and adding audit columns (```ingestion_date``` and ```file_source```) and ```file_date``` as the notebook parameter. This serves as a dynamic expression in ADF. 43 | 44 | In the second pipeline, Databricks SQL reads preprocessed delta files and transforms them into the final dimensional model tables in delta format. 
Transformations performed include dropping duplicates, joining tables using join, and aggregating using a window. 45 | 46 | ADF is scheduled to run every Sunday at 10 PM and is designed to skip the execution if there is no race that week. We have another pipeline to execute the ingestion pipeline and transformation pipeline using file_date as the parameter for the tumbling window trigger. 47 | 48 | ![Screen Shot 2022-06-12 at 4 42 18 PM](https://user-images.githubusercontent.com/107358349/173252855-6a50be95-d7a7-481c-9438-8ae9fdc7df28.png) 49 | 50 | ## Azure Resources Used for this Project: 51 | * Azure Data Lake Storage 52 | * Azure Data Factory 53 | * Azure Databricks 54 | * Azure Key Vault 55 | 56 | ## Project Requirements: 57 | The requirements for this project are broken down into six different parts which are 58 | 59 | #### 1. Data Ingestion Requirements 60 | * Ingest all 8 files into Azure data lake. 61 | * Ingested data must have the same schema applied. 62 | * Ingested data must have audit columns. 63 | * Ingested data must be stored in columnar format (i.e. parquet). 64 | * We must be able to analyze the ingested data via SQL. 65 | * Ingestion Logic must be able to handle the incremental load. 66 | 67 | #### 2. Data Transformation Requirements 68 | * Join the key information required for reporting to create a new table. 69 | * Join the key information required for analysis to create a new table. 70 | * Transformed tables must have audit columns. 71 | * We must be able to analyze the transformed data via SQL. 72 | * Transformed data must be stored in columnar format (i.e. parquet). 73 | * Transformation logic must be able to handle the incremental load. 74 | 75 | #### 3. Data Reporting Requirements 76 | * We want to be able to know Driver Standings. 77 | * We should be able to know Constructor Standings as well. 78 | 79 | #### 4. Data Analysis Requirements 80 | * Find the Dominant drivers. 81 | * Find the Dominant Teams. 82 | * Visualize the Outputs. 
83 | * Create Databricks dashboards. 84 | 85 | #### 5. Scheduling Requirements 86 | * Scheduled to run every Sunday at 10 pm. 87 | * Ability to monitor pipelines. 88 | * Ability to rerun failed pipelines. 89 | * Ability to set up alerts on failures 90 | 91 | #### 6. Other Non-Functional Requirements 92 | * Ability to delete individual records 93 | * Ability to see history and time travel 94 | * Ability to roll back to a previous version 95 | 96 | ## Analysis Result: 97 | ![image](https://user-images.githubusercontent.com/64007718/235310453-95b6d253-aaab-454b-87f1-8fb722600014.png) 98 | ![image](https://user-images.githubusercontent.com/64007718/235310459-c9141816-2832-4be7-8902-3fce7096c88d.png) 99 | ![image](https://user-images.githubusercontent.com/64007718/235310466-4a83e4ce-00c3-444c-b22a-83ad42530321.png) 100 | ![image](https://user-images.githubusercontent.com/64007718/235310470-9c966e29-ba76-4c10-9554-f201d72ee636.png) 101 | ![image](https://user-images.githubusercontent.com/64007718/235310476-98db1649-0fb4-45f5-bfc4-8892afc8bc80.png) 102 | ![image](https://user-images.githubusercontent.com/64007718/235310486-98404d97-ed11-4be2-90c3-535f538cfdc9.png) 103 | 104 | ## Tasks performed: 105 | • Built a solution architecture for a data engineering solution using Azure Databricks, Azure Data Lake Gen2, Azure Data Factory, and Power BI. 106 | 107 | • Created and used Azure Databricks service and the architecture of Databricks within Azure. 108 | 109 | • Worked with Databricks notebooks and used Databricks utilities, magic commands, etc. 110 | 111 | • Passed parameters between notebooks as well as created notebook workflows. 112 | 113 | • Created, configured, and monitored Databricks clusters, cluster pools, and jobs. 114 | 115 | • Mounted Azure Storage in Databricks using secrets stored in Azure Key Vault. 116 | 117 | • Worked with Databricks Tables, Databricks File System (DBFS), etc. 
118 | 119 | • Used Delta Lake to implement a solution using Lakehouse architecture. 120 | 121 | • Created dashboards to visualize the outputs. 122 | 123 | • Connected to the Azure Databricks tables from PowerBI. 124 | 125 | ## Spark (Only PySpark and SQL) 126 | • Spark architecture, Data Sources API, and Dataframe API. 127 | 128 | • PySpark - Ingested CSV, simple, and complex JSON files into the data lake as parquet files/ tables. 129 | 130 | • PySpark - Transformations such as Filter, Join, Simple Aggregations, GroupBy, Window functions etc. 131 | 132 | • PySpark - Created global and temporary views. 133 | 134 | • Spark SQL - Created databases, tables, and views. 135 | 136 | • Spark SQL - Transformations such as Filter, Join, Simple Aggregations, GroupBy, Window functions etc. 137 | 138 | • Spark SQL - Created local and temporary views. 139 | 140 | • Implemented full refresh and incremental load patterns using partitions. 141 | 142 | ## Delta Lake 143 | • Performed Read, Write, Update, Delete, and Merge to delta lake using both PySpark as well as SQL. 144 | 145 | • History, Time Travel, and Vacuum. 146 | 147 | • Converted Parquet files to Delta files. 148 | 149 | • Implemented incremental load pattern using delta lake. 150 | 151 | ## Azure Data Factory 152 | • Created pipelines to execute Databricks notebooks. 153 | 154 | • Designed robust pipelines to deal with unexpected scenarios such as missing files. 155 | 156 | • Created dependencies between activities as well as pipelines. 157 | 158 | • Scheduled the pipelines using data factory triggers to execute at regular intervals. 159 | 160 | • Monitored the triggers/ pipelines to check for errors/ outputs. 161 | 162 | # About the Project: 163 | 164 |

Folders:

165 | 166 | - 1-Authentication: The folder contains all notebooks to demonstrate different ways to access Azure Data Lake Gen2 containers into the Databricks file system. 167 | 168 | - 2-includes: The folder contains notebooks with common functions and path configurations. 169 | 170 | - 3-Data Ingestion: The folder contains all notebooks to ingest the data from raw to processed. 171 | 172 | - 4-raw: The folder contains all notebooks to create raw tables in SQL. 173 | 174 | - 5-Data Transformation: The folder contains all notebooks that transform the raw data into the processed layer. 175 | 176 | - 6-Data Analysis: The folder contains all notebooks which include an analysis of the data. 177 | 178 | - 7-demo: The folder contains notebooks with all the pre-requisite demos. 179 | 180 | - 8-Power Bi reports: This folder contains all the reports created from the analyzed data. 181 | 182 |

Technologies/Tools Used:

183 | 193 | -------------------------------------------------------------------------------- /Screenshots/Step1 - Creating Resources.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step1 - Creating Resources.pdf -------------------------------------------------------------------------------- /Screenshots/Step10 - Data Analysis(Delta Format).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step10 - Data Analysis(Delta Format).pdf -------------------------------------------------------------------------------- /Screenshots/Step11 - Data Ingestion and Transformation using ADF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step11 - Data Ingestion and Transformation using ADF.pdf -------------------------------------------------------------------------------- /Screenshots/Step12 - Creating Power Bi Reports.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step12 - Creating Power Bi Reports.pdf -------------------------------------------------------------------------------- /Screenshots/Step2 - Setting Authentication.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step2 - Setting Authentication.pdf -------------------------------------------------------------------------------- /Screenshots/Step3 - Data Ingestion.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step3 - Data Ingestion.pdf -------------------------------------------------------------------------------- /Screenshots/Step4 - Data Transformation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step4 - Data Transformation.pdf -------------------------------------------------------------------------------- /Screenshots/Step5 - Creating External and Managed Tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step5 - Creating External and Managed Tables.pdf -------------------------------------------------------------------------------- /Screenshots/Step6 - Data Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step6 - Data Analysis.pdf -------------------------------------------------------------------------------- /Screenshots/Step7 - Ingestion using Incremental Load.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step7 - Ingestion using Incremental Load.pdf -------------------------------------------------------------------------------- /Screenshots/Step8 - Transformation using Incremental Load.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step8 - Transformation using Incremental Load.pdf -------------------------------------------------------------------------------- /Screenshots/Step9 - Data Ingestion(Delta Format).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshwarreddyt/Data-Engineering-project-on-Formula1-Racing-using-Azure-Databricks/7023515e185ba70d837ad3b04db42c2b4d2aa234/Screenshots/Step9 - Data Ingestion(Delta Format).pdf -------------------------------------------------------------------------------- /demo/1.filter_demo.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %run "../includes/configuration" 3 | 4 | # COMMAND ---------- 5 | 6 | races_df = spark.read.parquet(f"{processed_folder_path}/races") 7 | 8 | # COMMAND ---------- 9 | 10 | races_filtered_df = races_df.filter("race_year = 2019 and round <= 5") 11 | 12 | # COMMAND ---------- 13 | 14 | races_filtered_df = races_df.where((races_df["race_year"] == 2019) & (races_df["round"] <= 5)) 15 | 16 | # COMMAND ---------- 17 | 18 | display(races_filtered_df) 19 | 20 | # COMMAND ---------- 21 | 22 | -------------------------------------------------------------------------------- /demo/10.delta_lake_demo.py: 
-------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC 1. Write data to delta lake (managed table) 4 | # MAGIC 2. Write data to delta lake (external table) 5 | # MAGIC 3. Read data from delta lake (Table) 6 | # MAGIC 4. Read data from delta lake (File) 7 | 8 | # COMMAND ---------- 9 | 10 | # MAGIC %sql 11 | # MAGIC CREATE DATABASE IF NOT EXISTS f1_demo 12 | # MAGIC LOCATION '/mnt/formula1dl/demo' 13 | 14 | # COMMAND ---------- 15 | 16 | results_df = spark.read \ 17 | .option("inferSchema", True) \ 18 | .json("/mnt/formula1dl/raw/2021-03-28/results.json") 19 | 20 | # COMMAND ---------- 21 | 22 | results_df.write.format("delta").mode("overwrite").saveAsTable("f1_demo.results_managed") 23 | 24 | # COMMAND ---------- 25 | 26 | # MAGIC %sql 27 | # MAGIC SELECT * FROM f1_demo.results_managed; 28 | 29 | # COMMAND ---------- 30 | 31 | results_df.write.format("delta").mode("overwrite").save("/mnt/formula1dl/demo/results_external") 32 | 33 | # COMMAND ---------- 34 | 35 | # MAGIC %sql 36 | # MAGIC CREATE TABLE f1_demo.results_external 37 | # MAGIC USING DELTA 38 | # MAGIC LOCATION '/mnt/formula1dl/demo/results_external' 39 | 40 | # COMMAND ---------- 41 | 42 | # MAGIC %sql 43 | # MAGIC SELECT * FROM f1_demo.results_external 44 | 45 | # COMMAND ---------- 46 | 47 | results_external_df = spark.read.format("delta").load("/mnt/formula1dl/demo/results_external") 48 | 49 | # COMMAND ---------- 50 | 51 | display(results_external_df) 52 | 53 | # COMMAND ---------- 54 | 55 | results_df.write.format("delta").mode("overwrite").partitionBy("constructorId").saveAsTable("f1_demo.results_partitioned") 56 | 57 | # COMMAND ---------- 58 | 59 | # MAGIC %sql 60 | # MAGIC SHOW PARTITIONS f1_demo.results_partitioned 61 | 62 | # COMMAND ---------- 63 | 64 | # MAGIC %md 65 | # MAGIC 1. Update Delta Table 66 | # MAGIC 2. 
Delete From Delta Table 67 | 68 | # COMMAND ---------- 69 | 70 | # MAGIC %sql 71 | # MAGIC SELECT * FROM f1_demo.results_managed; 72 | 73 | # COMMAND ---------- 74 | 75 | # MAGIC %sql 76 | # MAGIC UPDATE f1_demo.results_managed 77 | # MAGIC SET points = 11 - position 78 | # MAGIC WHERE position <= 10 79 | 80 | # COMMAND ---------- 81 | 82 | # MAGIC %sql 83 | # MAGIC SELECT * FROM f1_demo.results_managed; 84 | 85 | # COMMAND ---------- 86 | 87 | from delta.tables import DeltaTable 88 | 89 | deltaTable = DeltaTable.forPath(spark, "/mnt/formula1dl/demo/results_managed") 90 | 91 | deltaTable.update("position <= 10", { "points": "21 - position" } ) 92 | 93 | # COMMAND ---------- 94 | 95 | # MAGIC %sql 96 | # MAGIC SELECT * FROM f1_demo.results_managed; 97 | 98 | # COMMAND ---------- 99 | 100 | # MAGIC %sql 101 | # MAGIC DELETE FROM f1_demo.results_managed 102 | # MAGIC WHERE position > 10; 103 | 104 | # COMMAND ---------- 105 | 106 | # MAGIC %sql 107 | # MAGIC SELECT * FROM f1_demo.results_managed; 108 | 109 | # COMMAND ---------- 110 | 111 | from delta.tables import DeltaTable 112 | 113 | deltaTable = DeltaTable.forPath(spark, "/mnt/formula1dl/demo/results_managed") 114 | 115 | deltaTable.delete("points = 0") 116 | 117 | # COMMAND ---------- 118 | 119 | # MAGIC %sql 120 | # MAGIC SELECT * FROM f1_demo.results_managed; 121 | 122 | # COMMAND ---------- 123 | 124 | # MAGIC %md 125 | # MAGIC Upsert using merge 126 | 127 | # COMMAND ---------- 128 | 129 | drivers_day1_df = spark.read \ 130 | .option("inferSchema", True) \ 131 | .json("/mnt/formula1dl/raw/2021-03-28/drivers.json") \ 132 | .filter("driverId <= 10") \ 133 | .select("driverId", "dob", "name.forename", "name.surname") 134 | 135 | # COMMAND ---------- 136 | 137 | display(drivers_day1_df) 138 | 139 | # COMMAND ---------- 140 | 141 | drivers_day1_df.createOrReplaceTempView("drivers_day1") 142 | 143 | # COMMAND ---------- 144 | 145 | from pyspark.sql.functions import upper 146 | 147 | drivers_day2_df = spark.read \ 
148 | .option("inferSchema", True) \ 149 | .json("/mnt/formula1dl/raw/2021-03-28/drivers.json") \ 150 | .filter("driverId BETWEEN 6 AND 15") \ 151 | .select("driverId", "dob", upper("name.forename").alias("forename"), upper("name.surname").alias("surname")) 152 | 153 | # COMMAND ---------- 154 | 155 | drivers_day2_df.createOrReplaceTempView("drivers_day2") 156 | 157 | # COMMAND ---------- 158 | 159 | display(drivers_day2_df) 160 | 161 | # COMMAND ---------- 162 | 163 | from pyspark.sql.functions import upper 164 | 165 | drivers_day3_df = spark.read \ 166 | .option("inferSchema", True) \ 167 | .json("/mnt/formula1dl/raw/2021-03-28/drivers.json") \ 168 | .filter("driverId BETWEEN 1 AND 5 OR driverId BETWEEN 16 AND 20") \ 169 | .select("driverId", "dob", upper("name.forename").alias("forename"), upper("name.surname").alias("surname")) 170 | 171 | # COMMAND ---------- 172 | 173 | # MAGIC %sql 174 | # MAGIC CREATE TABLE IF NOT EXISTS f1_demo.drivers_merge ( 175 | # MAGIC driverId INT, 176 | # MAGIC dob DATE, 177 | # MAGIC forename STRING, 178 | # MAGIC surname STRING, 179 | # MAGIC createdDate DATE, 180 | # MAGIC updatedDate DATE 181 | # MAGIC ) 182 | # MAGIC USING DELTA 183 | 184 | # COMMAND ---------- 185 | 186 | # MAGIC %md Day1 187 | 188 | # COMMAND ---------- 189 | 190 | # MAGIC %sql 191 | # MAGIC MERGE INTO f1_demo.drivers_merge tgt 192 | # MAGIC USING drivers_day1 upd 193 | # MAGIC ON tgt.driverId = upd.driverId 194 | # MAGIC WHEN MATCHED THEN 195 | # MAGIC UPDATE SET tgt.dob = upd.dob, 196 | # MAGIC tgt.forename = upd.forename, 197 | # MAGIC tgt.surname = upd.surname, 198 | # MAGIC tgt.updatedDate = current_timestamp 199 | # MAGIC WHEN NOT MATCHED 200 | # MAGIC THEN INSERT (driverId, dob, forename,surname,createdDate ) VALUES (driverId, dob, forename,surname, current_timestamp) 201 | 202 | # COMMAND ---------- 203 | 204 | # MAGIC %sql SELECT * FROM f1_demo.drivers_merge; 205 | 206 | # COMMAND ---------- 207 | 208 | # MAGIC %md 209 | # MAGIC Day 2 210 | 211 | # 
COMMAND ---------- 212 | 213 | # MAGIC %sql 214 | # MAGIC MERGE INTO f1_demo.drivers_merge tgt 215 | # MAGIC USING drivers_day2 upd 216 | # MAGIC ON tgt.driverId = upd.driverId 217 | # MAGIC WHEN MATCHED THEN 218 | # MAGIC UPDATE SET tgt.dob = upd.dob, 219 | # MAGIC tgt.forename = upd.forename, 220 | # MAGIC tgt.surname = upd.surname, 221 | # MAGIC tgt.updatedDate = current_timestamp 222 | # MAGIC WHEN NOT MATCHED 223 | # MAGIC THEN INSERT (driverId, dob, forename,surname,createdDate ) VALUES (driverId, dob, forename,surname, current_timestamp) 224 | 225 | # COMMAND ---------- 226 | 227 | # MAGIC %sql SELECT * FROM f1_demo.drivers_merge; 228 | 229 | # COMMAND ---------- 230 | 231 | # MAGIC %md 232 | # MAGIC Day 3 233 | 234 | # COMMAND ---------- 235 | 236 | from pyspark.sql.functions import current_timestamp 237 | from delta.tables import DeltaTable 238 | 239 | deltaTable = DeltaTable.forPath(spark, "/mnt/formula1dl/demo/drivers_merge") 240 | 241 | deltaTable.alias("tgt").merge( 242 | drivers_day3_df.alias("upd"), 243 | "tgt.driverId = upd.driverId") \ 244 | .whenMatchedUpdate(set = { "dob" : "upd.dob", "forename" : "upd.forename", "surname" : "upd.surname", "updatedDate": "current_timestamp()" } ) \ 245 | .whenNotMatchedInsert(values = 246 | { 247 | "driverId": "upd.driverId", 248 | "dob": "upd.dob", 249 | "forename" : "upd.forename", 250 | "surname" : "upd.surname", 251 | "createdDate": "current_timestamp()" 252 | } 253 | ) \ 254 | .execute() 255 | 256 | # COMMAND ---------- 257 | 258 | # MAGIC %sql SELECT * FROM f1_demo.drivers_merge; 259 | 260 | # COMMAND ---------- 261 | 262 | # MAGIC %md 263 | # MAGIC 1. History & Versioning 264 | # MAGIC 2. Time Travel 265 | # MAGIC 3. 
Vaccum 266 | 267 | # COMMAND ---------- 268 | 269 | # MAGIC %sql 270 | # MAGIC DESC HISTORY f1_demo.drivers_merge 271 | 272 | # COMMAND ---------- 273 | 274 | # MAGIC %sql 275 | # MAGIC SELECT * FROM f1_demo.drivers_merge VERSION AS OF 2; 276 | 277 | # COMMAND ---------- 278 | 279 | # MAGIC %sql 280 | # MAGIC SELECT * FROM f1_demo.drivers_merge TIMESTAMP AS OF '2021-06-23T15:40:33.000+0000'; 281 | 282 | # COMMAND ---------- 283 | 284 | df = spark.read.format("delta").option("timestampAsOf", '2021-06-23T15:40:33.000+0000').load("/mnt/formula1dl/demo/drivers_merge") 285 | 286 | # COMMAND ---------- 287 | 288 | display(df) 289 | 290 | # COMMAND ---------- 291 | 292 | # MAGIC %sql 293 | # MAGIC VACUUM f1_demo.drivers_merge 294 | 295 | # COMMAND ---------- 296 | 297 | # MAGIC %sql 298 | # MAGIC SELECT * FROM f1_demo.drivers_merge TIMESTAMP AS OF '2021-06-23T15:40:33.000+0000'; 299 | 300 | # COMMAND ---------- 301 | 302 | # MAGIC %sql 303 | # MAGIC SET spark.databricks.delta.retentionDurationCheck.enabled = false; 304 | # MAGIC VACUUM f1_demo.drivers_merge RETAIN 0 HOURS 305 | 306 | # COMMAND ---------- 307 | 308 | # MAGIC %sql 309 | # MAGIC SELECT * FROM f1_demo.drivers_merge TIMESTAMP AS OF '2021-06-23T15:40:33.000+0000'; 310 | 311 | # COMMAND ---------- 312 | 313 | # MAGIC %sql 314 | # MAGIC SELECT * FROM f1_demo.drivers_merge 315 | 316 | # COMMAND ---------- 317 | 318 | # MAGIC %sql 319 | # MAGIC DESC HISTORY f1_demo.drivers_merge; 320 | 321 | # COMMAND ---------- 322 | 323 | # MAGIC %sql 324 | # MAGIC DELETE FROM f1_demo.drivers_merge WHERE driverId = 1; 325 | 326 | # COMMAND ---------- 327 | 328 | # MAGIC %sql 329 | # MAGIC SELECT * FROM f1_demo.drivers_merge VERSION AS OF 3; 330 | 331 | # COMMAND ---------- 332 | 333 | # MAGIC %sql 334 | # MAGIC MERGE INTO f1_demo.drivers_merge tgt 335 | # MAGIC USING f1_demo.drivers_merge VERSION AS OF 3 src 336 | # MAGIC ON (tgt.driverId = src.driverId) 337 | # MAGIC WHEN NOT MATCHED THEN 338 | # MAGIC INSERT * 339 | 340 | # 
COMMAND ---------- 341 | 342 | # MAGIC %sql DESC HISTORY f1_demo.drivers_merge 343 | 344 | # COMMAND ---------- 345 | 346 | # MAGIC %sql 347 | # MAGIC SELECT * FROM f1_demo.drivers_merge 348 | 349 | # COMMAND ---------- 350 | 351 | # MAGIC %md 352 | # MAGIC Transaction Logs 353 | 354 | # COMMAND ---------- 355 | 356 | # MAGIC %sql 357 | # MAGIC CREATE TABLE IF NOT EXISTS f1_demo.drivers_txn ( 358 | # MAGIC driverId INT, 359 | # MAGIC dob DATE, 360 | # MAGIC forename STRING, 361 | # MAGIC surname STRING, 362 | # MAGIC createdDate DATE, 363 | # MAGIC updatedDate DATE 364 | # MAGIC ) 365 | # MAGIC USING DELTA 366 | 367 | # COMMAND ---------- 368 | 369 | # MAGIC %sql 370 | # MAGIC DESC HISTORY f1_demo.drivers_txn 371 | 372 | # COMMAND ---------- 373 | 374 | # MAGIC %sql 375 | # MAGIC INSERT INTO f1_demo.drivers_txn 376 | # MAGIC SELECT * FROM f1_demo.drivers_merge 377 | # MAGIC WHERE driverId = 1; 378 | 379 | # COMMAND ---------- 380 | 381 | # MAGIC %sql 382 | # MAGIC DESC HISTORY f1_demo.drivers_txn 383 | 384 | # COMMAND ---------- 385 | 386 | # MAGIC %sql 387 | # MAGIC INSERT INTO f1_demo.drivers_txn 388 | # MAGIC SELECT * FROM f1_demo.drivers_merge 389 | # MAGIC WHERE driverId = 2; 390 | 391 | # COMMAND ---------- 392 | 393 | # MAGIC %sql 394 | # MAGIC DELETE FROM f1_demo.drivers_txn 395 | # MAGIC WHERE driverId = 1; 396 | 397 | # COMMAND ---------- 398 | 399 | for driver_id in range(3, 20): 400 | spark.sql(f"""INSERT INTO f1_demo.drivers_txn 401 | SELECT * FROM f1_demo.drivers_merge 402 | WHERE driverId = {driver_id}""") 403 | 404 | # COMMAND ---------- 405 | 406 | # MAGIC %sql 407 | # MAGIC INSERT INTO f1_demo.drivers_txn 408 | # MAGIC SELECT * FROM f1_demo.drivers_merge; 409 | 410 | # COMMAND ---------- 411 | 412 | # MAGIC %md 413 | # MAGIC Convert Parquet to Delta 414 | 415 | # COMMAND ---------- 416 | 417 | # MAGIC %sql 418 | # MAGIC CREATE TABLE IF NOT EXISTS f1_demo.drivers_convert_to_delta ( 419 | # MAGIC driverId INT, 420 | # MAGIC dob DATE, 421 | # MAGIC 
forename STRING, 422 | # MAGIC surname STRING, 423 | # MAGIC createdDate DATE, 424 | # MAGIC updatedDate DATE 425 | # MAGIC ) 426 | # MAGIC USING PARQUET 427 | 428 | # COMMAND ---------- 429 | 430 | # MAGIC %sql 431 | # MAGIC INSERT INTO f1_demo.drivers_convert_to_delta 432 | # MAGIC SELECT * FROM f1_demo.drivers_merge 433 | 434 | # COMMAND ---------- 435 | 436 | # MAGIC %sql 437 | # MAGIC CONVERT TO DELTA f1_demo.drivers_convert_to_delta 438 | 439 | # COMMAND ---------- 440 | 441 | df = spark.table("f1_demo.drivers_convert_to_delta") 442 | 443 | # COMMAND ---------- 444 | 445 | df.write.format("parquet").save("/mnt/formula1dl/demo/drivers_convert_to_delta_new") 446 | 447 | # COMMAND ---------- 448 | 449 | # MAGIC %sql 450 | # MAGIC CONVERT TO DELTA parquet.`/mnt/formula1dl/demo/drivers_convert_to_delta_new` 451 | 452 | # COMMAND ---------- 453 | 454 | -------------------------------------------------------------------------------- /demo/2.join_demo.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC ##### Spark Join Transformation 4 | 5 | # COMMAND ---------- 6 | 7 | # MAGIC %run "../includes/configuration" 8 | 9 | # COMMAND ---------- 10 | 11 | circuits_df = spark.read.parquet(f"{processed_folder_path}/circuits") \ 12 | .filter("circuit_id < 70") \ 13 | .withColumnRenamed("name", "circuit_name") 14 | 15 | # COMMAND ---------- 16 | 17 | races_df = spark.read.parquet(f"{processed_folder_path}/races").filter("race_year = 2019") \ 18 | .withColumnRenamed("name", "race_name") 19 | 20 | # COMMAND ---------- 21 | 22 | display(circuits_df) 23 | 24 | # COMMAND ---------- 25 | 26 | display(races_df) 27 | 28 | # COMMAND ---------- 29 | 30 | # MAGIC %md 31 | # MAGIC ##### Inner Join 32 | 33 | # COMMAND ---------- 34 | 35 | race_circuits_df = circuits_df.join(races_df, circuits_df.circuit_id == races_df.circuit_id, "inner") \ 36 | .select(circuits_df.circuit_name, 
circuits_df.location, circuits_df.country, races_df.race_name, races_df.round) 37 | 38 | # COMMAND ---------- 39 | 40 | display(race_circuits_df) 41 | 42 | # COMMAND ---------- 43 | 44 | # MAGIC %md 45 | # MAGIC ##### Outer Joins 46 | 47 | # COMMAND ---------- 48 | 49 | # Left Outer Join 50 | race_circuits_df = circuits_df.join(races_df, circuits_df.circuit_id == races_df.circuit_id, "left") \ 51 | .select(circuits_df.circuit_name, circuits_df.location, circuits_df.country, races_df.race_name, races_df.round) 52 | 53 | # COMMAND ---------- 54 | 55 | display(race_circuits_df) 56 | 57 | # COMMAND ---------- 58 | 59 | # right Outer Join 60 | race_circuits_df = circuits_df.join(races_df, circuits_df.circuit_id == races_df.circuit_id, "right") \ 61 | .select(circuits_df.circuit_name, circuits_df.location, circuits_df.country, races_df.race_name, races_df.round) 62 | 63 | # COMMAND ---------- 64 | 65 | display(race_circuits_df) 66 | 67 | # COMMAND ---------- 68 | 69 | # full Outer Join 70 | race_circuits_df = circuits_df.join(races_df, circuits_df.circuit_id == races_df.circuit_id, "full") \ 71 | .select(circuits_df.circuit_name, circuits_df.location, circuits_df.country, races_df.race_name, races_df.round) 72 | 73 | # COMMAND ---------- 74 | 75 | display(race_circuits_df) 76 | 77 | # COMMAND ---------- 78 | 79 | # MAGIC %md 80 | # MAGIC ##### Semi Joins 81 | 82 | # COMMAND ---------- 83 | 84 | race_circuits_df = circuits_df.join(races_df, circuits_df.circuit_id == races_df.circuit_id, "semi") 85 | 86 | # COMMAND ---------- 87 | 88 | display(race_circuits_df) 89 | 90 | # COMMAND ---------- 91 | 92 | # MAGIC %md 93 | # MAGIC ##### Anti Joins 94 | 95 | # COMMAND ---------- 96 | 97 | race_circuits_df = races_df.join(circuits_df, circuits_df.circuit_id == races_df.circuit_id, "anti") 98 | 99 | # COMMAND ---------- 100 | 101 | display(race_circuits_df) 102 | 103 | # COMMAND ---------- 104 | 105 | # MAGIC %md 106 | # MAGIC ##### Cross Joins 107 | 108 | # COMMAND ---------- 109 
| 110 | race_circuits_df = races_df.crossJoin(circuits_df) 111 | 112 | # COMMAND ---------- 113 | 114 | display(race_circuits_df) 115 | 116 | # COMMAND ---------- 117 | 118 | race_circuits_df.count() 119 | 120 | # COMMAND ---------- 121 | 122 | int(races_df.count()) * int(circuits_df.count()) 123 | 124 | # COMMAND ---------- 125 | 126 | -------------------------------------------------------------------------------- /demo/3.aggregation_demo.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %run "../includes/configuration" 3 | 4 | # COMMAND ---------- 5 | 6 | # MAGIC %md 7 | # MAGIC #### Aggregate functions demo 8 | 9 | # COMMAND ---------- 10 | 11 | # MAGIC %md 12 | # MAGIC ##### Built-in Aggregate functions 13 | 14 | # COMMAND ---------- 15 | 16 | race_results_df = spark.read.parquet(f"{presentation_folder_path}/race_results") 17 | 18 | # COMMAND ---------- 19 | 20 | display(race_results_df) 21 | 22 | # COMMAND ---------- 23 | 24 | demo_df = race_results_df.filter("race_year=2020") 25 | 26 | # COMMAND ---------- 27 | 28 | display(demo_df) 29 | 30 | # COMMAND ---------- 31 | 32 | from pyspark.sql.functions import count, countDistinct, sum 33 | 34 | # COMMAND ---------- 35 | 36 | demo_df.select(count("*")).show() 37 | 38 | # COMMAND ---------- 39 | 40 | demo_df.select(count("race_name")).show() 41 | 42 | # COMMAND ---------- 43 | 44 | demo_df.select(countDistinct("race_name")).show() 45 | 46 | # COMMAND ---------- 47 | 48 | demo_df.select(sum("points")).show() 49 | 50 | # COMMAND ---------- 51 | 52 | demo_df.filter("driver_name = 'Lewis Hamilton'").select(sum("points")).show() 53 | 54 | # COMMAND ---------- 55 | 56 | demo_df.filter("driver_name = 'Lewis Hamilton'").select(sum("points"), countDistinct("race_name")) \ 57 | .withColumnRenamed("sum(points)", "total_points") \ 58 | .withColumnRenamed("count(DISTINCT race_name)", "number_of_races") \ 59 | .show() 60 | 61 | # COMMAND ---------- 
62 | 63 | # MAGIC %md 64 | # MAGIC ##### groupBy 65 | 66 | # COMMAND ---------- 67 | 68 | demo_df\ 69 | .groupBy("driver_name") \ 70 | .agg(sum("points").alias("total_points"), countDistinct("race_name").alias("number_of_races")) \ 71 | .show() 72 | 73 | # COMMAND ---------- 74 | 75 | # MAGIC %md 76 | # MAGIC ##### Window Functions 77 | 78 | # COMMAND ---------- 79 | 80 | demo_df = race_results_df.filter("race_year in (2019, 2020)") 81 | 82 | # COMMAND ---------- 83 | 84 | display(demo_df) 85 | 86 | # COMMAND ---------- 87 | 88 | demo_grouped_df = demo_df\ 89 | .groupBy("race_year", "driver_name") \ 90 | .agg(sum("points").alias("total_points"), countDistinct("race_name").alias("number_of_races")) 91 | 92 | # COMMAND ---------- 93 | 94 | display(demo_grouped_df) 95 | 96 | # COMMAND ---------- 97 | 98 | from pyspark.sql.window import Window 99 | from pyspark.sql.functions import desc, rank 100 | 101 | driverRankSpec = Window.partitionBy("race_year").orderBy(desc("total_points")) 102 | demo_grouped_df.withColumn("rank", rank().over(driverRankSpec)).show(100) 103 | 104 | # COMMAND ---------- 105 | 106 | -------------------------------------------------------------------------------- /demo/4.sql_temp_view_demo.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md 3 | # MAGIC #### Access dataframes using SQL 4 | # MAGIC ##### Objectives 5 | # MAGIC 1. Create temporary views on dataframes 6 | # MAGIC 2. Access the view from SQL cell 7 | # MAGIC 3. 
Access the view from Python cell 8 | 9 | # COMMAND ---------- 10 | 11 | # MAGIC %run "../includes/configuration" 12 | 13 | # COMMAND ---------- 14 | 15 | race_results_df = spark.read.parquet(f"{presentation_folder_path}/race_results") 16 | 17 | # COMMAND ---------- 18 | 19 | race_results_df.createOrReplaceTempView("v_race_results") 20 | 21 | # COMMAND ---------- 22 | 23 | # MAGIC %sql 24 | # MAGIC SELECT COUNT(1) 25 | # MAGIC FROM v_race_results 26 | # MAGIC WHERE race_year = 2020 27 | 28 | # COMMAND ---------- 29 | 30 | p_race_year = 2020 31 | 32 | # COMMAND ---------- 33 | 34 | race_results_2019_df = spark.sql(f"SELECT * FROM v_race_results WHERE race_year = {p_race_year}") 35 | 36 | # COMMAND ---------- 37 | 38 | display(race_results_2019_df) 39 | 40 | # COMMAND ---------- 41 | 42 | # MAGIC %md 43 | # MAGIC ##### Global Temporary Views 44 | # MAGIC 1. Create global temporary views on dataframes 45 | # MAGIC 2. Access the view from SQL cell 46 | # MAGIC 3. Access the view from Python cell 47 | # MAGIC 4. 
Acesss the view from another notebook 48 | 49 | # COMMAND ---------- 50 | 51 | race_results_df.createOrReplaceGlobalTempView("gv_race_results") 52 | 53 | # COMMAND ---------- 54 | 55 | # MAGIC %sql 56 | # MAGIC SHOW TABLES IN global_temp; 57 | 58 | # COMMAND ---------- 59 | 60 | # MAGIC %sql 61 | # MAGIC SELECT * 62 | # MAGIC FROM global_temp.gv_race_results; 63 | 64 | # COMMAND ---------- 65 | 66 | spark.sql("SELECT * \ 67 | FROM global_temp.gv_race_results").show() 68 | 69 | # COMMAND ---------- 70 | 71 | -------------------------------------------------------------------------------- /demo/5.sql_temp_view_demo.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %sql 3 | # MAGIC SELECT * FROM v_race_results 4 | 5 | # COMMAND ---------- 6 | 7 | # MAGIC %sql 8 | # MAGIC SELECT * 9 | # MAGIC FROM global_temp.gv_race_results; 10 | 11 | # COMMAND ---------- 12 | 13 | -------------------------------------------------------------------------------- /demo/6.sql_objects_demo.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC ##### Lesson Objectives 4 | -- MAGIC 1. Spark SQL documentation 5 | -- MAGIC 1. Create Database demo 6 | -- MAGIC 1. Data tab in the UI 7 | -- MAGIC 1. SHOW command 8 | -- MAGIC 1. DESCRIBE command 9 | -- MAGIC 1. 
Find the current database 10 | 11 | -- COMMAND ---------- 12 | 13 | CREATE DATABASE demo; 14 | 15 | -- COMMAND ---------- 16 | 17 | CREATE DATABASE IF NOT EXISTS demo; 18 | 19 | -- COMMAND ---------- 20 | 21 | SHOW databases; 22 | 23 | -- COMMAND ---------- 24 | 25 | DESCRIBE DATABASE demo; 26 | 27 | -- COMMAND ---------- 28 | 29 | DESCRIBE DATABASE EXTENDED demo; 30 | 31 | -- COMMAND ---------- 32 | 33 | SELECT CURRENT_DATABASE(); 34 | 35 | -- COMMAND ---------- 36 | 37 | SHOW TABLES; 38 | 39 | -- COMMAND ---------- 40 | 41 | SHOW TABLES IN demo; 42 | 43 | -- COMMAND ---------- 44 | 45 | USE demo; 46 | 47 | -- COMMAND ---------- 48 | 49 | SELECT CURRENT_DATABASE(); 50 | 51 | -- COMMAND ---------- 52 | 53 | SHOW TABLES; 54 | 55 | -- COMMAND ---------- 56 | 57 | -- MAGIC %md 58 | -- MAGIC #### Managed Tables 59 | -- MAGIC ##### Learning Objectives 60 | -- MAGIC 1. Create managed table using Python 61 | -- MAGIC 1. Create managed table using SQL 62 | -- MAGIC 1. Effect of dropping a managed table 63 | -- MAGIC 1. 
Describe table 64 | 65 | -- COMMAND ---------- 66 | 67 | -- MAGIC %run "../includes/configuration" 68 | 69 | -- COMMAND ---------- 70 | 71 | -- MAGIC %python 72 | -- MAGIC race_results_df = spark.read.parquet(f"{presentation_folder_path}/race_results") 73 | 74 | -- COMMAND ---------- 75 | 76 | -- MAGIC %python 77 | -- MAGIC race_results_df.write.format("parquet").saveAsTable("demo.race_results_python") 78 | 79 | -- COMMAND ---------- 80 | 81 | USE demo; 82 | SHOW TABLES; 83 | 84 | -- COMMAND ---------- 85 | 86 | DESC EXTENDED race_results_python; 87 | 88 | -- COMMAND ---------- 89 | 90 | SELECT * 91 | FROM demo.race_results_python 92 | WHERE race_year = 2020; 93 | 94 | -- COMMAND ---------- 95 | 96 | CREATE TABLE demo.race_results_sql 97 | AS 98 | SELECT * 99 | FROM demo.race_results_python 100 | WHERE race_year = 2020; 101 | 102 | -- COMMAND ---------- 103 | 104 | SELECT CURRENT_DATABASE() 105 | 106 | -- COMMAND ---------- 107 | 108 | DESC EXTENDED demo.race_results_sql; 109 | 110 | -- COMMAND ---------- 111 | 112 | SHOW TABLES IN demo; 113 | 114 | -- COMMAND ---------- 115 | 116 | DROP TABLE demo.race_results_sql; 117 | 118 | -- COMMAND ---------- 119 | 120 | SHOW TABLES IN demo; 121 | 122 | -- COMMAND ---------- 123 | 124 | -- MAGIC %md 125 | -- MAGIC #### External Tables 126 | -- MAGIC ##### Learning Objectives 127 | -- MAGIC 1. Create external table using Python 128 | -- MAGIC 1. Create external table using SQL 129 | -- MAGIC 1. 
Effect of dropping an external table 130 | 131 | -- COMMAND ---------- 132 | 133 | -- MAGIC %python 134 | -- MAGIC race_results_df.write.format("parquet").option("path", f"{presentation_folder_path}/race_results_ext_py").saveAsTable("demo.race_results_ext_py") 135 | 136 | -- COMMAND ---------- 137 | 138 | DESC EXTENDED demo.race_results_ext_py 139 | 140 | -- COMMAND ---------- 141 | 142 | CREATE TABLE demo.race_results_ext_sql 143 | (race_year INT, 144 | race_name STRING, 145 | race_date TIMESTAMP, 146 | circuit_location STRING, 147 | driver_name STRING, 148 | driver_number INT, 149 | driver_nationality STRING, 150 | team STRING, 151 | grid INT, 152 | fastest_lap INT, 153 | race_time STRING, 154 | points FLOAT, 155 | position INT, 156 | created_date TIMESTAMP 157 | ) 158 | USING parquet 159 | LOCATION "abfss://presentation@erformula1dl.dfs.core.windows.net/race_results_ext_sql" 160 | 161 | -- COMMAND ---------- 162 | 163 | SHOW TABLES IN demo; 164 | 165 | -- COMMAND ---------- 166 | 167 | INSERT INTO demo.race_results_ext_sql 168 | SELECT * FROM demo.race_results_ext_py WHERE race_year = 2020; 169 | 170 | -- COMMAND ---------- 171 | 172 | SELECT COUNT(1) FROM demo.race_results_ext_sql; 173 | 174 | -- COMMAND ---------- 175 | 176 | SHOW TABLES IN demo; 177 | 178 | -- COMMAND ---------- 179 | 180 | DROP TABLE demo.race_results_ext_sql 181 | 182 | -- COMMAND ---------- 183 | 184 | SHOW TABLES IN demo; 185 | 186 | -- COMMAND ---------- 187 | 188 | -- MAGIC %md 189 | -- MAGIC #### Views on tables 190 | -- MAGIC ##### Learning Objectives 191 | -- MAGIC 1. Create Temp View 192 | -- MAGIC 1. Create Global Temp View 193 | -- MAGIC 1. 
Create Permanent View 194 | 195 | -- COMMAND ---------- 196 | 197 | SELECT CURRENT_DATABASE(); 198 | 199 | -- COMMAND ---------- 200 | 201 | CREATE OR REPLACE TEMP VIEW v_race_results 202 | AS 203 | SELECT * 204 | FROM demo.race_results_python 205 | WHERE race_year = 2018; 206 | 207 | -- COMMAND ---------- 208 | 209 | SELECT * FROM v_race_results; 210 | 211 | -- COMMAND ---------- 212 | 213 | CREATE OR REPLACE GLOBAL TEMP VIEW gv_race_results 214 | AS 215 | SELECT * 216 | FROM demo.race_results_python 217 | WHERE race_year = 2012; 218 | 219 | -- COMMAND ---------- 220 | 221 | SELECT * FROM global_temp.gv_race_results 222 | 223 | -- COMMAND ---------- 224 | 225 | SHOW TABLES IN global_temp; 226 | 227 | -- COMMAND ---------- 228 | 229 | CREATE OR REPLACE VIEW demo.pv_race_results 230 | AS 231 | SELECT * 232 | FROM demo.race_results_python 233 | WHERE race_year = 2000; 234 | 235 | -- COMMAND ---------- 236 | 237 | SHOW TABLES IN demo; 238 | 239 | -- COMMAND ---------- 240 | 241 | SELECT * FROM demo.pv_race_results; 242 | 243 | -- COMMAND ---------- 244 | 245 | -------------------------------------------------------------------------------- /demo/7.sql_basics_demo.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | SHOW DATABASES; 3 | 4 | -- COMMAND ---------- 5 | 6 | SELECT CURRENT_DATABASE() 7 | 8 | -- COMMAND ---------- 9 | 10 | USE f1_processed; 11 | 12 | -- COMMAND ---------- 13 | 14 | SHOW TABLES; 15 | 16 | -- COMMAND ---------- 17 | 18 | SELECT * 19 | FROM drivers; 20 | 21 | -- COMMAND ---------- 22 | 23 | DESC drivers; 24 | 25 | -- COMMAND ---------- 26 | 27 | SELECT * 28 | FROM drivers 29 | WHERE nationality = 'British' 30 | AND dob >= '1990-01-01'; 31 | 32 | -- COMMAND ---------- 33 | 34 | SELECT name, dob AS date_of_birth 35 | FROM drivers 36 | WHERE nationality = 'British' 37 | AND dob >= '1990-01-01'; 38 | 39 | -- COMMAND ---------- 40 | 41 | SELECT name, dob 42 | FROM drivers 43 | 
WHERE nationality = 'British' 44 | AND dob >= '1990-01-01' 45 | ORDER BY dob DESC; 46 | 47 | -- COMMAND ---------- 48 | 49 | SELECT * 50 | FROM drivers 51 | ORDER BY nationality ASC, 52 | dob DESC; 53 | 54 | -- COMMAND ---------- 55 | 56 | SELECT name, nationality,dob 57 | FROM drivers 58 | WHERE (nationality = 'British' 59 | AND dob >= '1990-01-01') 60 | OR nationality = 'Indian' 61 | ORDER BY dob DESC; 62 | 63 | -- COMMAND ---------- 64 | 65 | -------------------------------------------------------------------------------- /demo/8.sql_functions_demo.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | USE f1_processed; 3 | 4 | -- COMMAND ---------- 5 | 6 | SELECT *, CONCAT(driver_ref, '-', code) AS new_driver_ref 7 | FROM drivers 8 | 9 | -- COMMAND ---------- 10 | 11 | SELECT *, SPLIT(name, ' ')[0] forename, SPLIT(name, ' ')[1] surname 12 | FROM drivers 13 | 14 | -- COMMAND ---------- 15 | 16 | SELECT *, current_timestamp 17 | FROM drivers 18 | 19 | -- COMMAND ---------- 20 | 21 | SELECT *, date_format(dob, 'dd-MM-yyyy') 22 | FROM drivers 23 | 24 | -- COMMAND ---------- 25 | 26 | SELECT *, date_add(dob, 1) 27 | FROM drivers 28 | 29 | -- COMMAND ---------- 30 | 31 | SELECT COUNT(*) 32 | FROM drivers; 33 | 34 | -- COMMAND ---------- 35 | 36 | SELECT MAX(dob) 37 | FROM drivers; 38 | 39 | -- COMMAND ---------- 40 | 41 | SELECT * FROM drivers WHERE dob = '2000-05-11' 42 | 43 | -- COMMAND ---------- 44 | 45 | SELECT COUNT(*) 46 | FROM drivers 47 | WHERE nationality = 'British' ; 48 | 49 | -- COMMAND ---------- 50 | 51 | SELECT nationality, COUNT(*) 52 | FROM drivers 53 | GROUP BY nationality 54 | ORDER BY nationality; 55 | 56 | -- COMMAND ---------- 57 | 58 | SELECT nationality, COUNT(*) 59 | FROM drivers 60 | GROUP BY nationality 61 | HAVING COUNT(*) > 100 62 | ORDER BY nationality; 63 | 64 | -- COMMAND ---------- 65 | 66 | SELECT nationality, name, dob, RANK() OVER(PARTITION BY nationality ORDER 
BY dob DESC) AS age_rank 67 | FROM drivers 68 | ORDER BY nationality, age_rank 69 | 70 | -- COMMAND ---------- 71 | 72 | -------------------------------------------------------------------------------- /demo/9.sql_joins_demo.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | USE f1_presentation; 3 | 4 | -- COMMAND ---------- 5 | 6 | DESC driver_standings 7 | 8 | -- COMMAND ---------- 9 | 10 | CREATE OR REPLACE TEMP VIEW v_driver_standings_2018 11 | AS 12 | SELECT race_year, driver_name, team, total_points, wins, rank 13 | FROM driver_standings 14 | WHERE race_year = 2018; 15 | 16 | -- COMMAND ---------- 17 | 18 | SELECT * FROM v_driver_standings_2018 19 | 20 | -- COMMAND ---------- 21 | 22 | CREATE OR REPLACE TEMP VIEW v_driver_standings_2020 23 | AS 24 | SELECT race_year, driver_name, team, total_points, wins, rank 25 | FROM driver_standings 26 | WHERE race_year = 2020; 27 | 28 | -- COMMAND ---------- 29 | 30 | SELECT * FROM v_driver_standings_2020; 31 | 32 | -- COMMAND ---------- 33 | 34 | -- MAGIC %md 35 | -- MAGIC Inner Join 36 | 37 | -- COMMAND ---------- 38 | 39 | SELECT * 40 | FROM v_driver_standings_2018 d_2018 41 | JOIN v_driver_standings_2020 d_2020 42 | ON (d_2018.driver_name = d_2020.driver_name) 43 | 44 | -- COMMAND ---------- 45 | 46 | -- MAGIC %md 47 | -- MAGIC Left Join 48 | 49 | -- COMMAND ---------- 50 | 51 | SELECT * 52 | FROM v_driver_standings_2018 d_2018 53 | LEFT JOIN v_driver_standings_2020 d_2020 54 | ON (d_2018.driver_name = d_2020.driver_name) 55 | 56 | -- COMMAND ---------- 57 | 58 | -- MAGIC %md Right Join 59 | 60 | -- COMMAND ---------- 61 | 62 | SELECT * 63 | FROM v_driver_standings_2018 d_2018 64 | RIGHT JOIN v_driver_standings_2020 d_2020 65 | ON (d_2018.driver_name = d_2020.driver_name) 66 | 67 | -- COMMAND ---------- 68 | 69 | -- MAGIC %md 70 | -- MAGIC Full Join 71 | -- MAGIC 72 | 73 | -- COMMAND ---------- 74 | 75 | SELECT * 76 | FROM 
v_driver_standings_2018 d_2018 77 | FULL JOIN v_driver_standings_2020 d_2020 78 | ON (d_2018.driver_name = d_2020.driver_name) 79 | 80 | -- COMMAND ---------- 81 | 82 | -- MAGIC %md Semi Join 83 | 84 | -- COMMAND ---------- 85 | 86 | SELECT * 87 | FROM v_driver_standings_2018 d_2018 88 | SEMI JOIN v_driver_standings_2020 d_2020 89 | ON (d_2018.driver_name = d_2020.driver_name) 90 | 91 | -- COMMAND ---------- 92 | 93 | -- MAGIC %md Anti Join 94 | 95 | -- COMMAND ---------- 96 | 97 | SELECT * 98 | FROM v_driver_standings_2018 d_2018 99 | ANTI JOIN v_driver_standings_2020 d_2020 100 | ON (d_2018.driver_name = d_2020.driver_name) 101 | 102 | -- COMMAND ---------- 103 | 104 | -- MAGIC %md 105 | -- MAGIC Cross Join 106 | 107 | -- COMMAND ---------- 108 | 109 | SELECT * 110 | FROM v_driver_standings_2018 d_2018 111 | CROSS JOIN v_driver_standings_2020 d_2020 112 | 113 | -- COMMAND ---------- 114 | 115 | -------------------------------------------------------------------------------- /utils/1.prepare_for_incremental_load.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC ##### Drop all the tables 4 | 5 | -- COMMAND ---------- 6 | 7 | DROP DATABASE IF EXISTS f1_processed CASCADE; 8 | 9 | -- COMMAND ---------- 10 | 11 | CREATE DATABASE IF NOT EXISTS f1_processed 12 | LOCATION "abfss://processed@erformula1dl.dfs.core.windows.net/"; 13 | 14 | -- COMMAND ---------- 15 | 16 | DROP DATABASE IF EXISTS f1_presentation CASCADE; 17 | 18 | -- COMMAND ---------- 19 | 20 | CREATE DATABASE IF NOT EXISTS f1_presentation 21 | LOCATION "abfss://presentation@erformula1dl.dfs.core.windows.net/"; 22 | 23 | -- COMMAND ---------- 24 | 25 | -------------------------------------------------------------------------------- /utils/read.md: -------------------------------------------------------------------------------- 1 | 2 | 
--------------------------------------------------------------------------------