├── .gitignore ├── README.md ├── data ├── GreenTaxi │ └── green_tripdata_2019-12.csv ├── YellowTaxi │ ├── yellow_tripdata_2021-01.parquet │ └── yellow_tripdata_2022-01.parquet └── nyctaxi │ └── taxizone │ └── taxi_rate_code.csv ├── db-notebooks ├── Chapter-03 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Hive Databases.py │ ├── 02 - CreateDeltaTablesWithSQL.py │ ├── 03 - The Describe Command.py │ ├── 04 - The DataFrameWriter API.py │ ├── 05 - The DeltaTableBuilder API.py │ ├── 06 - Generated Columns.py │ ├── 07 - Read Delta Table with SQL.py │ ├── 08 - Read Table with PySpark.py │ ├── 10 - Writing To a Delta Table.py │ ├── 11 - Partitions.py │ └── 12 - User Defined Metadata.py ├── Chapter-04 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Delete Operations.py │ ├── 02 - Update Operations.py │ └── 03 - Merge Operations.py ├── Chapter-05 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Data Skipping.py │ ├── 02 - Partitioning.py │ ├── 03 - Compaction, Optimize and ZOrder.py │ └── 04 - LiquidClustering.py ├── Chapter-06 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Time Travel.py │ └── 02 - Change Data Feed.py ├── Chapter-07 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Schema Enforcement.py │ ├── 02 - Schema Evolution.py │ └── 03 - Explicit Schema Updates.py ├── Chapter-08 │ ├── 00 - Chapter Initialization.py │ ├── 01 - Simple Streaming.py │ ├── 02 - AvailableNow Streaming.py │ └── 03 - Change Data Feed.py ├── Chapter-10 │ └── 01 - Sharing Example.py ├── DeltaLakeUpAndRunning.dbc └── README.md └── python └── chapter02 ├── helloDeltaLake.py ├── manualPartitioning.py ├── multipleWriteOperations.py ├── readCheckPointFile.py ├── transactionLogCheckPointExample.py ├── updateOperation.py ├── writeDeltaFile.py └── writeParquetFile.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/README.md -------------------------------------------------------------------------------- /data/GreenTaxi/green_tripdata_2019-12.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/data/GreenTaxi/green_tripdata_2019-12.csv -------------------------------------------------------------------------------- /data/YellowTaxi/yellow_tripdata_2021-01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/data/YellowTaxi/yellow_tripdata_2021-01.parquet -------------------------------------------------------------------------------- /data/YellowTaxi/yellow_tripdata_2022-01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/data/YellowTaxi/yellow_tripdata_2022-01.parquet -------------------------------------------------------------------------------- /data/nyctaxi/taxizone/taxi_rate_code.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/data/nyctaxi/taxizone/taxi_rate_code.csv -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/01 - Hive Databases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/01 - Hive Databases.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/02 - CreateDeltaTablesWithSQL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/02 - CreateDeltaTablesWithSQL.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/03 - The Describe Command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/03 - The Describe Command.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/04 - The DataFrameWriter API.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/04 - The DataFrameWriter API.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/05 - The DeltaTableBuilder API.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/05 - The DeltaTableBuilder API.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/06 - Generated Columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/06 - Generated Columns.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/07 - Read Delta Table with SQL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/07 - Read Delta Table with SQL.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/08 - Read Table with PySpark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/08 - Read Table with PySpark.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/10 - Writing To a Delta Table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/10 - Writing To a Delta Table.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/11 - Partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/11 - Partitions.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-03/12 - User Defined Metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-03/12 - User Defined Metadata.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-04/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-04/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-04/01 - Delete Operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-04/01 - Delete Operations.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-04/02 - Update Operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-04/02 - Update Operations.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-04/03 - Merge Operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-04/03 - Merge Operations.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-05/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-05/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-05/01 - Data Skipping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-05/01 - Data Skipping.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-05/02 - Partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-05/02 - Partitioning.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-05/03 - Compaction, Optimize and ZOrder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-05/03 - Compaction, Optimize and ZOrder.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-05/04 - LiquidClustering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-05/04 - LiquidClustering.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-06/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-06/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-06/01 - Time Travel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-06/01 - Time Travel.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-06/02 - Change Data Feed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-06/02 - Change Data Feed.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-07/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-07/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-07/01 - Schema Enforcement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-07/01 - Schema Enforcement.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-07/02 - Schema Evolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-07/02 - Schema Evolution.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-07/03 - Explicit Schema Updates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-07/03 - Explicit Schema Updates.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-08/00 - Chapter Initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-08/00 - Chapter Initialization.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-08/01 - Simple Streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-08/01 - Simple Streaming.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-08/02 - AvailableNow Streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-08/02 - AvailableNow Streaming.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-08/03 - Change Data Feed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-08/03 - Change Data Feed.py -------------------------------------------------------------------------------- /db-notebooks/Chapter-10/01 - Sharing Example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/Chapter-10/01 - Sharing Example.py -------------------------------------------------------------------------------- /db-notebooks/DeltaLakeUpAndRunning.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/DeltaLakeUpAndRunning.dbc -------------------------------------------------------------------------------- /db-notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/db-notebooks/README.md -------------------------------------------------------------------------------- /python/chapter02/helloDeltaLake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/helloDeltaLake.py -------------------------------------------------------------------------------- /python/chapter02/manualPartitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/manualPartitioning.py -------------------------------------------------------------------------------- /python/chapter02/multipleWriteOperations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/multipleWriteOperations.py -------------------------------------------------------------------------------- /python/chapter02/readCheckPointFile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/readCheckPointFile.py -------------------------------------------------------------------------------- /python/chapter02/transactionLogCheckPointExample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/transactionLogCheckPointExample.py -------------------------------------------------------------------------------- /python/chapter02/updateOperation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/updateOperation.py -------------------------------------------------------------------------------- /python/chapter02/writeDeltaFile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/writeDeltaFile.py -------------------------------------------------------------------------------- /python/chapter02/writeParquetFile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benniehaelen/delta-lake-up-and-running/HEAD/python/chapter02/writeParquetFile.py --------------------------------------------------------------------------------