├── .gitattributes ├── .gitignore ├── 01_spark_session.ipynb ├── 02_basic_transformations_1.ipynb ├── 03_basic_transformations_2.ipynb ├── 04_string_and_dates.ipynb ├── 05_sort_union_aggregation.ipynb ├── 06_unique_data_and_window.ipynb ├── 07_joins_and_data_partitions.ipynb ├── 08_reading_from_csv_files.ipynb ├── 09_reading_complex_data_formats.ipynb ├── 10_read_json_files.ipynb ├── 11_writing_data.ipynb ├── 12_understand cluster.ipynb ├── 12_understand_cluster.py ├── 13_User_Defined_Functions.ipynb ├── 14_understand_dag_plan.ipynb ├── 15_optimizing_shuffles.ipynb ├── 16_spark_caching_techiniques.ipynb ├── 17_distributed_shared_variables.ipynb ├── 18_optimizing_joins.ipynb ├── 19_dynamic_allocation.ipynb ├── 20_skewness_and_spillage.ipynb ├── 21_aqe_spark.ipynb ├── 22_spark_sql.ipynb ├── 23_delta_lake.ipynb ├── 24_data_scanning_and_partitioning.ipynb ├── 25_delta_lake_optimization_and_z_ordering.ipynb ├── 26_run_concurrent_tasks.ipynb ├── 27_spark_memory_and_oom.ipynb ├── README.md ├── datasets ├── cities.csv ├── department_data.csv ├── emp.csv ├── emp_new.csv ├── employee_records.csv ├── new_sales-Copy1.csv ├── new_sales.csv ├── order_multiline.json ├── order_singleline.json ├── sales.csv ├── sales_data.orc ├── sales_data.parquet ├── sales_recursive │ └── sales_1 │ │ ├── ._SUCCESS.crc │ │ ├── .part-00000-5666d6d4-40f7-4238-8cb2-9596d0947d65-c000.snappy.parquet.crc │ │ ├── 1.parquet │ │ └── sales_2 │ │ ├── ._SUCCESS.crc │ │ ├── .part-00000-1d8bb305-3cd7-4253-9bbf-ac8658c7cbd2-c000.snappy.parquet.crc │ │ └── 2.parquet ├── sales_total_orc │ ├── ._SUCCESS.crc │ ├── .part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── .part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc │ ├── part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ ├── part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc │ └── part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc └── sales_total_parquet │ ├── ._SUCCESS.crc │ ├── .part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── .part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc │ ├── part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ ├── part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet │ └── part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet ├── examples └── emp.csv └── spark_oom_files.7z /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/.gitignore -------------------------------------------------------------------------------- /01_spark_session.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/01_spark_session.ipynb -------------------------------------------------------------------------------- /02_basic_transformations_1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/02_basic_transformations_1.ipynb -------------------------------------------------------------------------------- /03_basic_transformations_2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/03_basic_transformations_2.ipynb -------------------------------------------------------------------------------- /04_string_and_dates.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/04_string_and_dates.ipynb -------------------------------------------------------------------------------- /05_sort_union_aggregation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/05_sort_union_aggregation.ipynb -------------------------------------------------------------------------------- /06_unique_data_and_window.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/06_unique_data_and_window.ipynb -------------------------------------------------------------------------------- /07_joins_and_data_partitions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/07_joins_and_data_partitions.ipynb -------------------------------------------------------------------------------- /08_reading_from_csv_files.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/08_reading_from_csv_files.ipynb -------------------------------------------------------------------------------- /09_reading_complex_data_formats.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/09_reading_complex_data_formats.ipynb -------------------------------------------------------------------------------- /10_read_json_files.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/10_read_json_files.ipynb -------------------------------------------------------------------------------- /11_writing_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/11_writing_data.ipynb -------------------------------------------------------------------------------- /12_understand cluster.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/12_understand cluster.ipynb -------------------------------------------------------------------------------- /12_understand_cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/12_understand_cluster.py -------------------------------------------------------------------------------- /13_User_Defined_Functions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/13_User_Defined_Functions.ipynb -------------------------------------------------------------------------------- /14_understand_dag_plan.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/14_understand_dag_plan.ipynb -------------------------------------------------------------------------------- /15_optimizing_shuffles.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/15_optimizing_shuffles.ipynb -------------------------------------------------------------------------------- /16_spark_caching_techiniques.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/16_spark_caching_techiniques.ipynb -------------------------------------------------------------------------------- /17_distributed_shared_variables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/17_distributed_shared_variables.ipynb -------------------------------------------------------------------------------- /18_optimizing_joins.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/18_optimizing_joins.ipynb -------------------------------------------------------------------------------- /19_dynamic_allocation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/19_dynamic_allocation.ipynb -------------------------------------------------------------------------------- /20_skewness_and_spillage.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/20_skewness_and_spillage.ipynb -------------------------------------------------------------------------------- /21_aqe_spark.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/21_aqe_spark.ipynb -------------------------------------------------------------------------------- /22_spark_sql.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/22_spark_sql.ipynb -------------------------------------------------------------------------------- /23_delta_lake.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/23_delta_lake.ipynb -------------------------------------------------------------------------------- /24_data_scanning_and_partitioning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/24_data_scanning_and_partitioning.ipynb -------------------------------------------------------------------------------- /25_delta_lake_optimization_and_z_ordering.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/25_delta_lake_optimization_and_z_ordering.ipynb -------------------------------------------------------------------------------- /26_run_concurrent_tasks.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/26_run_concurrent_tasks.ipynb -------------------------------------------------------------------------------- /27_spark_memory_and_oom.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/27_spark_memory_and_oom.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/README.md -------------------------------------------------------------------------------- /datasets/cities.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/cities.csv -------------------------------------------------------------------------------- /datasets/department_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/department_data.csv -------------------------------------------------------------------------------- /datasets/emp.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/emp.csv -------------------------------------------------------------------------------- /datasets/emp_new.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/emp_new.csv -------------------------------------------------------------------------------- /datasets/employee_records.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/employee_records.csv -------------------------------------------------------------------------------- /datasets/new_sales-Copy1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/new_sales-Copy1.csv -------------------------------------------------------------------------------- /datasets/new_sales.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/new_sales.csv -------------------------------------------------------------------------------- /datasets/order_multiline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/order_multiline.json -------------------------------------------------------------------------------- /datasets/order_singleline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/order_singleline.json -------------------------------------------------------------------------------- /datasets/sales.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales.csv -------------------------------------------------------------------------------- /datasets/sales_data.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_data.orc -------------------------------------------------------------------------------- /datasets/sales_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_data.parquet -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/._SUCCESS.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/._SUCCESS.crc -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/.part-00000-5666d6d4-40f7-4238-8cb2-9596d0947d65-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/.part-00000-5666d6d4-40f7-4238-8cb2-9596d0947d65-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/1.parquet -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/sales_2/._SUCCESS.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/sales_2/._SUCCESS.crc -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/sales_2/.part-00000-1d8bb305-3cd7-4253-9bbf-ac8658c7cbd2-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/sales_2/.part-00000-1d8bb305-3cd7-4253-9bbf-ac8658c7cbd2-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_recursive/sales_1/sales_2/2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_recursive/sales_1/sales_2/2.parquet -------------------------------------------------------------------------------- /datasets/sales_total_orc/._SUCCESS.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/._SUCCESS.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/.part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/.part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc.crc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00000-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00001-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00002-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00003-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00004-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00005-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00006-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_orc/part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_orc/part-00007-d8adfd02-1b5d-4ec5-a4c2-8323a54d7bbf-c000.snappy.orc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/._SUCCESS.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/._SUCCESS.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/.part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/.part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00000-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00001-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00002-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00003-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00004-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00005-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00006-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /datasets/sales_total_parquet/part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/datasets/sales_total_parquet/part-00007-ddf48a6d-f4d3-44ad-8912-f4482603026b-c000.snappy.parquet -------------------------------------------------------------------------------- /examples/emp.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/examples/emp.csv -------------------------------------------------------------------------------- /spark_oom_files.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/subhamkharwal/pyspark-zero-to-hero/HEAD/spark_oom_files.7z --------------------------------------------------------------------------------