├── .gitignore ├── 10_Union_vs_Union_all.ipynb ├── 11_S3_bucket_transformation.ipynb ├── 1_USER_DEFINED_SCHEMA_IN_SPARK.ipynb ├── 2_HANDELING_CORRUPTED_RECORDS_IN_SPARK.ipynb ├── 3_READ_FILES_IN_SPARK.ipynb ├── 4_JSON_DATA_READS.ipynb ├── 5_WRITING_FILES.ipynb ├── 6_CREATING_DATAFRAMES.ipynb ├── 7_FUNDAMENTAL_DATAFRAME_OPERATIONS.ipynb ├── 8_write_CSV_files_In_Databricks.ipynb ├── 9_Transformation_in_spark.ipynb ├── FILES_THAT_I_USED ├── JSON │ ├── Multi_line_incorrect.json │ ├── corrupted_json.json │ └── multiline_correct_json.json ├── TO_READ │ ├── corruption.csv │ └── flight_data.csv └── TO_WRITE │ └── write_fn.csv └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | git_push.sh 2 | -------------------------------------------------------------------------------- /10_Union_vs_Union_all.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/10_Union_vs_Union_all.ipynb -------------------------------------------------------------------------------- /11_S3_bucket_transformation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/11_S3_bucket_transformation.ipynb -------------------------------------------------------------------------------- /1_USER_DEFINED_SCHEMA_IN_SPARK.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/1_USER_DEFINED_SCHEMA_IN_SPARK.ipynb -------------------------------------------------------------------------------- /2_HANDELING_CORRUPTED_RECORDS_IN_SPARK.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/2_HANDELING_CORRUPTED_RECORDS_IN_SPARK.ipynb -------------------------------------------------------------------------------- /3_READ_FILES_IN_SPARK.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/3_READ_FILES_IN_SPARK.ipynb -------------------------------------------------------------------------------- /4_JSON_DATA_READS.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/4_JSON_DATA_READS.ipynb -------------------------------------------------------------------------------- /5_WRITING_FILES.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/5_WRITING_FILES.ipynb -------------------------------------------------------------------------------- /6_CREATING_DATAFRAMES.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/6_CREATING_DATAFRAMES.ipynb -------------------------------------------------------------------------------- /7_FUNDAMENTAL_DATAFRAME_OPERATIONS.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/7_FUNDAMENTAL_DATAFRAME_OPERATIONS.ipynb -------------------------------------------------------------------------------- /8_write_CSV_files_In_Databricks.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/8_write_CSV_files_In_Databricks.ipynb -------------------------------------------------------------------------------- /9_Transformation_in_spark.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/9_Transformation_in_spark.ipynb -------------------------------------------------------------------------------- /FILES_THAT_I_USED/JSON/Multi_line_incorrect.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/JSON/Multi_line_incorrect.json -------------------------------------------------------------------------------- /FILES_THAT_I_USED/JSON/corrupted_json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/JSON/corrupted_json.json -------------------------------------------------------------------------------- /FILES_THAT_I_USED/JSON/multiline_correct_json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/JSON/multiline_correct_json.json -------------------------------------------------------------------------------- /FILES_THAT_I_USED/TO_READ/corruption.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/TO_READ/corruption.csv -------------------------------------------------------------------------------- /FILES_THAT_I_USED/TO_READ/flight_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/TO_READ/flight_data.csv -------------------------------------------------------------------------------- /FILES_THAT_I_USED/TO_WRITE/write_fn.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/FILES_THAT_I_USED/TO_WRITE/write_fn.csv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aaditya-git/PYSPARK/HEAD/README.md --------------------------------------------------------------------------------