├── Datasets ├── kaggle_housing │ ├── data_description.txt │ ├── sample_submission.csv │ ├── test.csv │ └── train.csv ├── kaggle_titanic │ ├── gender_submission.csv │ ├── test.csv │ └── train.csv └── movielens │ ├── README.txt │ ├── links.csv │ ├── movies.csv │ ├── ratings.csv │ └── tags.csv ├── LICENSE.md ├── Modules ├── 01 Introduction │ ├── Labs │ │ ├── Lab 1 - Introducing Apache Spark.ipynb │ │ ├── Lab 2 - Transformations on RDD.ipynb │ │ ├── Lab 3 - Actions on RDD.ipynb │ │ └── Lab 4 - Advanced Transformations on RDD.ipynb │ └── Lectures │ │ └── Lecture 1 - Introduction to the Course.pdf ├── 02 SQL and DataFrame │ ├── Lab 5 - Spark SQL and Data Frames.ipynb │ ├── Lab 6 - Data Frame Operations.ipynb │ └── Weather in Australia Challenge.ipynb ├── 03 Batch Processing │ ├── Lecture 2 - Building Batch Applications.pdf │ └── Lecture 3 - Our ETL Application.pdf ├── 04 Stream Processing │ ├── Labs │ │ ├── Lab 7 - Structured Streaming.ipynb │ │ ├── Lab 8 - Structured Streaming Kinesis.ipynb │ │ └── push_data_to_kinesis.py │ └── lectures │ │ ├── Lecture 4 - Streaming Application.pdf │ │ └── Lecture 5 - Spark Streaming vs Structured Streaming.pdf └── 05 Machine Learning │ ├── Lab │ ├── Classification.ipynb │ └── Regression.ipynb │ └── Lecture 6 - Machine Learning.pdf ├── README.md ├── Resources ├── A_Gentle_Introduction_to_Apache_Spark.pdf ├── Import_Data_to_Databricks.pdf ├── PySpark_Cheat_Sheet_for_Python.pdf ├── PySpark_SQL_Cheat_Sheet_Python.pdf └── Python_Cheat_Sheet.pdf └── Scripts ├── ec2_install_env.sh ├── ec2_install_superset.sh └── transform_load_example_script_for_project_1.py /Datasets/kaggle_housing/data_description.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_housing/data_description.txt -------------------------------------------------------------------------------- /Datasets/kaggle_housing/sample_submission.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_housing/sample_submission.csv -------------------------------------------------------------------------------- /Datasets/kaggle_housing/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_housing/test.csv -------------------------------------------------------------------------------- /Datasets/kaggle_housing/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_housing/train.csv -------------------------------------------------------------------------------- /Datasets/kaggle_titanic/gender_submission.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_titanic/gender_submission.csv -------------------------------------------------------------------------------- /Datasets/kaggle_titanic/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_titanic/test.csv -------------------------------------------------------------------------------- /Datasets/kaggle_titanic/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/kaggle_titanic/train.csv -------------------------------------------------------------------------------- /Datasets/movielens/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/movielens/README.txt -------------------------------------------------------------------------------- /Datasets/movielens/links.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/movielens/links.csv -------------------------------------------------------------------------------- /Datasets/movielens/movies.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/movielens/movies.csv -------------------------------------------------------------------------------- /Datasets/movielens/ratings.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/movielens/ratings.csv -------------------------------------------------------------------------------- /Datasets/movielens/tags.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Datasets/movielens/tags.csv -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/LICENSE.md -------------------------------------------------------------------------------- /Modules/01 Introduction/Labs/Lab 1 - Introducing Apache Spark.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/01 Introduction/Labs/Lab 1 - Introducing Apache Spark.ipynb -------------------------------------------------------------------------------- /Modules/01 Introduction/Labs/Lab 2 - Transformations on RDD.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/01 Introduction/Labs/Lab 2 - Transformations on RDD.ipynb -------------------------------------------------------------------------------- /Modules/01 Introduction/Labs/Lab 3 - Actions on RDD.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/01 Introduction/Labs/Lab 3 - Actions on RDD.ipynb -------------------------------------------------------------------------------- /Modules/01 Introduction/Labs/Lab 4 - Advanced Transformations on RDD.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/01 Introduction/Labs/Lab 4 - Advanced Transformations on RDD.ipynb -------------------------------------------------------------------------------- /Modules/01 Introduction/Lectures/Lecture 1 - Introduction to the Course.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/01 Introduction/Lectures/Lecture 1 - Introduction to the Course.pdf -------------------------------------------------------------------------------- /Modules/02 SQL and DataFrame/Lab 5 - Spark SQL and Data Frames.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/02 SQL and DataFrame/Lab 5 - Spark SQL and Data Frames.ipynb -------------------------------------------------------------------------------- /Modules/02 SQL and DataFrame/Lab 6 - Data Frame Operations.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/02 SQL and DataFrame/Lab 6 - Data Frame Operations.ipynb -------------------------------------------------------------------------------- /Modules/02 SQL and DataFrame/Weather in Australia Challenge.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/02 SQL and DataFrame/Weather in Australia Challenge.ipynb -------------------------------------------------------------------------------- /Modules/03 Batch Processing/Lecture 2 - Building Batch Applications.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/03 Batch Processing/Lecture 2 - Building Batch Applications.pdf -------------------------------------------------------------------------------- /Modules/03 Batch Processing/Lecture 3 - Our ETL Application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/03 Batch Processing/Lecture 3 - Our ETL Application.pdf -------------------------------------------------------------------------------- /Modules/04 Stream Processing/Labs/Lab 7 - Structured Streaming.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/04 Stream Processing/Labs/Lab 7 - Structured Streaming.ipynb -------------------------------------------------------------------------------- /Modules/04 Stream Processing/Labs/Lab 8 - Structured Streaming Kinesis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/04 Stream Processing/Labs/Lab 8 - Structured Streaming Kinesis.ipynb -------------------------------------------------------------------------------- /Modules/04 Stream Processing/Labs/push_data_to_kinesis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/04 Stream Processing/Labs/push_data_to_kinesis.py -------------------------------------------------------------------------------- /Modules/04 Stream Processing/lectures/Lecture 4 - Streaming Application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/04 Stream Processing/lectures/Lecture 4 - Streaming Application.pdf -------------------------------------------------------------------------------- /Modules/04 Stream Processing/lectures/Lecture 5 - Spark Streaming vs Structured Streaming.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/04 Stream Processing/lectures/Lecture 5 - Spark Streaming vs Structured Streaming.pdf -------------------------------------------------------------------------------- /Modules/05 Machine Learning/Lab/Classification.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/05 Machine Learning/Lab/Classification.ipynb -------------------------------------------------------------------------------- /Modules/05 Machine Learning/Lab/Regression.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/05 Machine Learning/Lab/Regression.ipynb -------------------------------------------------------------------------------- /Modules/05 Machine Learning/Lecture 6 - Machine Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Modules/05 Machine Learning/Lecture 6 - Machine Learning.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/README.md -------------------------------------------------------------------------------- /Resources/A_Gentle_Introduction_to_Apache_Spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Resources/A_Gentle_Introduction_to_Apache_Spark.pdf -------------------------------------------------------------------------------- /Resources/Import_Data_to_Databricks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Resources/Import_Data_to_Databricks.pdf -------------------------------------------------------------------------------- /Resources/PySpark_Cheat_Sheet_for_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Resources/PySpark_Cheat_Sheet_for_Python.pdf -------------------------------------------------------------------------------- /Resources/PySpark_SQL_Cheat_Sheet_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Resources/PySpark_SQL_Cheat_Sheet_Python.pdf -------------------------------------------------------------------------------- /Resources/Python_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Resources/Python_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /Scripts/ec2_install_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Scripts/ec2_install_env.sh -------------------------------------------------------------------------------- /Scripts/ec2_install_superset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Scripts/ec2_install_superset.sh -------------------------------------------------------------------------------- /Scripts/transform_load_example_script_for_project_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osin-vladimir/architect_big_data_solutions_with_spark/HEAD/Scripts/transform_load_example_script_for_project_1.py --------------------------------------------------------------------------------