├── .gitignore ├── LICENSE ├── Labs ├── Lab 1 Midway RCC and mpi4py │ ├── midway_cheat_sheet.md │ ├── mpi.sbatch │ ├── mpi_multi_job.sbatch │ └── mpi_rand_walk.py ├── Lab 2 PyOpenCL │ ├── Lab_2_PyOpenCL_Random_Walk_Tutorial.ipynb │ ├── gpu.sbatch │ ├── gpu_rand_walk.py │ └── print_gpu_info.py ├── Lab 3 AWS EC2 and PyWren │ ├── Lab_3_PyWren.ipynb │ └── pywren_workflow.png ├── Lab 4 Accessing Large-Scale Data in S3 │ └── Lab 4 Working with Large Data Sources in S3.ipynb ├── Lab 5 Ingesting and Processing Large-Scale Data │ ├── Part I MapReduce │ │ ├── .mrjob.conf │ │ ├── mapreduce_lab5.py │ │ ├── mrjob_cheatsheet.md │ │ └── sample_us.tsv │ └── Part II Kinesis │ │ ├── Lab 5 Kinesis.ipynb │ │ ├── consumer.py │ │ ├── consumer_feed.png │ │ ├── producer.py │ │ └── simple_kinesis_architecture.png ├── Lab 6 PySpark EDA and ML in an EMR Notebook │ ├── Lab_6.ipynb │ └── Local_Colab_Spark_Setup.ipynb └── Lab 7 Large-Scale Graph Processing with PySpark │ ├── Lab_7_GraphFrames.ipynb │ ├── edges.csv │ └── nodes.csv └── README.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/LICENSE -------------------------------------------------------------------------------- /Labs/Lab 1 Midway RCC and mpi4py/midway_cheat_sheet.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 1 Midway RCC and mpi4py/midway_cheat_sheet.md -------------------------------------------------------------------------------- /Labs/Lab 1 Midway RCC and mpi4py/mpi.sbatch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 1 Midway RCC and mpi4py/mpi.sbatch -------------------------------------------------------------------------------- /Labs/Lab 1 Midway RCC and mpi4py/mpi_multi_job.sbatch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 1 Midway RCC and mpi4py/mpi_multi_job.sbatch -------------------------------------------------------------------------------- /Labs/Lab 1 Midway RCC and mpi4py/mpi_rand_walk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 1 Midway RCC and mpi4py/mpi_rand_walk.py -------------------------------------------------------------------------------- /Labs/Lab 2 PyOpenCL/Lab_2_PyOpenCL_Random_Walk_Tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 2 PyOpenCL/Lab_2_PyOpenCL_Random_Walk_Tutorial.ipynb -------------------------------------------------------------------------------- /Labs/Lab 2 PyOpenCL/gpu.sbatch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 2 PyOpenCL/gpu.sbatch -------------------------------------------------------------------------------- /Labs/Lab 2 PyOpenCL/gpu_rand_walk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 2 PyOpenCL/gpu_rand_walk.py -------------------------------------------------------------------------------- /Labs/Lab 2 PyOpenCL/print_gpu_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 2 PyOpenCL/print_gpu_info.py -------------------------------------------------------------------------------- /Labs/Lab 3 AWS EC2 and PyWren/Lab_3_PyWren.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 3 AWS EC2 and PyWren/Lab_3_PyWren.ipynb -------------------------------------------------------------------------------- /Labs/Lab 3 AWS EC2 and PyWren/pywren_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 3 AWS EC2 and PyWren/pywren_workflow.png -------------------------------------------------------------------------------- /Labs/Lab 4 Accessing Large-Scale Data in S3/Lab 4 Working with Large Data Sources in S3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 4 Accessing Large-Scale Data in S3/Lab 4 Working with Large Data Sources in S3.ipynb -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/.mrjob.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/.mrjob.conf -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/mapreduce_lab5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/mapreduce_lab5.py -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/mrjob_cheatsheet.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/mrjob_cheatsheet.md -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/sample_us.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part I MapReduce/sample_us.tsv -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/Lab 5 Kinesis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/Lab 5 Kinesis.ipynb -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/consumer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/consumer.py -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/consumer_feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/consumer_feed.png -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/producer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/producer.py -------------------------------------------------------------------------------- /Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/simple_kinesis_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 5 Ingesting and Processing Large-Scale Data/Part II Kinesis/simple_kinesis_architecture.png -------------------------------------------------------------------------------- /Labs/Lab 6 PySpark EDA and ML in an EMR Notebook/Lab_6.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 6 PySpark EDA and ML in an EMR Notebook/Lab_6.ipynb -------------------------------------------------------------------------------- /Labs/Lab 6 PySpark EDA and ML in an EMR Notebook/Local_Colab_Spark_Setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 6 PySpark EDA and ML in an EMR Notebook/Local_Colab_Spark_Setup.ipynb -------------------------------------------------------------------------------- /Labs/Lab 7 Large-Scale Graph Processing with PySpark/Lab_7_GraphFrames.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 7 Large-Scale Graph Processing with PySpark/Lab_7_GraphFrames.ipynb -------------------------------------------------------------------------------- /Labs/Lab 7 Large-Scale Graph Processing with PySpark/edges.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 7 Large-Scale Graph Processing with PySpark/edges.csv -------------------------------------------------------------------------------- /Labs/Lab 7 Large-Scale Graph Processing with PySpark/nodes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/Labs/Lab 7 Large-Scale Graph Processing with PySpark/nodes.csv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonclindaniel/LargeScaleComputing_S20/HEAD/README.md --------------------------------------------------------------------------------