├── .gitignore ├── HadoopWindowsUserSetup.md ├── README.md ├── docker-clean.sh ├── project └── build.properties ├── spark-cluster ├── README.md ├── build-images.sh ├── docker-compose.yml ├── docker │ ├── base │ │ └── Dockerfile │ ├── spark-master │ │ ├── Dockerfile │ │ └── start-master.sh │ ├── spark-submit │ │ ├── Dockerfile │ │ └── spark-submit.sh │ └── spark-worker │ │ ├── Dockerfile │ │ └── start-worker.sh └── env │ └── spark-worker.sh └── src ├── META-INF └── MANIFEST.MF └── main ├── resources └── data │ ├── bands │ └── bands.json │ ├── cars │ └── cars.json │ ├── employees │ └── employees.csv │ ├── employees_headers │ └── employees_headers.csv │ ├── flights │ └── flights.json │ ├── guitarPlayers │ └── guitarPlayers.json │ ├── guitars │ └── guitars.json │ ├── lipsum │ └── words.txt │ └── movies │ └── movies.json └── scala ├── common ├── DataGenerator.scala └── GuitarsDomain.scala ├── part1recap ├── ScalaRecap.scala └── SparkRecap.scala ├── part2foundations ├── CatalystDemo.scala ├── ReadingDAGs.scala ├── ReadingQueryPlans.scala ├── SparkAPIs.scala ├── SparkJobAnatomy.scala ├── TestDeployApp.scala └── TungstenDemo.scala ├── part3caching ├── Caching.scala └── Checkpointing.scala ├── part4partitioning ├── Partitioners.scala ├── PartitioningProblems.scala └── RepartitionCoalesce.scala ├── part5boost ├── FixingDataSkews.scala ├── KryoSerializer.scala └── SerializationProblems.scala └── playground └── Playground.scala /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/.gitignore -------------------------------------------------------------------------------- /HadoopWindowsUserSetup.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/HadoopWindowsUserSetup.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/README.md -------------------------------------------------------------------------------- /docker-clean.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/docker-clean.sh -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.9.6 -------------------------------------------------------------------------------- /spark-cluster/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/README.md -------------------------------------------------------------------------------- /spark-cluster/build-images.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/build-images.sh -------------------------------------------------------------------------------- /spark-cluster/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker-compose.yml -------------------------------------------------------------------------------- /spark-cluster/docker/base/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/base/Dockerfile -------------------------------------------------------------------------------- /spark-cluster/docker/spark-master/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-master/Dockerfile -------------------------------------------------------------------------------- /spark-cluster/docker/spark-master/start-master.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-master/start-master.sh -------------------------------------------------------------------------------- /spark-cluster/docker/spark-submit/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-submit/Dockerfile -------------------------------------------------------------------------------- /spark-cluster/docker/spark-submit/spark-submit.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-submit/spark-submit.sh -------------------------------------------------------------------------------- /spark-cluster/docker/spark-worker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-worker/Dockerfile -------------------------------------------------------------------------------- /spark-cluster/docker/spark-worker/start-worker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/docker/spark-worker/start-worker.sh -------------------------------------------------------------------------------- /spark-cluster/env/spark-worker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/spark-cluster/env/spark-worker.sh -------------------------------------------------------------------------------- /src/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/META-INF/MANIFEST.MF -------------------------------------------------------------------------------- /src/main/resources/data/bands/bands.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/bands/bands.json -------------------------------------------------------------------------------- /src/main/resources/data/cars/cars.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/cars/cars.json -------------------------------------------------------------------------------- /src/main/resources/data/employees/employees.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/employees/employees.csv -------------------------------------------------------------------------------- /src/main/resources/data/employees_headers/employees_headers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/employees_headers/employees_headers.csv -------------------------------------------------------------------------------- /src/main/resources/data/flights/flights.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/flights/flights.json -------------------------------------------------------------------------------- /src/main/resources/data/guitarPlayers/guitarPlayers.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/guitarPlayers/guitarPlayers.json -------------------------------------------------------------------------------- /src/main/resources/data/guitars/guitars.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/guitars/guitars.json -------------------------------------------------------------------------------- /src/main/resources/data/lipsum/words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/lipsum/words.txt -------------------------------------------------------------------------------- /src/main/resources/data/movies/movies.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/resources/data/movies/movies.json -------------------------------------------------------------------------------- /src/main/scala/common/DataGenerator.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/common/DataGenerator.scala -------------------------------------------------------------------------------- /src/main/scala/common/GuitarsDomain.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/common/GuitarsDomain.scala -------------------------------------------------------------------------------- /src/main/scala/part1recap/ScalaRecap.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part1recap/ScalaRecap.scala -------------------------------------------------------------------------------- /src/main/scala/part1recap/SparkRecap.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part1recap/SparkRecap.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/CatalystDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/CatalystDemo.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/ReadingDAGs.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/ReadingDAGs.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/ReadingQueryPlans.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/ReadingQueryPlans.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/SparkAPIs.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/SparkAPIs.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/SparkJobAnatomy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/SparkJobAnatomy.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/TestDeployApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/TestDeployApp.scala -------------------------------------------------------------------------------- /src/main/scala/part2foundations/TungstenDemo.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part2foundations/TungstenDemo.scala -------------------------------------------------------------------------------- /src/main/scala/part3caching/Caching.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part3caching/Caching.scala -------------------------------------------------------------------------------- /src/main/scala/part3caching/Checkpointing.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part3caching/Checkpointing.scala -------------------------------------------------------------------------------- /src/main/scala/part4partitioning/Partitioners.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part4partitioning/Partitioners.scala -------------------------------------------------------------------------------- /src/main/scala/part4partitioning/PartitioningProblems.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part4partitioning/PartitioningProblems.scala -------------------------------------------------------------------------------- /src/main/scala/part4partitioning/RepartitionCoalesce.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part4partitioning/RepartitionCoalesce.scala -------------------------------------------------------------------------------- /src/main/scala/part5boost/FixingDataSkews.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part5boost/FixingDataSkews.scala -------------------------------------------------------------------------------- /src/main/scala/part5boost/KryoSerializer.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part5boost/KryoSerializer.scala -------------------------------------------------------------------------------- /src/main/scala/part5boost/SerializationProblems.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/part5boost/SerializationProblems.scala -------------------------------------------------------------------------------- /src/main/scala/playground/Playground.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rockthejvm/spark-performance-tuning/HEAD/src/main/scala/playground/Playground.scala --------------------------------------------------------------------------------