├── .gitignore ├── HadoopWindowsUserSetup.md ├── README.md ├── build.sbt ├── docker-clean.sh ├── project └── build.properties ├── spark-cluster ├── README.md ├── build-images.sh ├── docker-compose.yml ├── docker │ ├── base │ │ └── Dockerfile │ ├── spark-master │ │ ├── Dockerfile │ │ └── start-master.sh │ ├── spark-submit │ │ ├── Dockerfile │ │ └── spark-submit.sh │ └── spark-worker │ │ ├── Dockerfile │ │ └── start-worker.sh └── env │ └── spark-worker.sh └── src ├── META-INF └── MANIFEST.MF └── main ├── resources └── data │ ├── bands │ └── bands.json │ ├── cars │ └── cars.json │ ├── employees │ └── employees.csv │ ├── employees_headers │ └── employees_headers.csv │ ├── flights │ └── flights.json │ ├── guitarPlayers │ └── guitarPlayers.json │ ├── guitars │ └── guitars.json │ ├── lipsum │ └── words.txt │ └── movies │ └── movies.json └── scala ├── common ├── DataGenerator.scala └── GuitarsDomain.scala ├── part1recap ├── ScalaRecap.scala └── SparkRecap.scala ├── part2foundations ├── CatalystDemo.scala ├── ReadingDAGs.scala ├── ReadingQueryPlans.scala ├── SparkAPIs.scala ├── SparkJobAnatomy.scala ├── TestDeployApp.scala └── TungstenDemo.scala ├── part3caching ├── Caching.scala └── Checkpointing.scala ├── part4partitioning ├── Partitioners.scala ├── PartitioningProblems.scala └── RepartitionCoalesce.scala ├── part5boost ├── FixingDataSkews.scala ├── KryoSerializer.scala └── SerializationProblems.scala └── playground └── Playground.scala /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/sbt,java,scala,spark,intellij 3 | # Edit at https://www.gitignore.io/?templates=sbt,java,scala,spark,intellij 4 | 5 | ### Intellij ### 6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 8 | 9 | # User-specific stuff 10 | .idea/**/workspace.xml 11 | 
.idea/**/tasks.xml 12 | .idea/**/usage.statistics.xml 13 | .idea/**/dictionaries 14 | .idea/**/shelf 15 | 16 | # Generated files 17 | .idea/**/contentModel.xml 18 | 19 | # Sensitive or high-churn files 20 | .idea/**/dataSources/ 21 | .idea/**/dataSources.ids 22 | .idea/**/dataSources.local.xml 23 | .idea/**/sqlDataSources.xml 24 | .idea/**/dynamic.xml 25 | .idea/**/uiDesigner.xml 26 | .idea/**/dbnavigator.xml 27 | 28 | # Gradle 29 | .idea/**/gradle.xml 30 | .idea/**/libraries 31 | 32 | # Gradle and Maven with auto-import 33 | # When using Gradle or Maven with auto-import, you should exclude module files, 34 | # since they will be recreated, and may cause churn. Uncomment if using 35 | # auto-import. 36 | # .idea/modules.xml 37 | # .idea/*.iml 38 | # .idea/modules 39 | # *.iml 40 | # *.ipr 41 | 42 | # CMake 43 | cmake-build-*/ 44 | 45 | # Mongo Explorer plugin 46 | .idea/**/mongoSettings.xml 47 | 48 | # File-based project format 49 | *.iws 50 | 51 | # IntelliJ 52 | out/ 53 | 54 | # mpeltonen/sbt-idea plugin 55 | .idea_modules/ 56 | 57 | # JIRA plugin 58 | atlassian-ide-plugin.xml 59 | 60 | # Cursive Clojure plugin 61 | .idea/replstate.xml 62 | 63 | # Crashlytics plugin (for Android Studio and IntelliJ) 64 | com_crashlytics_export_strings.xml 65 | crashlytics.properties 66 | crashlytics-build.properties 67 | fabric.properties 68 | 69 | # Editor-based Rest Client 70 | .idea/httpRequests 71 | 72 | # Android studio 3.1+ serialized cache file 73 | .idea/caches/build_file_checksums.ser 74 | 75 | ### Intellij Patch ### 76 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 77 | 78 | # *.iml 79 | # modules.xml 80 | # .idea/misc.xml 81 | # *.ipr 82 | 83 | # Sonarlint plugin 84 | .idea/**/sonarlint/ 85 | 86 | # SonarQube Plugin 87 | .idea/**/sonarIssues.xml 88 | 89 | # Markdown Navigator plugin 90 | .idea/**/markdown-navigator.xml 91 | .idea/**/markdown-navigator/ 92 | 93 | ### Java ### 94 | # Compiled class file 95 | *.class 96 | 97 
| # Log file 98 | *.log 99 | 100 | # BlueJ files 101 | *.ctxt 102 | 103 | # Mobile Tools for Java (J2ME) 104 | .mtj.tmp/ 105 | 106 | # Package Files # 107 | *.jar 108 | *.war 109 | *.nar 110 | *.ear 111 | *.zip 112 | *.tar.gz 113 | *.rar 114 | 115 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 116 | hs_err_pid* 117 | 118 | ### SBT ### 119 | # Simple Build Tool 120 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 121 | 122 | dist/* 123 | target/ 124 | lib_managed/ 125 | src_managed/ 126 | project/boot/ 127 | project/plugins/project/ 128 | .history 129 | .cache 130 | .lib/ 131 | 132 | ### Scala ### 133 | *.metals 134 | 135 | ### Spark ### 136 | *#*# 137 | *.#* 138 | *.iml 139 | *.ipr 140 | *.pyc 141 | *.pyo 142 | *.swp 143 | *~ 144 | .DS_Store 145 | .classpath 146 | .ensime 147 | .ensime_cache/ 148 | .ensime_lucene 149 | .generated-mima* 150 | .idea/ 151 | .project 152 | .pydevproject 153 | .scala_dependencies 154 | .settings 155 | /lib/ 156 | R-unit-tests.log 157 | R/unit-tests.out 158 | R/cran-check.out 159 | R/pkg/vignettes/sparkr-vignettes.html 160 | R/pkg/tests/fulltests/Rplots.pdf 161 | build/*.jar 162 | build/apache-maven* 163 | build/scala* 164 | build/zinc* 165 | cache 166 | checkpoint 167 | conf/*.cmd 168 | conf/*.conf 169 | conf/*.properties 170 | conf/*.sh 171 | conf/*.xml 172 | conf/java-opts 173 | conf/slaves 174 | dependency-reduced-pom.xml 175 | derby.log 176 | dev/create-release/*final 177 | dev/create-release/*txt 178 | dev/pr-deps/ 179 | dist/ 180 | docs/_site 181 | docs/api 182 | sql/docs 183 | sql/site 184 | lint-r-report.log 185 | log/ 186 | logs/ 187 | project/build/target/ 188 | project/plugins/lib_managed/ 189 | project/plugins/project/build.properties 190 | project/plugins/src_managed/ 191 | project/plugins/target/ 192 | python/lib/pyspark.zip 193 | python/deps 194 | python/test_coverage/coverage_data 195 | python/test_coverage/htmlcov 196 | 
python/pyspark/python 197 | reports/ 198 | scalastyle-on-compile.generated.xml 199 | scalastyle-output.xml 200 | scalastyle.txt 201 | spark-*-bin-*.tgz 202 | spark-tests.log 203 | streaming-tests.log 204 | unit-tests.log 205 | work/ 206 | docs/.jekyll-metadata 207 | 208 | # For Hive 209 | TempStatsStore/ 210 | metastore/ 211 | metastore_db/ 212 | sql/hive-thriftserver/test_warehouses 213 | warehouse/ 214 | spark-warehouse/ 215 | 216 | # For R session data 217 | .RData 218 | .RHistory 219 | .Rhistory 220 | *.Rproj 221 | *.Rproj.* 222 | 223 | .Rproj.user 224 | 225 | # For SBT 226 | .jvmopts 227 | 228 | 229 | # End of https://www.gitignore.io/api/sbt,java,scala,spark,intellij 230 | 231 | # Daniel added 232 | src/main/resources/generated/ 233 | .bsp -------------------------------------------------------------------------------- /HadoopWindowsUserSetup.md: -------------------------------------------------------------------------------- 1 | *Apache Spark doesn't have its own system to organize files in a distributed way (a distributed file system); therefore, it requires 2 | an external file system to store and process large datasets. For this reason, programmers install Spark 3 | on top of Hadoop so that Spark's advanced analytics applications can make use of the data stored using the Hadoop Distributed 4 | File System (HDFS).* 5 | 6 | ****Prerequisites:**** 7 | 8 | Before you start installing Hadoop on Windows, there are a few prerequisites that you need to have in place: 9 | 10 | 1. Java Development Kit (JDK) version 8 or higher 11 | 2. Apache Hadoop distribution suitable for Windows 12 | 13 | **Step 1:** *Install the Java Development Kit* 14 | `Hadoop is built using Java, so you’ll need to install the Java Development Kit (JDK) version 8 15 | or higher on your computer. You can download the JDK from the Oracle website. 
16 | (https://www.oracle.com/in/java/technologies/javase/javase8-archive-downloads.html) Once the download is 17 | complete, run the installer and follow the instructions to install the JDK.` 18 | 19 | **Step 2:** *Download the Hadoop distribution* 20 | `To install Hadoop on Windows, you’ll need to download the appropriate distribution from the 21 | Apache Hadoop website (https://hadoop.apache.org/releases.html). 22 | You’ll want to choose the distribution that is compatible with your version of Windows (hadoop-3.3.6) and click on binary. 23 | Once you’ve downloaded the distribution, extract files of hadoop-3.3.6.tar.gz and place under “C:\Hadoop”.` 24 | 25 | **Step 3:** *Set up the Environment Variables* 26 | 27 | To use Java & Hadoop, you’ll need to set up some environment variables. 28 | This will allow you to run Java & Hadoop commands from any directory on your computer. 29 | To set up the environment variables, follow these steps: 30 | 31 | 1. Open the Start menu and search for “Environment Variables”. 32 | 2. Click on “Edit the system environment variables”. 33 | 3. Click on the “Environment Variables” button. 34 | 4. Under “System Variables”, click on “New”. 35 | 5. Enter “JAVA_HOME” as the variable name & the path to the directory where your java is installed (example- C:\Program Files\Java\jdk1.8.0) as the variable value. 36 | 6. Click “OK”. 37 | 7. Enter “HADOOP_HOME” as the variable name and the path to the directory where you extracted the Hadoop distribution (example- C:\hadoop) as the variable value. 38 | 8. Click “OK”. 39 | 9. Locate the “Path” variable in the “System Variables” list and click “Edit”. 40 | 10. Add the following to the end of the “Variable value” field: %JAVA_HOME%\bin; %HADOOP_HOME%\bin; %HADOOP_HOME%\sbin; 41 | 11. Click “OK” to close all the windows. 
42 | 43 | **Step 4:** *Install Hadoop native IO binary* 44 | `Clone or download the winutils repository (https://github.com/cdarlint/winutils/tree/master/hadoop-3.3.5/bin) 45 | and copy the contents of hadoop-3.3.5/bin into the extracted location of the Hadoop binary package. 46 | In our example, it will be C:\hadoop\bin.` 47 | 48 | 49 | **Important Note:** The steps below are not necessary for Spark to run; the steps above are sufficient to work with Spark. 50 | However, you can proceed if you really want the entire Hadoop distribution to work with. 51 | 52 | 53 | **Step 5:** *Hadoop Configuration* 54 | To configure Hadoop, you’ll need to modify a few configuration files. 55 | These files are located in the “etc/hadoop” directory of the Hadoop folder. 56 | Open each of the following files in a text editor and make the changes described below and save the files: 57 | 58 | 1. `core-site.xml`: Add the following lines to the file inside `<configuration>` like this: 59 | ``` 60 | <configuration> 61 | <property> 62 | <name>fs.defaultFS</name> 63 | <value>hdfs://localhost:9000</value> 64 | </property> 65 | </configuration> 66 | ``` 67 | 68 | 2. Open this file `hadoop-env.cmd` (windows command script) and replace `set JAVA_HOME=%JAVA_HOME%` 69 | with java installed location like this `set JAVA_HOME=C:\Program Files\Java\jdk1.8.0` 70 | or if it doesn't work then use this `set JAVA_HOME=C:\Progra~1\Java\jdk1.8.0` also go to bottom of the file 71 | and give your name to this variable like this : `set HADOOP_IDENT_STRING=RockTheJVM`. 72 | 73 | 74 | 3. `hdfs-site.xml`: First create these folders - `C:/hadoop/data/dfs/namenode` and `C:/hadoop/data/dfs/datanode` 75 | Add the following lines to the file inside `<configuration>` like this: 76 | ``` 77 | <property> 78 | <name>dfs.replication</name> 79 | <value>1</value> 80 | </property> 81 | <property> 82 | <name>dfs.namenode.name.dir</name> 83 | <value>file:///C:/hadoop/data/dfs/namenode</value> 84 | </property> 85 | <property> 86 | <name>dfs.datanode.data.dir</name> 87 | <value>file:///C:/hadoop/data/dfs/datanode</value> 88 | </property> 89 | ``` 90 | 91 | 4. 
`mapred-site.xml`: Add the following lines to the file inside `<configuration>` like this: 92 | ``` 93 | <configuration> 94 | <property> 95 | <name>mapreduce.framework.name</name> 96 | <value>yarn</value> 97 | </property> 98 | </configuration> 99 | ``` 100 | 101 | 5. `yarn-site.xml`: Add the following lines to the file inside `<configuration>` like this: 102 | ``` 103 | <configuration> 104 | <property> 105 | <name>yarn.nodemanager.aux-services</name> 106 | <value>mapreduce_shuffle</value> 107 | <description>Yarn Node Manager Aux Service</description> 108 | </property> 109 | </configuration> 110 | ``` 111 | 112 | **Step 6:** **If you want to start Hadoop:** 113 | To start Hadoop, open a command prompt and navigate to the directory where you extracted the Hadoop distribution. 114 | Then, run the following commands: 115 | ``` 116 | cd sbin 117 | start-all.cmd 118 | ``` 119 | This will start the Hadoop daemons and launch the web interfaces. You can access the NameNode web interface by going to http://localhost:9870/ and the YARN ResourceManager web interface at http://localhost:8088/ in your web browser (port 9000 is the HDFS RPC endpoint set in `fs.defaultFS`, not a web page). 120 | 121 | ****Conclusion:**** 122 | Setting up Hadoop on a Windows system can pose some challenges, but by following this comprehensive guide, 123 | you'll be able to configure it smoothly and quickly. Hadoop is a robust solution for handling extensive datasets and executing distributed applications, 124 | making it a favored choice for numerous enterprises and institutions worldwide. 125 | Whether you're a data scientist or a software developer, integrating Hadoop into your toolkit is highly beneficial. 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The official repository for the Rock the JVM Spark Performance Tuning course 2 | 3 | Powered by [Rock the JVM!](https://rockthejvm.com) 4 | 5 | This repository contains the code we wrote during [Rock the JVM's Spark Performance Tuning](https://rockthejvm.com/course/spark-performance-tuning) course. Unless explicitly mentioned, the code in this repository is exactly what was caught on camera. 
6 | 7 | ### Install and setup 8 | 9 | - install [IntelliJ IDEA](https://jetbrains.com/idea) 10 | - install [Docker Desktop](https://docker.com) 11 | - either clone the repo or download as zip 12 | - open with IntelliJ as an SBT project 13 | 14 | As you open the project, the IDE will take care to download and apply the appropriate library dependencies. 15 | 16 | To set up the dockerized Spark cluster we will be using in the course, do the following: 17 | 18 | - open a terminal and navigate to `spark-cluster` 19 | - run `build-images.sh` (if you don't have a bash terminal, just open the file and run each line one by one) 20 | - run `docker-compose up` 21 | 22 | To interact with the Spark cluster, the folders `data` and `apps` inside the `spark-cluster` folder are mounted onto the Docker containers under `/opt/spark-data` and `/opt/spark-apps` respectively. 23 | 24 | To run a Spark shell, first run `docker-compose up` inside the `spark-cluster` directory, then in another terminal, do 25 | 26 | ``` 27 | docker exec -it spark-cluster_spark-master_1 bash 28 | ``` 29 | 30 | and then 31 | 32 | ``` 33 | /spark/bin/spark-shell 34 | ``` 35 | 36 | ### How to use intermediate states of this repository 37 | 38 | Start by cloning this repository and checkout the `start` tag: 39 | 40 | ``` 41 | git checkout start 42 | ``` 43 | 44 | ### How to run an intermediate state 45 | 46 | The repository was built while recording the lectures. Prior to each lecture, I tagged each commit so you can easily go back to an earlier state of the repo! 
47 | 48 | The tags are as follows: 49 | 50 | * `start` 51 | * `1.1-scala-recap` 52 | * `1.2-spark-recap` 53 | * `2.1-job-anatomy` 54 | * `2.2-query-plans` 55 | * `2.3-query-plans-exercises` 56 | * `2.4-spark-ui-dags` 57 | * `2.5-api-differences` 58 | * `2.6-deploy-config` 59 | * `2.7-catalyst` 60 | * `2.8-tungsten` 61 | * `3.2-caching` 62 | * `3.3-checkpointing` 63 | * `4.1-repartition-coalesce` 64 | * `4.2-partitioning-problems` 65 | * `4.3-partitioners` 66 | * `5.1-data-skews` 67 | * `5.2-serialization-problems` 68 | * `5.3-serialization-problems-2` 69 | * `5.4-kryo` 70 | 71 | When you watch a lecture, you can `git checkout` the appropriate tag and the repo will go back to the exact code I had when I started the lecture. 72 | 73 | ### For questions or suggestions 74 | 75 | If you have changes to suggest to this repo, either 76 | - submit a GitHub issue 77 | - tell me in the course Q/A forum 78 | - submit a pull request! 79 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "spark-performance-tuning" 3 | 4 | version := "0.2" 5 | 6 | scalaVersion := "2.13.12" 7 | val sparkVersion = "3.5.0" 8 | val postgresVersion = "42.6.0" 9 | val log4jVersion = "2.20.0" 10 | 11 | resolvers ++= Seq( 12 | "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven", 13 | "Typesafe Simple Repository" at "https://repo.typesafe.com/typesafe/simple/maven-releases", 14 | "MavenRepository" at "https://mvnrepository.com" 15 | ) 16 | 17 | libraryDependencies ++= Seq( 18 | "org.apache.spark" %% "spark-core" % sparkVersion, 19 | "org.apache.spark" %% "spark-sql" % sparkVersion, 20 | // logging 21 | "org.apache.logging.log4j" % "log4j-api" % log4jVersion, 22 | "org.apache.logging.log4j" % "log4j-core" % log4jVersion, 23 | ) -------------------------------------------------------------------------------- /docker-clean.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker rm -f $(docker ps -aq) -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.9.6 -------------------------------------------------------------------------------- /spark-cluster/README.md: -------------------------------------------------------------------------------- 1 | # Spark Cluster with Docker & docker-compose 2 | 3 | # General 4 | 5 | A simple spark standalone cluster for your testing environment purposes. A solution for your spark development environment that is just a *docker-compose up* away. 6 | 7 | The Docker compose will create the following containers: 8 | 9 | container|Ip address 10 | ---|--- 11 | spark-master|10.5.0.2 12 | spark-worker-1|10.5.0.3 13 | spark-worker-2|10.5.0.4 14 | spark-worker-3|10.5.0.5 15 | 16 | # Installation 17 | 18 | The following steps will make you run your spark cluster's containers. 19 | 20 | ## Pre requisites 21 | 22 | * Docker installed 23 | 24 | * Docker compose installed 25 | 26 | * A spark Application Jar to play with(Optional) 27 | 28 | ## Build the images 29 | 30 | The first step to deploy the cluster will be the build of the custom images, these builds can be performed with the *build-images.sh* script. 31 | 32 | The execution is as simple as the following steps: 33 | 34 | ```sh 35 | chmod +x build-images.sh 36 | ./build-images.sh 37 | ``` 38 | 39 | This will create the following docker images: 40 | 41 | * spark-base:3.5.0: A base image based on eclipse-temurin:17-jdk which ships scala, python3 and spark 3.5.0 42 | 43 | * spark-master:3.5.0: An image based on the previously created spark image, used to create spark master containers. 44 | 45 | * spark-worker:3.5.0: An image based on the previously created spark image, used to create spark worker containers. 
46 | 47 | * spark-submit:3.5.0: An image based on the previously created spark image, used to create spark submit containers(run, deliver driver and die gracefully). 48 | 49 | ## Run the docker-compose 50 | 51 | The final step to create your test cluster will be to run the compose file: 52 | 53 | ```sh 54 | docker-compose up --scale spark-worker=3 55 | ``` 56 | 57 | ## Validate your cluster 58 | 59 | Validate your cluster by accessing the spark UI on each worker & master URL. 60 | 61 | ### Spark Master 62 | 63 | http://10.5.0.2:8080/ 64 | 65 | ![alt text](docs/spark-master.png "Spark master UI") 66 | 67 | ### Spark Worker 1 68 | 69 | http://10.5.0.3:8081/ 70 | 71 | ![alt text](docs/spark-worker-1.png "Spark worker 1 UI") 72 | 73 | ### Spark Worker 2 74 | 75 | http://10.5.0.4:8081/ 76 | 77 | ![alt text](docs/spark-worker-2.png "Spark worker 2 UI") 78 | 79 | ### Spark Worker 3 80 | 81 | http://10.5.0.5:8081/ 82 | 83 | ![alt text](docs/spark-worker-3.png "Spark worker 3 UI") 84 | 85 | # Resource Allocation 86 | 87 | This cluster is shipped with three workers and one spark master, each of these has a particular set of resource allocation(basically RAM & cpu cores allocation). 88 | 89 | * The default CPU cores allocation for each spark worker is 1 core. 90 | 91 | * The default RAM for each spark-worker is 1024 MB. 92 | 93 | * The default RAM allocation for spark executors is 256mb. 94 | 95 | * The default RAM allocation for spark driver is 128mb 96 | 97 | * If you wish to modify these allocations just edit the env/spark-worker.sh file. 
98 | 99 | # Bound Volumes 100 | 101 | To make app running easier I've shipped two volume mounts described in the following chart: 102 | 103 | Host Mount|Container Mount|Purpose 104 | ---|---|--- 105 | /mnt/spark-apps|/opt/spark-apps|Used to make available your app's jars on all workers & master 106 | /mnt/spark-data|/opt/spark-data| Used to make available your app's data on all workers & master 107 | 108 | This is basically a dummy DFS created from docker Volumes...(maybe not...) 109 | 110 | # Run a sample application 111 | 112 | Now let's make a **wild spark submit** to validate the distributed nature of our new toy following these steps: 113 | 114 | ## Create a Scala spark app 115 | 116 | The first thing you need to do is to make a spark application. Our spark-submit image is designed to run scala code (soon will ship pyspark support guess I was just lazy to do so..). 117 | 118 | In my case I am using an app called [crimes-app](https://). You can make or use your own scala app, I've just used this one because I had it at hand. 119 | 120 | 121 | ## Ship your jar & dependencies on the Workers and Master 122 | 123 | A necessary step to make a **spark-submit** is to copy your application bundle into all workers, also any configuration file or input file you need. 124 | 125 | Luckily for us we are using docker volumes so, you just have to copy your app and configs into /mnt/spark-apps, and your input files into /mnt/spark-data. 
126 | 127 | ```bash 128 | #Copy spark application into all workers's app folder 129 | cp /home/workspace/crimes-app/build/libs/crimes-app.jar /mnt/spark-apps 130 | 131 | #Copy spark application configs into all workers's app folder 132 | cp -r /home/workspace/crimes-app/config /mnt/spark-apps 133 | 134 | # Copy the file to be processed to all workers's data folder 135 | cp /home/Crimes_-_2001_to_present.csv /mnt/spark-data 136 | ``` 137 | 138 | ## Check the successful copy of the data and app jar (Optional) 139 | 140 | This is not a necessary step, just if you are curious you can check if your app code and files are in place before running the spark-submit. 141 | 142 | ```sh 143 | # Worker 1 Validations 144 | docker exec -ti spark-worker-1 ls -l /opt/spark-apps 145 | 146 | docker exec -ti spark-worker-1 ls -l /opt/spark-data 147 | 148 | # Worker 2 Validations 149 | docker exec -ti spark-worker-2 ls -l /opt/spark-apps 150 | 151 | docker exec -ti spark-worker-2 ls -l /opt/spark-data 152 | 153 | # Worker 3 Validations 154 | docker exec -ti spark-worker-3 ls -l /opt/spark-apps 155 | 156 | docker exec -ti spark-worker-3 ls -l /opt/spark-data 157 | ``` 158 | After running one of these commands you should see your app's jar and files. 
159 | 160 | 161 | ## Use docker spark-submit 162 | 163 | ```bash 164 | #Creating some variables to make the docker run command more readable 165 | #App jar environment used by the spark-submit image 166 | SPARK_APPLICATION_JAR_LOCATION="/opt/spark-apps/crimes-app.jar" 167 | #App main class environment used by the spark-submit image 168 | SPARK_APPLICATION_MAIN_CLASS="org.mvb.applications.CrimesApp" 169 | #Extra submit args used by the spark-submit image 170 | SPARK_SUBMIT_ARGS="--conf spark.executor.extraJavaOptions='-Dconfig-path=/opt/spark-apps/dev/config.conf'" 171 | 172 | #We have to use the same network as the spark cluster(internally the image resolves spark master as spark://spark-master:7077) 173 | docker run --network docker-spark-cluster_spark-network \ 174 | -v /mnt/spark-apps:/opt/spark-apps \ 175 | --env SPARK_APPLICATION_JAR_LOCATION=$SPARK_APPLICATION_JAR_LOCATION \ 176 | --env SPARK_APPLICATION_MAIN_CLASS=$SPARK_APPLICATION_MAIN_CLASS \ 177 | spark-submit:3.5.0 178 | 179 | ``` 180 | 181 | After running this you will see an output pretty much like this: 182 | 183 | ```bash 184 | Running Spark using the REST application submission protocol. 185 | 2018-09-23 15:17:52 INFO RestSubmissionClient:54 - Submitting a request to launch an application in spark://spark-master:6066. 186 | 2018-09-23 15:17:53 INFO RestSubmissionClient:54 - Submission successfully created as driver-20180923151753-0000. Polling submission state... 187 | 2018-09-23 15:17:53 INFO RestSubmissionClient:54 - Submitting a request for the status of submission driver-20180923151753-0000 in spark://spark-master:6066. 188 | 2018-09-23 15:17:53 INFO RestSubmissionClient:54 - State of driver driver-20180923151753-0000 is now RUNNING. 189 | 2018-09-23 15:17:53 INFO RestSubmissionClient:54 - Driver is running on worker worker-20180923151711-10.5.0.4-45381 at 10.5.0.4:45381. 
190 | 2018-09-23 15:17:53 INFO RestSubmissionClient:54 - Server responded with CreateSubmissionResponse: 191 | { 192 | "action" : "CreateSubmissionResponse", 193 | "message" : "Driver successfully submitted as driver-20180923151753-0000", 194 | "serverSparkVersion" : "3.5.0", 195 | "submissionId" : "driver-20180923151753-0000", 196 | "success" : true 197 | } 198 | ``` 199 | 200 | # Summary (What have I done :O?) 201 | 202 | * We compiled the necessary docker images to run spark master and worker containers. 203 | 204 | * We created a spark standalone cluster using 3 worker nodes and 1 master node using docker && docker-compose. 205 | 206 | * Copied the resources necessary to run a sample application. 207 | 208 | * Submitted an application to the cluster using a **spark-submit** docker image. 209 | 210 | * We ran a distributed application at home(just need enough cpu cores and RAM to do so). 211 | 212 | # Why a standalone cluster? 213 | 214 | * This is intended to be used for test purposes, basically a way of running distributed spark apps on your laptop or desktop. 215 | 216 | * Right now I don't have enough resources to make a Yarn, Mesos or Kubernetes based cluster :(. 
217 | 218 | * This will be useful to use CI/CD pipelines for your spark apps(A really difficult and hot topic) 219 | -------------------------------------------------------------------------------- /spark-cluster/build-images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker build -t spark-base:latest ./docker/base 6 | docker build -t spark-master:latest ./docker/spark-master 7 | docker build -t spark-worker:latest ./docker/spark-worker 8 | docker build -t spark-submit:latest ./docker/spark-submit -------------------------------------------------------------------------------- /spark-cluster/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | spark-master: 4 | image: spark-master:latest 5 | ports: 6 | - "4040:4040" 7 | - "9090:8080" 8 | - "7077:7077" 9 | volumes: 10 | - ./apps:/opt/spark-apps 11 | - ./data:/opt/spark-data 12 | environment: 13 | - "SPARK_LOCAL_IP=spark-master" 14 | spark-worker: 15 | image: spark-worker:latest 16 | depends_on: 17 | - spark-master 18 | environment: 19 | - SPARK_MASTER=spark://spark-master:7077 20 | - SPARK_WORKER_CORES=1 21 | - SPARK_WORKER_MEMORY=2G 22 | - SPARK_DRIVER_MEMORY=256m 23 | - SPARK_EXECUTOR_MEMORY=1G 24 | volumes: 25 | - ./apps:/opt/spark-apps 26 | - ./data:/opt/spark-data 27 | -------------------------------------------------------------------------------- /spark-cluster/docker/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM eclipse-temurin:17-jdk 2 | LABEL author="Daniel Ciocirlan" email="daniel@rockthejvm.com" 3 | LABEL version="0.3" 4 | 5 | ENV DAEMON_RUN=true 6 | ENV SPARK_VERSION=3.5.0 7 | ENV HADOOP_VERSION=3 8 | ENV SCALA_VERSION_BASE=2.13 9 | ENV SCALA_VERSION=2.13.12 10 | ENV SCALA_HOME=/usr/share/scala 11 | ENV SPARK_HOME=/spark 12 | 13 | 14 | RUN apt-get update && apt-get install -y curl 
vim wget software-properties-common ssh net-tools ca-certificates jq dbus-x11 15 | RUN echo exit 0 > /usr/sbin/policy-rc.d 16 | 17 | RUN cd "/tmp" && \ 18 | wget --no-verbose "https://downloads.typesafe.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \ 19 | tar xzf "scala-${SCALA_VERSION}.tgz" && \ 20 | mkdir "${SCALA_HOME}" && \ 21 | rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \ 22 | mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \ 23 | ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \ 24 | rm -rf "/tmp/"* 25 | 26 | # Add Dependencies for PySpark 27 | RUN apt-get install -y python3 python3-pip python3-numpy python3-matplotlib python3-scipy python3-pandas python3-simpy 28 | RUN update-alternatives --install "/usr/bin/python" "python" "$(which python3)" 1 29 | 30 | 31 | #Scala instalation 32 | RUN export PATH="/usr/local/sbt/bin:$PATH" && apt update && apt install ca-certificates wget tar && mkdir -p "/usr/local/sbt" && wget -qO - --no-check-certificate "https://github.com/sbt/sbt/releases/download/v1.9.6/sbt-1.9.6.tgz" | tar xz -C /usr/local/sbt --strip-components=1 && sbt sbtVersion -Dsbt.rootdir=true 33 | 34 | RUN wget --no-verbose https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION_BASE}.tgz && tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION_BASE}.tgz \ 35 | && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION_BASE} spark \ 36 | && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION_BASE}.tgz 37 | 38 | 39 | 40 | 41 | # Fix the value of PYTHONHASHSEED 42 | # Note: this is needed when you use Python 3.3 or greater 43 | ENV PYTHONHASHSEED 1 44 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-master/Dockerfile: -------------------------------------------------------------------------------- 1 | # 
This assumes spark-base was built first. 2 | # Usually we'd run the build-images.sh script which builds spark-base 3 | 4 | FROM spark-base:latest 5 | 6 | COPY start-master.sh / 7 | 8 | ENV SPARK_MASTER_PORT 7077 9 | ENV SPARK_MASTER_WEBUI_PORT 8080 10 | ENV SPARK_MASTER_LOG /spark/logs 11 | 12 | EXPOSE 8080 7077 6066 13 | 14 | CMD ["/bin/bash", "/start-master.sh"] 15 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-master/start-master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export SPARK_MASTER_HOST=`hostname` 4 | 5 | . "/spark/sbin/spark-config.sh" 6 | 7 | . "/spark/bin/load-spark-env.sh" 8 | 9 | mkdir -p $SPARK_MASTER_LOG 10 | 11 | export SPARK_HOME=/spark 12 | 13 | ln -sf /dev/stdout $SPARK_MASTER_LOG/spark-master.out 14 | 15 | cd /spark/bin && /spark/sbin/../bin/spark-class org.apache.spark.deploy.master.Master --ip $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT >> $SPARK_MASTER_LOG/spark-master.out 16 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-submit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM spark-base:latest 2 | 3 | COPY spark-submit.sh / 4 | 5 | ENV SPARK_MASTER_URL="spark://spark-master:7077" 6 | ENV SPARK_SUBMIT_ARGS="" 7 | ENV SPARK_APPLICATION_ARGS "" 8 | #ENV SPARK_APPLICATION_JAR_LOCATION /opt/spark-apps/myjar.jar 9 | #ENV SPARK_APPLICATION_MAIN_CLASS my.main.Application 10 | 11 | 12 | CMD ["/bin/bash", "/spark-submit.sh"] 13 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-submit/spark-submit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | /spark/bin/spark-submit \ 4 | --class ${SPARK_APPLICATION_MAIN_CLASS} \ 5 | --master ${SPARK_MASTER_URL} 
\ 6 | --deploy-mode cluster \ 7 | --total-executor-cores 1 \ 8 | ${SPARK_SUBMIT_ARGS} \ 9 | ${SPARK_APPLICATION_JAR_LOCATION} \ 10 | ${SPARK_APPLICATION_ARGS} 11 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-worker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM spark-base:latest 2 | 3 | COPY start-worker.sh / 4 | 5 | ENV SPARK_WORKER_WEBUI_PORT=8081 6 | ENV SPARK_WORKER_LOG=/spark/logs 7 | ENV SPARK_MASTER="spark://spark-master:7077" 8 | 9 | EXPOSE 8081 10 | 11 | CMD ["/bin/bash", "/start-worker.sh"] 12 | -------------------------------------------------------------------------------- /spark-cluster/docker/spark-worker/start-worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs a standalone Spark worker in the foreground, passing $SPARK_MASTER as its master argument. 3 | 4 | . "/spark/sbin/spark-config.sh" 5 | . "/spark/bin/load-spark-env.sh" 6 | 7 | mkdir -p "$SPARK_WORKER_LOG" 8 | 9 | export SPARK_HOME=/spark 10 | 11 | # Point the log file at /dev/stdout so the appended output below lands on the container's stdout. 12 | ln -sf /dev/stdout "$SPARK_WORKER_LOG/spark-worker.out" 13 | 14 | /spark/sbin/../bin/spark-class org.apache.spark.deploy.worker.Worker --webui-port "$SPARK_WORKER_WEBUI_PORT" "$SPARK_MASTER" >> "$SPARK_WORKER_LOG/spark-worker.out" -------------------------------------------------------------------------------- /spark-cluster/env/spark-worker.sh: -------------------------------------------------------------------------------- 1 | #Environment variables used by the spark workers 2 | #Do not touch this unless you modify the compose master 3 | SPARK_MASTER=spark://spark-master:7077 4 | #Allocation Parameters 5 | SPARK_WORKER_CORES=1 6 | SPARK_WORKER_MEMORY=1G 7 | SPARK_DRIVER_MEMORY=128m 8 | SPARK_EXECUTOR_MEMORY=256m -------------------------------------------------------------------------------- /src/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Class-Path: commons-compiler-3.0.15.jar 
hadoop-mapreduce-client-common 3 | -2.7.4.jar hadoop-yarn-server-nodemanager-2.7.4.jar hadoop-yarn-api-2 4 | .7.4.jar avro-1.8.2.jar avro-mapred-1.8.2-hadoop2.jar hadoop-mapreduc 5 | e-client-jobclient-2.7.4.jar jackson-mapper-asl-1.9.13.jar scala-xml_ 6 | 2.12-1.2.0.jar commons-compress-1.8.1.jar javassist-3.22.0-CR2.jar ha 7 | doop-yarn-common-2.7.4.jar commons-httpclient-3.1.jar spark-catalyst_ 8 | 2.12-3.0.0-preview2.jar jersey-common-2.29.1.jar jackson-core-2.10.0. 9 | jar spark-tags_2.12-3.0.0-preview2.jar parquet-column-1.10.1.jar json 10 | 4s-scalap_2.12-3.6.6.jar javax.servlet-api-3.1.0.jar jsr305-3.0.2.jar 11 | jackson-module-scala_2.12-2.10.0.jar metrics-graphite-4.1.1.jar metr 12 | ics-jmx-4.1.1.jar leveldbjni-all-1.8.jar guice-3.0.jar curator-recipe 13 | s-2.7.1.jar avro-ipc-1.8.2.jar hadoop-mapreduce-client-core-2.7.4.jar 14 | jersey-hk2-2.29.1.jar spark-core_2.12-3.0.0-preview2.jar RoaringBitm 15 | ap-0.7.45.jar hadoop-yarn-server-common-2.7.4.jar metrics-json-4.1.1. 16 | jar jackson-core-asl-1.9.13.jar hadoop-annotations-2.7.4.jar pyrolite 17 | -4.30.jar orc-shims-1.5.8.jar jakarta.inject-2.6.1.jar jetty-util-6.1 18 | .26.jar httpcore-4.2.4.jar hk2-locator-2.6.1.jar xz-1.5.jar commons-m 19 | ath3-3.4.1.jar commons-cli-1.2.jar gson-2.2.4.jar jsp-api-2.1.jar act 20 | ivation-1.1.1.jar curator-framework-2.7.1.jar parquet-hadoop-1.10.1.j 21 | ar hadoop-common-2.7.4.jar slf4j-api-1.7.28.jar jersey-container-serv 22 | let-2.29.1.jar jetty-sslengine-6.1.26.jar commons-crypto-1.0.0.jar ao 23 | palliance-repackaged-2.6.1.jar jakarta.ws.rs-api-2.1.6.jar jcl-over-s 24 | lf4j-1.7.16.jar jackson-databind-2.10.0.jar osgi-resource-locator-1.0 25 | .3.jar arrow-memory-0.15.1.jar aopalliance-1.0.jar orc-mapreduce-1.5. 26 | 8.jar kryo-shaded-4.0.2.jar commons-io-2.4.jar stax-api-1.0-2.jar par 27 | quet-jackson-1.10.1.jar log4j-1.2.17.jar jersey-client-2.29.1.jar sna 28 | ppy-java-1.1.7.3.jar parquet-format-2.4.0.jar flatbuffers-java-1.9.0. 
29 | jar metrics-core-4.1.1.jar slf4j-log4j12-1.7.25.jar xercesImpl-2.9.1. 30 | jar chill-java-0.9.3.jar jakarta.validation-api-2.0.2.jar jakarta.ann 31 | otation-api-1.3.5.jar jersey-server-2.29.1.jar jersey-container-servl 32 | et-core-2.29.1.jar zstd-jni-1.4.4-3.jar jackson-annotations-2.10.0.ja 33 | r objenesis-2.5.1.jar scala-parser-combinators_2.12-1.1.2.jar commons 34 | -beanutils-1.7.0.jar ivy-2.4.0.jar json4s-core_2.12-3.6.6.jar commons 35 | -net-3.1.jar oro-2.0.8.jar spark-launcher_2.12-3.0.0-preview2.jar ant 36 | lr4-runtime-4.7.1.jar hadoop-mapreduce-client-app-2.7.4.jar hadoop-cl 37 | ient-2.7.4.jar hk2-api-2.6.1.jar stream-2.9.6.jar commons-configurati 38 | on-1.6.jar zookeeper-3.4.14.jar orc-core-1.5.8.jar xbean-asm7-shaded- 39 | 4.15.jar log4j-api-2.4.1.jar api-asn1-api-1.0.0-M20.jar curator-clien 40 | t-2.7.1.jar protobuf-java-2.5.0.jar compress-lzf-1.0.3.jar jackson-ja 41 | xrs-1.9.13.jar arrow-format-0.15.1.jar scala-library-2.12.4.jar spark 42 | -unsafe_2.12-3.0.0-preview2.jar spark-sql_2.12-3.0.0-preview2.jar air 43 | compressor-0.10.jar jline-0.9.94.jar minlog-1.3.0.jar lz4-java-1.7.0. 44 | jar unused-1.0.0.jar chill_2.12-0.9.3.jar commons-text-1.6.jar py4j-0 45 | .10.8.1.jar parquet-encoding-1.10.1.jar jackson-xc-1.9.13.jar hadoop- 46 | mapreduce-client-shuffle-2.7.4.jar audience-annotations-0.5.0.jar jet 47 | tison-1.1.jar netty-all-4.1.42.Final.jar jaxb-api-2.2.2.jar jersey-me 48 | dia-jaxb-2.29.1.jar apacheds-kerberos-codec-2.0.0-M15.jar janino-3.0. 49 | 15.jar hadoop-yarn-client-2.7.4.jar arrow-vector-0.15.1.jar log4j-cor 50 | e-2.4.1.jar hive-storage-api-2.6.0.jar guava-16.0.1.jar spotbugs-anno 51 | tations-3.1.9.jar spark-sketch_2.12-3.0.0-preview2.jar xmlenc-0.52.ja 52 | r json4s-ast_2.12-3.6.6.jar scala-reflect-2.12.4.jar hk2-utils-2.6.1. 
53 | jar spark-network-common_2.12-3.0.0-preview2.jar paranamer-2.8.jar ap 54 | acheds-i18n-2.0.0-M15.jar jul-to-slf4j-1.7.16.jar commons-lang3-3.9.j 55 | ar metrics-jvm-4.1.1.jar jackson-module-paranamer-2.10.0.jar hadoop-h 56 | dfs-2.7.4.jar spark-network-shuffle_2.12-3.0.0-preview2.jar xml-apis- 57 | 1.3.04.jar json4s-jackson_2.12-3.6.6.jar htrace-core-3.1.0-incubating 58 | .jar javax.inject-1.jar httpclient-4.2.5.jar hadoop-auth-2.7.4.jar co 59 | mmons-codec-1.10.jar commons-collections-3.2.2.jar shims-0.7.45.jar s 60 | park-kvstore_2.12-3.0.0-preview2.jar netty-3.10.6.Final.jar parquet-c 61 | ommon-1.10.1.jar univocity-parsers-2.8.3.jar api-util-1.0.0-M20.jar c 62 | ommons-lang-2.6.jar commons-digester-1.8.jar 63 | Main-Class: 64 | 65 | -------------------------------------------------------------------------------- /src/main/resources/data/bands/bands.json: -------------------------------------------------------------------------------- 1 | {"id":1,"name":"AC/DC","hometown":"Sydney","year":1973} 2 | {"id":0,"name":"Led Zeppelin","hometown":"London","year":1968} 3 | {"id":3,"name":"Metallica","hometown":"Los Angeles","year":1981} 4 | {"id":4,"name":"The Beatles","hometown":"Liverpool","year":1960} 5 | -------------------------------------------------------------------------------- /src/main/resources/data/cars/cars.json: -------------------------------------------------------------------------------- 1 | {"Name":"chevrolet chevelle malibu", "Miles_per_Gallon":18, "Cylinders":8, "Displacement":307, "Horsepower":130, "Weight_in_lbs":3504, "Acceleration":12, "Year":"1970-01-01", "Origin":"USA"} 2 | {"Name":"buick skylark 320", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":350, "Horsepower":165, "Weight_in_lbs":3693, "Acceleration":11.5, "Year":"1970-01-01", "Origin":"USA"} 3 | {"Name":"plymouth satellite", "Miles_per_Gallon":18, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":3436, "Acceleration":11, "Year":"1970-01-01", 
"Origin":"USA"} 4 | {"Name":"amc rebel sst", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":304, "Horsepower":150, "Weight_in_lbs":3433, "Acceleration":12, "Year":"1970-01-01", "Origin":"USA"} 5 | {"Name":"ford torino", "Miles_per_Gallon":17, "Cylinders":8, "Displacement":302, "Horsepower":140, "Weight_in_lbs":3449, "Acceleration":10.5, "Year":"1970-01-01", "Origin":"USA"} 6 | {"Name":"ford galaxie 500", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":429, "Horsepower":198, "Weight_in_lbs":4341, "Acceleration":10, "Year":"1970-01-01", "Origin":"USA"} 7 | {"Name":"chevrolet impala", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":454, "Horsepower":220, "Weight_in_lbs":4354, "Acceleration":9, "Year":"1970-01-01", "Origin":"USA"} 8 | {"Name":"plymouth fury iii", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":440, "Horsepower":215, "Weight_in_lbs":4312, "Acceleration":8.5, "Year":"1970-01-01", "Origin":"USA"} 9 | {"Name":"pontiac catalina", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":455, "Horsepower":225, "Weight_in_lbs":4425, "Acceleration":10, "Year":"1970-01-01", "Origin":"USA"} 10 | {"Name":"amc ambassador dpl", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":390, "Horsepower":190, "Weight_in_lbs":3850, "Acceleration":8.5, "Year":"1970-01-01", "Origin":"USA"} 11 | {"Name":"citroen ds-21 pallas", "Miles_per_Gallon":null, "Cylinders":4, "Displacement":133, "Horsepower":115, "Weight_in_lbs":3090, "Acceleration":17.5, "Year":"1970-01-01", "Origin":"Europe"} 12 | {"Name":"chevrolet chevelle concours (sw)", "Miles_per_Gallon":null, "Cylinders":8, "Displacement":350, "Horsepower":165, "Weight_in_lbs":4142, "Acceleration":11.5, "Year":"1970-01-01", "Origin":"USA"} 13 | {"Name":"ford torino (sw)", "Miles_per_Gallon":null, "Cylinders":8, "Displacement":351, "Horsepower":153, "Weight_in_lbs":4034, "Acceleration":11, "Year":"1970-01-01", "Origin":"USA"} 14 | {"Name":"plymouth satellite (sw)", "Miles_per_Gallon":null, "Cylinders":8, 
"Displacement":383, "Horsepower":175, "Weight_in_lbs":4166, "Acceleration":10.5, "Year":"1970-01-01", "Origin":"USA"} 15 | {"Name":"amc rebel sst (sw)", "Miles_per_Gallon":null, "Cylinders":8, "Displacement":360, "Horsepower":175, "Weight_in_lbs":3850, "Acceleration":11, "Year":"1970-01-01", "Origin":"USA"} 16 | {"Name":"dodge challenger se", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":383, "Horsepower":170, "Weight_in_lbs":3563, "Acceleration":10, "Year":"1970-01-01", "Origin":"USA"} 17 | {"Name":"plymouth 'cuda 340", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":340, "Horsepower":160, "Weight_in_lbs":3609, "Acceleration":8, "Year":"1970-01-01", "Origin":"USA"} 18 | {"Name":"ford mustang boss 302", "Miles_per_Gallon":null, "Cylinders":8, "Displacement":302, "Horsepower":140, "Weight_in_lbs":3353, "Acceleration":8, "Year":"1970-01-01", "Origin":"USA"} 19 | {"Name":"chevrolet monte carlo", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":400, "Horsepower":150, "Weight_in_lbs":3761, "Acceleration":9.5, "Year":"1970-01-01", "Origin":"USA"} 20 | {"Name":"buick estate wagon (sw)", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":455, "Horsepower":225, "Weight_in_lbs":3086, "Acceleration":10, "Year":"1970-01-01", "Origin":"USA"} 21 | {"Name":"toyota corona mark ii", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":113, "Horsepower":95, "Weight_in_lbs":2372, "Acceleration":15, "Year":"1970-01-01", "Origin":"Japan"} 22 | {"Name":"plymouth duster", "Miles_per_Gallon":22, "Cylinders":6, "Displacement":198, "Horsepower":95, "Weight_in_lbs":2833, "Acceleration":15.5, "Year":"1970-01-01", "Origin":"USA"} 23 | {"Name":"amc hornet", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":199, "Horsepower":97, "Weight_in_lbs":2774, "Acceleration":15.5, "Year":"1970-01-01", "Origin":"USA"} 24 | {"Name":"ford maverick", "Miles_per_Gallon":21, "Cylinders":6, "Displacement":200, "Horsepower":85, "Weight_in_lbs":2587, "Acceleration":16, "Year":"1970-01-01", 
"Origin":"USA"} 25 | {"Name":"datsun pl510", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":97, "Horsepower":88, "Weight_in_lbs":2130, "Acceleration":14.5, "Year":"1970-01-01", "Origin":"Japan"} 26 | {"Name":"volkswagen 1131 deluxe sedan", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":97, "Horsepower":46, "Weight_in_lbs":1835, "Acceleration":20.5, "Year":"1970-01-01", "Origin":"Europe"} 27 | {"Name":"peugeot 504", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":110, "Horsepower":87, "Weight_in_lbs":2672, "Acceleration":17.5, "Year":"1970-01-01", "Origin":"Europe"} 28 | {"Name":"audi 100 ls", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":107, "Horsepower":90, "Weight_in_lbs":2430, "Acceleration":14.5, "Year":"1970-01-01", "Origin":"Europe"} 29 | {"Name":"saab 99e", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":104, "Horsepower":95, "Weight_in_lbs":2375, "Acceleration":17.5, "Year":"1970-01-01", "Origin":"Europe"} 30 | {"Name":"bmw 2002", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":121, "Horsepower":113, "Weight_in_lbs":2234, "Acceleration":12.5, "Year":"1970-01-01", "Origin":"Europe"} 31 | {"Name":"amc gremlin", "Miles_per_Gallon":21, "Cylinders":6, "Displacement":199, "Horsepower":90, "Weight_in_lbs":2648, "Acceleration":15, "Year":"1970-01-01", "Origin":"USA"} 32 | {"Name":"ford f250", "Miles_per_Gallon":10, "Cylinders":8, "Displacement":360, "Horsepower":215, "Weight_in_lbs":4615, "Acceleration":14, "Year":"1970-01-01", "Origin":"USA"} 33 | {"Name":"chevy c20", "Miles_per_Gallon":10, "Cylinders":8, "Displacement":307, "Horsepower":200, "Weight_in_lbs":4376, "Acceleration":15, "Year":"1970-01-01", "Origin":"USA"} 34 | {"Name":"dodge d200", "Miles_per_Gallon":11, "Cylinders":8, "Displacement":318, "Horsepower":210, "Weight_in_lbs":4382, "Acceleration":13.5, "Year":"1970-01-01", "Origin":"USA"} 35 | {"Name":"hi 1200d", "Miles_per_Gallon":9, "Cylinders":8, "Displacement":304, "Horsepower":193, "Weight_in_lbs":4732, 
"Acceleration":18.5, "Year":"1970-01-01", "Origin":"USA"} 36 | {"Name":"datsun pl510", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":97, "Horsepower":88, "Weight_in_lbs":2130, "Acceleration":14.5, "Year":"1971-01-01", "Origin":"Japan"} 37 | {"Name":"chevrolet vega 2300", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":140, "Horsepower":90, "Weight_in_lbs":2264, "Acceleration":15.5, "Year":"1971-01-01", "Origin":"USA"} 38 | {"Name":"toyota corona", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":113, "Horsepower":95, "Weight_in_lbs":2228, "Acceleration":14, "Year":"1971-01-01", "Origin":"Japan"} 39 | {"Name":"ford pinto", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":98, "Horsepower":null, "Weight_in_lbs":2046, "Acceleration":19, "Year":"1971-01-01", "Origin":"USA"} 40 | {"Name":"volkswagen super beetle 117", "Miles_per_Gallon":null, "Cylinders":4, "Displacement":97, "Horsepower":48, "Weight_in_lbs":1978, "Acceleration":20, "Year":"1971-01-01", "Origin":"Europe"} 41 | {"Name":"amc gremlin", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":2634, "Acceleration":13, "Year":"1971-01-01", "Origin":"USA"} 42 | {"Name":"plymouth satellite custom", "Miles_per_Gallon":16, "Cylinders":6, "Displacement":225, "Horsepower":105, "Weight_in_lbs":3439, "Acceleration":15.5, "Year":"1971-01-01", "Origin":"USA"} 43 | {"Name":"chevrolet chevelle malibu", "Miles_per_Gallon":17, "Cylinders":6, "Displacement":250, "Horsepower":100, "Weight_in_lbs":3329, "Acceleration":15.5, "Year":"1971-01-01", "Origin":"USA"} 44 | {"Name":"ford torino 500", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":250, "Horsepower":88, "Weight_in_lbs":3302, "Acceleration":15.5, "Year":"1971-01-01", "Origin":"USA"} 45 | {"Name":"amc matador", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":3288, "Acceleration":15.5, "Year":"1971-01-01", "Origin":"USA"} 46 | {"Name":"chevrolet impala", 
"Miles_per_Gallon":14, "Cylinders":8, "Displacement":350, "Horsepower":165, "Weight_in_lbs":4209, "Acceleration":12, "Year":"1971-01-01", "Origin":"USA"} 47 | {"Name":"pontiac catalina brougham", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":400, "Horsepower":175, "Weight_in_lbs":4464, "Acceleration":11.5, "Year":"1971-01-01", "Origin":"USA"} 48 | {"Name":"ford galaxie 500", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":351, "Horsepower":153, "Weight_in_lbs":4154, "Acceleration":13.5, "Year":"1971-01-01", "Origin":"USA"} 49 | {"Name":"plymouth fury iii", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4096, "Acceleration":13, "Year":"1971-01-01", "Origin":"USA"} 50 | {"Name":"dodge monaco (sw)", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":383, "Horsepower":180, "Weight_in_lbs":4955, "Acceleration":11.5, "Year":"1971-01-01", "Origin":"USA"} 51 | {"Name":"ford country squire (sw)", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":400, "Horsepower":170, "Weight_in_lbs":4746, "Acceleration":12, "Year":"1971-01-01", "Origin":"USA"} 52 | {"Name":"pontiac safari (sw)", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":400, "Horsepower":175, "Weight_in_lbs":5140, "Acceleration":12, "Year":"1971-01-01", "Origin":"USA"} 53 | {"Name":"amc hornet sportabout (sw)", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":258, "Horsepower":110, "Weight_in_lbs":2962, "Acceleration":13.5, "Year":"1971-01-01", "Origin":"USA"} 54 | {"Name":"chevrolet vega (sw)", "Miles_per_Gallon":22, "Cylinders":4, "Displacement":140, "Horsepower":72, "Weight_in_lbs":2408, "Acceleration":19, "Year":"1971-01-01", "Origin":"USA"} 55 | {"Name":"pontiac firebird", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":250, "Horsepower":100, "Weight_in_lbs":3282, "Acceleration":15, "Year":"1971-01-01", "Origin":"USA"} 56 | {"Name":"ford mustang", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":250, "Horsepower":88, 
"Weight_in_lbs":3139, "Acceleration":14.5, "Year":"1971-01-01", "Origin":"USA"} 57 | {"Name":"mercury capri 2000", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":122, "Horsepower":86, "Weight_in_lbs":2220, "Acceleration":14, "Year":"1971-01-01", "Origin":"USA"} 58 | {"Name":"opel 1900", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":116, "Horsepower":90, "Weight_in_lbs":2123, "Acceleration":14, "Year":"1971-01-01", "Origin":"Europe"} 59 | {"Name":"peugeot 304", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":79, "Horsepower":70, "Weight_in_lbs":2074, "Acceleration":19.5, "Year":"1971-01-01", "Origin":"Europe"} 60 | {"Name":"fiat 124b", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":88, "Horsepower":76, "Weight_in_lbs":2065, "Acceleration":14.5, "Year":"1971-01-01", "Origin":"Europe"} 61 | {"Name":"toyota corolla 1200", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":71, "Horsepower":65, "Weight_in_lbs":1773, "Acceleration":19, "Year":"1971-01-01", "Origin":"Japan"} 62 | {"Name":"datsun 1200", "Miles_per_Gallon":35, "Cylinders":4, "Displacement":72, "Horsepower":69, "Weight_in_lbs":1613, "Acceleration":18, "Year":"1971-01-01", "Origin":"Japan"} 63 | {"Name":"volkswagen model 111", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":97, "Horsepower":60, "Weight_in_lbs":1834, "Acceleration":19, "Year":"1971-01-01", "Origin":"Europe"} 64 | {"Name":"plymouth cricket", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":91, "Horsepower":70, "Weight_in_lbs":1955, "Acceleration":20.5, "Year":"1971-01-01", "Origin":"USA"} 65 | {"Name":"toyota corona hardtop", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":113, "Horsepower":95, "Weight_in_lbs":2278, "Acceleration":15.5, "Year":"1972-01-01", "Origin":"Japan"} 66 | {"Name":"dodge colt hardtop", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":97.5, "Horsepower":80, "Weight_in_lbs":2126, "Acceleration":17, "Year":"1972-01-01", "Origin":"USA"} 67 | {"Name":"volkswagen type 3", 
"Miles_per_Gallon":23, "Cylinders":4, "Displacement":97, "Horsepower":54, "Weight_in_lbs":2254, "Acceleration":23.5, "Year":"1972-01-01", "Origin":"Europe"} 68 | {"Name":"chevrolet vega", "Miles_per_Gallon":20, "Cylinders":4, "Displacement":140, "Horsepower":90, "Weight_in_lbs":2408, "Acceleration":19.5, "Year":"1972-01-01", "Origin":"USA"} 69 | {"Name":"ford pinto runabout", "Miles_per_Gallon":21, "Cylinders":4, "Displacement":122, "Horsepower":86, "Weight_in_lbs":2226, "Acceleration":16.5, "Year":"1972-01-01", "Origin":"USA"} 70 | {"Name":"chevrolet impala", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":165, "Weight_in_lbs":4274, "Acceleration":12, "Year":"1972-01-01", "Origin":"USA"} 71 | {"Name":"pontiac catalina", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":400, "Horsepower":175, "Weight_in_lbs":4385, "Acceleration":12, "Year":"1972-01-01", "Origin":"USA"} 72 | {"Name":"plymouth fury iii", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4135, "Acceleration":13.5, "Year":"1972-01-01", "Origin":"USA"} 73 | {"Name":"ford galaxie 500", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":351, "Horsepower":153, "Weight_in_lbs":4129, "Acceleration":13, "Year":"1972-01-01", "Origin":"USA"} 74 | {"Name":"amc ambassador sst", "Miles_per_Gallon":17, "Cylinders":8, "Displacement":304, "Horsepower":150, "Weight_in_lbs":3672, "Acceleration":11.5, "Year":"1972-01-01", "Origin":"USA"} 75 | {"Name":"mercury marquis", "Miles_per_Gallon":11, "Cylinders":8, "Displacement":429, "Horsepower":208, "Weight_in_lbs":4633, "Acceleration":11, "Year":"1972-01-01", "Origin":"USA"} 76 | {"Name":"buick lesabre custom", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":155, "Weight_in_lbs":4502, "Acceleration":13.5, "Year":"1972-01-01", "Origin":"USA"} 77 | {"Name":"oldsmobile delta 88 royale", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":350, "Horsepower":160, "Weight_in_lbs":4456, 
"Acceleration":13.5, "Year":"1972-01-01", "Origin":"USA"} 78 | {"Name":"chrysler newport royal", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":400, "Horsepower":190, "Weight_in_lbs":4422, "Acceleration":12.5, "Year":"1972-01-01", "Origin":"USA"} 79 | {"Name":"mazda rx2 coupe", "Miles_per_Gallon":19, "Cylinders":3, "Displacement":70, "Horsepower":97, "Weight_in_lbs":2330, "Acceleration":13.5, "Year":"1972-01-01", "Origin":"Japan"} 80 | {"Name":"amc matador (sw)", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":304, "Horsepower":150, "Weight_in_lbs":3892, "Acceleration":12.5, "Year":"1972-01-01", "Origin":"USA"} 81 | {"Name":"chevrolet chevelle concours (sw)", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":307, "Horsepower":130, "Weight_in_lbs":4098, "Acceleration":14, "Year":"1972-01-01", "Origin":"USA"} 82 | {"Name":"ford gran torino (sw)", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":302, "Horsepower":140, "Weight_in_lbs":4294, "Acceleration":16, "Year":"1972-01-01", "Origin":"USA"} 83 | {"Name":"plymouth satellite custom (sw)", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4077, "Acceleration":14, "Year":"1972-01-01", "Origin":"USA"} 84 | {"Name":"volvo 145e (sw)", "Miles_per_Gallon":18, "Cylinders":4, "Displacement":121, "Horsepower":112, "Weight_in_lbs":2933, "Acceleration":14.5, "Year":"1972-01-01", "Origin":"Europe"} 85 | {"Name":"volkswagen 411 (sw)", "Miles_per_Gallon":22, "Cylinders":4, "Displacement":121, "Horsepower":76, "Weight_in_lbs":2511, "Acceleration":18, "Year":"1972-01-01", "Origin":"Europe"} 86 | {"Name":"peugeot 504 (sw)", "Miles_per_Gallon":21, "Cylinders":4, "Displacement":120, "Horsepower":87, "Weight_in_lbs":2979, "Acceleration":19.5, "Year":"1972-01-01", "Origin":"Europe"} 87 | {"Name":"renault 12 (sw)", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":96, "Horsepower":69, "Weight_in_lbs":2189, "Acceleration":18, "Year":"1972-01-01", "Origin":"Europe"} 88 | 
{"Name":"ford pinto (sw)", "Miles_per_Gallon":22, "Cylinders":4, "Displacement":122, "Horsepower":86, "Weight_in_lbs":2395, "Acceleration":16, "Year":"1972-01-01", "Origin":"USA"} 89 | {"Name":"datsun 510 (sw)", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":97, "Horsepower":92, "Weight_in_lbs":2288, "Acceleration":17, "Year":"1972-01-01", "Origin":"Japan"} 90 | {"Name":"toyouta corona mark ii (sw)", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":120, "Horsepower":97, "Weight_in_lbs":2506, "Acceleration":14.5, "Year":"1972-01-01", "Origin":"Japan"} 91 | {"Name":"dodge colt (sw)", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":98, "Horsepower":80, "Weight_in_lbs":2164, "Acceleration":15, "Year":"1972-01-01", "Origin":"USA"} 92 | {"Name":"toyota corolla 1600 (sw)", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":97, "Horsepower":88, "Weight_in_lbs":2100, "Acceleration":16.5, "Year":"1972-01-01", "Origin":"Japan"} 93 | {"Name":"buick century 350", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":175, "Weight_in_lbs":4100, "Acceleration":13, "Year":"1973-01-01", "Origin":"USA"} 94 | {"Name":"amc matador", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":304, "Horsepower":150, "Weight_in_lbs":3672, "Acceleration":11.5, "Year":"1973-01-01", "Origin":"USA"} 95 | {"Name":"chevrolet malibu", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":145, "Weight_in_lbs":3988, "Acceleration":13, "Year":"1973-01-01", "Origin":"USA"} 96 | {"Name":"ford gran torino", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":302, "Horsepower":137, "Weight_in_lbs":4042, "Acceleration":14.5, "Year":"1973-01-01", "Origin":"USA"} 97 | {"Name":"dodge coronet custom", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":3777, "Acceleration":12.5, "Year":"1973-01-01", "Origin":"USA"} 98 | {"Name":"mercury marquis brougham", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":429, 
"Horsepower":198, "Weight_in_lbs":4952, "Acceleration":11.5, "Year":"1973-01-01", "Origin":"USA"} 99 | {"Name":"chevrolet caprice classic", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":400, "Horsepower":150, "Weight_in_lbs":4464, "Acceleration":12, "Year":"1973-01-01", "Origin":"USA"} 100 | {"Name":"ford ltd", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":351, "Horsepower":158, "Weight_in_lbs":4363, "Acceleration":13, "Year":"1973-01-01", "Origin":"USA"} 101 | {"Name":"plymouth fury gran sedan", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4237, "Acceleration":14.5, "Year":"1973-01-01", "Origin":"USA"} 102 | {"Name":"chrysler new yorker brougham", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":440, "Horsepower":215, "Weight_in_lbs":4735, "Acceleration":11, "Year":"1973-01-01", "Origin":"USA"} 103 | {"Name":"buick electra 225 custom", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":455, "Horsepower":225, "Weight_in_lbs":4951, "Acceleration":11, "Year":"1973-01-01", "Origin":"USA"} 104 | {"Name":"amc ambassador brougham", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":360, "Horsepower":175, "Weight_in_lbs":3821, "Acceleration":11, "Year":"1973-01-01", "Origin":"USA"} 105 | {"Name":"plymouth valiant", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":225, "Horsepower":105, "Weight_in_lbs":3121, "Acceleration":16.5, "Year":"1973-01-01", "Origin":"USA"} 106 | {"Name":"chevrolet nova custom", "Miles_per_Gallon":16, "Cylinders":6, "Displacement":250, "Horsepower":100, "Weight_in_lbs":3278, "Acceleration":18, "Year":"1973-01-01", "Origin":"USA"} 107 | {"Name":"amc hornet", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":2945, "Acceleration":16, "Year":"1973-01-01", "Origin":"USA"} 108 | {"Name":"ford maverick", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":250, "Horsepower":88, "Weight_in_lbs":3021, "Acceleration":16.5, "Year":"1973-01-01", 
"Origin":"USA"} 109 | {"Name":"plymouth duster", "Miles_per_Gallon":23, "Cylinders":6, "Displacement":198, "Horsepower":95, "Weight_in_lbs":2904, "Acceleration":16, "Year":"1973-01-01", "Origin":"USA"} 110 | {"Name":"volkswagen super beetle", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":97, "Horsepower":46, "Weight_in_lbs":1950, "Acceleration":21, "Year":"1973-01-01", "Origin":"Europe"} 111 | {"Name":"chevrolet impala", "Miles_per_Gallon":11, "Cylinders":8, "Displacement":400, "Horsepower":150, "Weight_in_lbs":4997, "Acceleration":14, "Year":"1973-01-01", "Origin":"USA"} 112 | {"Name":"ford country", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":400, "Horsepower":167, "Weight_in_lbs":4906, "Acceleration":12.5, "Year":"1973-01-01", "Origin":"USA"} 113 | {"Name":"plymouth custom suburb", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":360, "Horsepower":170, "Weight_in_lbs":4654, "Acceleration":13, "Year":"1973-01-01", "Origin":"USA"} 114 | {"Name":"oldsmobile vista cruiser", "Miles_per_Gallon":12, "Cylinders":8, "Displacement":350, "Horsepower":180, "Weight_in_lbs":4499, "Acceleration":12.5, "Year":"1973-01-01", "Origin":"USA"} 115 | {"Name":"amc gremlin", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":2789, "Acceleration":15, "Year":"1973-01-01", "Origin":"USA"} 116 | {"Name":"toyota carina", "Miles_per_Gallon":20, "Cylinders":4, "Displacement":97, "Horsepower":88, "Weight_in_lbs":2279, "Acceleration":19, "Year":"1973-01-01", "Origin":"Japan"} 117 | {"Name":"chevrolet vega", "Miles_per_Gallon":21, "Cylinders":4, "Displacement":140, "Horsepower":72, "Weight_in_lbs":2401, "Acceleration":19.5, "Year":"1973-01-01", "Origin":"USA"} 118 | {"Name":"datsun 610", "Miles_per_Gallon":22, "Cylinders":4, "Displacement":108, "Horsepower":94, "Weight_in_lbs":2379, "Acceleration":16.5, "Year":"1973-01-01", "Origin":"Japan"} 119 | {"Name":"maxda rx3", "Miles_per_Gallon":18, "Cylinders":3, "Displacement":70, 
"Horsepower":90, "Weight_in_lbs":2124, "Acceleration":13.5, "Year":"1973-01-01", "Origin":"Japan"} 120 | {"Name":"ford pinto", "Miles_per_Gallon":19, "Cylinders":4, "Displacement":122, "Horsepower":85, "Weight_in_lbs":2310, "Acceleration":18.5, "Year":"1973-01-01", "Origin":"USA"} 121 | {"Name":"mercury capri v6", "Miles_per_Gallon":21, "Cylinders":6, "Displacement":155, "Horsepower":107, "Weight_in_lbs":2472, "Acceleration":14, "Year":"1973-01-01", "Origin":"USA"} 122 | {"Name":"fiat 124 sport coupe", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":98, "Horsepower":90, "Weight_in_lbs":2265, "Acceleration":15.5, "Year":"1973-01-01", "Origin":"Europe"} 123 | {"Name":"chevrolet monte carlo s", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":350, "Horsepower":145, "Weight_in_lbs":4082, "Acceleration":13, "Year":"1973-01-01", "Origin":"USA"} 124 | {"Name":"pontiac grand prix", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":400, "Horsepower":230, "Weight_in_lbs":4278, "Acceleration":9.5, "Year":"1973-01-01", "Origin":"USA"} 125 | {"Name":"fiat 128", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":68, "Horsepower":49, "Weight_in_lbs":1867, "Acceleration":19.5, "Year":"1973-01-01", "Origin":"Europe"} 126 | {"Name":"opel manta", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":116, "Horsepower":75, "Weight_in_lbs":2158, "Acceleration":15.5, "Year":"1973-01-01", "Origin":"Europe"} 127 | {"Name":"audi 100ls", "Miles_per_Gallon":20, "Cylinders":4, "Displacement":114, "Horsepower":91, "Weight_in_lbs":2582, "Acceleration":14, "Year":"1973-01-01", "Origin":"Europe"} 128 | {"Name":"volvo 144ea", "Miles_per_Gallon":19, "Cylinders":4, "Displacement":121, "Horsepower":112, "Weight_in_lbs":2868, "Acceleration":15.5, "Year":"1973-01-01", "Origin":"Europe"} 129 | {"Name":"dodge dart custom", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":3399, "Acceleration":11, "Year":"1973-01-01", "Origin":"USA"} 130 | 
{"Name":"saab 99le", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":121, "Horsepower":110, "Weight_in_lbs":2660, "Acceleration":14, "Year":"1973-01-01", "Origin":"Europe"} 131 | {"Name":"toyota mark ii", "Miles_per_Gallon":20, "Cylinders":6, "Displacement":156, "Horsepower":122, "Weight_in_lbs":2807, "Acceleration":13.5, "Year":"1973-01-01", "Origin":"Japan"} 132 | {"Name":"oldsmobile omega", "Miles_per_Gallon":11, "Cylinders":8, "Displacement":350, "Horsepower":180, "Weight_in_lbs":3664, "Acceleration":11, "Year":"1973-01-01", "Origin":"USA"} 133 | {"Name":"plymouth duster", "Miles_per_Gallon":20, "Cylinders":6, "Displacement":198, "Horsepower":95, "Weight_in_lbs":3102, "Acceleration":16.5, "Year":"1974-01-01", "Origin":"USA"} 134 | {"Name":"ford maverick", "Miles_per_Gallon":21, "Cylinders":6, "Displacement":200, "Horsepower":null, "Weight_in_lbs":2875, "Acceleration":17, "Year":"1974-01-01", "Origin":"USA"} 135 | {"Name":"amc hornet", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":2901, "Acceleration":16, "Year":"1974-01-01", "Origin":"USA"} 136 | {"Name":"chevrolet nova", "Miles_per_Gallon":15, "Cylinders":6, "Displacement":250, "Horsepower":100, "Weight_in_lbs":3336, "Acceleration":17, "Year":"1974-01-01", "Origin":"USA"} 137 | {"Name":"datsun b210", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":79, "Horsepower":67, "Weight_in_lbs":1950, "Acceleration":19, "Year":"1974-01-01", "Origin":"Japan"} 138 | {"Name":"ford pinto", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":122, "Horsepower":80, "Weight_in_lbs":2451, "Acceleration":16.5, "Year":"1974-01-01", "Origin":"USA"} 139 | {"Name":"toyota corolla 1200", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":71, "Horsepower":65, "Weight_in_lbs":1836, "Acceleration":21, "Year":"1974-01-01", "Origin":"Japan"} 140 | {"Name":"chevrolet vega", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":140, "Horsepower":75, "Weight_in_lbs":2542, 
"Acceleration":17, "Year":"1974-01-01", "Origin":"USA"} 141 | {"Name":"chevrolet chevelle malibu classic", "Miles_per_Gallon":16, "Cylinders":6, "Displacement":250, "Horsepower":100, "Weight_in_lbs":3781, "Acceleration":17, "Year":"1974-01-01", "Origin":"USA"} 142 | {"Name":"amc matador", "Miles_per_Gallon":16, "Cylinders":6, "Displacement":258, "Horsepower":110, "Weight_in_lbs":3632, "Acceleration":18, "Year":"1974-01-01", "Origin":"USA"} 143 | {"Name":"plymouth satellite sebring", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":225, "Horsepower":105, "Weight_in_lbs":3613, "Acceleration":16.5, "Year":"1974-01-01", "Origin":"USA"} 144 | {"Name":"ford gran torino", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":302, "Horsepower":140, "Weight_in_lbs":4141, "Acceleration":14, "Year":"1974-01-01", "Origin":"USA"} 145 | {"Name":"buick century luxus (sw)", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":150, "Weight_in_lbs":4699, "Acceleration":14.5, "Year":"1974-01-01", "Origin":"USA"} 146 | {"Name":"dodge coronet custom (sw)", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4457, "Acceleration":13.5, "Year":"1974-01-01", "Origin":"USA"} 147 | {"Name":"ford gran torino (sw)", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":302, "Horsepower":140, "Weight_in_lbs":4638, "Acceleration":16, "Year":"1974-01-01", "Origin":"USA"} 148 | {"Name":"amc matador (sw)", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":304, "Horsepower":150, "Weight_in_lbs":4257, "Acceleration":15.5, "Year":"1974-01-01", "Origin":"USA"} 149 | {"Name":"audi fox", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":98, "Horsepower":83, "Weight_in_lbs":2219, "Acceleration":16.5, "Year":"1974-01-01", "Origin":"Europe"} 150 | {"Name":"volkswagen dasher", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":79, "Horsepower":67, "Weight_in_lbs":1963, "Acceleration":15.5, "Year":"1974-01-01", "Origin":"Europe"} 151 | 
{"Name":"opel manta", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":97, "Horsepower":78, "Weight_in_lbs":2300, "Acceleration":14.5, "Year":"1974-01-01", "Origin":"Europe"} 152 | {"Name":"toyota corona", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":76, "Horsepower":52, "Weight_in_lbs":1649, "Acceleration":16.5, "Year":"1974-01-01", "Origin":"Japan"} 153 | {"Name":"datsun 710", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":83, "Horsepower":61, "Weight_in_lbs":2003, "Acceleration":19, "Year":"1974-01-01", "Origin":"Japan"} 154 | {"Name":"dodge colt", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":90, "Horsepower":75, "Weight_in_lbs":2125, "Acceleration":14.5, "Year":"1974-01-01", "Origin":"USA"} 155 | {"Name":"fiat 128", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":90, "Horsepower":75, "Weight_in_lbs":2108, "Acceleration":15.5, "Year":"1974-01-01", "Origin":"Europe"} 156 | {"Name":"fiat 124 tc", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":116, "Horsepower":75, "Weight_in_lbs":2246, "Acceleration":14, "Year":"1974-01-01", "Origin":"Europe"} 157 | {"Name":"honda civic", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":120, "Horsepower":97, "Weight_in_lbs":2489, "Acceleration":15, "Year":"1974-01-01", "Origin":"Japan"} 158 | {"Name":"subaru", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":108, "Horsepower":93, "Weight_in_lbs":2391, "Acceleration":15.5, "Year":"1974-01-01", "Origin":"Japan"} 159 | {"Name":"fiat x1.9", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":79, "Horsepower":67, "Weight_in_lbs":2000, "Acceleration":16, "Year":"1974-01-01", "Origin":"Europe"} 160 | {"Name":"plymouth valiant custom", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":225, "Horsepower":95, "Weight_in_lbs":3264, "Acceleration":16, "Year":"1975-01-01", "Origin":"USA"} 161 | {"Name":"chevrolet nova", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":250, "Horsepower":105, "Weight_in_lbs":3459, "Acceleration":16, 
"Year":"1975-01-01", "Origin":"USA"} 162 | {"Name":"mercury monarch", "Miles_per_Gallon":15, "Cylinders":6, "Displacement":250, "Horsepower":72, "Weight_in_lbs":3432, "Acceleration":21, "Year":"1975-01-01", "Origin":"USA"} 163 | {"Name":"ford maverick", "Miles_per_Gallon":15, "Cylinders":6, "Displacement":250, "Horsepower":72, "Weight_in_lbs":3158, "Acceleration":19.5, "Year":"1975-01-01", "Origin":"USA"} 164 | {"Name":"pontiac catalina", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":400, "Horsepower":170, "Weight_in_lbs":4668, "Acceleration":11.5, "Year":"1975-01-01", "Origin":"USA"} 165 | {"Name":"chevrolet bel air", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":350, "Horsepower":145, "Weight_in_lbs":4440, "Acceleration":14, "Year":"1975-01-01", "Origin":"USA"} 166 | {"Name":"plymouth grand fury", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4498, "Acceleration":14.5, "Year":"1975-01-01", "Origin":"USA"} 167 | {"Name":"ford ltd", "Miles_per_Gallon":14, "Cylinders":8, "Displacement":351, "Horsepower":148, "Weight_in_lbs":4657, "Acceleration":13.5, "Year":"1975-01-01", "Origin":"USA"} 168 | {"Name":"buick century", "Miles_per_Gallon":17, "Cylinders":6, "Displacement":231, "Horsepower":110, "Weight_in_lbs":3907, "Acceleration":21, "Year":"1975-01-01", "Origin":"USA"} 169 | {"Name":"chevroelt chevelle malibu", "Miles_per_Gallon":16, "Cylinders":6, "Displacement":250, "Horsepower":105, "Weight_in_lbs":3897, "Acceleration":18.5, "Year":"1975-01-01", "Origin":"USA"} 170 | {"Name":"amc matador", "Miles_per_Gallon":15, "Cylinders":6, "Displacement":258, "Horsepower":110, "Weight_in_lbs":3730, "Acceleration":19, "Year":"1975-01-01", "Origin":"USA"} 171 | {"Name":"plymouth fury", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":225, "Horsepower":95, "Weight_in_lbs":3785, "Acceleration":19, "Year":"1975-01-01", "Origin":"USA"} 172 | {"Name":"buick skyhawk", "Miles_per_Gallon":21, "Cylinders":6, 
"Displacement":231, "Horsepower":110, "Weight_in_lbs":3039, "Acceleration":15, "Year":"1975-01-01", "Origin":"USA"} 173 | {"Name":"chevrolet monza 2+2", "Miles_per_Gallon":20, "Cylinders":8, "Displacement":262, "Horsepower":110, "Weight_in_lbs":3221, "Acceleration":13.5, "Year":"1975-01-01", "Origin":"USA"} 174 | {"Name":"ford mustang ii", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":302, "Horsepower":129, "Weight_in_lbs":3169, "Acceleration":12, "Year":"1975-01-01", "Origin":"USA"} 175 | {"Name":"toyota corolla", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":97, "Horsepower":75, "Weight_in_lbs":2171, "Acceleration":16, "Year":"1975-01-01", "Origin":"Japan"} 176 | {"Name":"ford pinto", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":140, "Horsepower":83, "Weight_in_lbs":2639, "Acceleration":17, "Year":"1975-01-01", "Origin":"USA"} 177 | {"Name":"amc gremlin", "Miles_per_Gallon":20, "Cylinders":6, "Displacement":232, "Horsepower":100, "Weight_in_lbs":2914, "Acceleration":16, "Year":"1975-01-01", "Origin":"USA"} 178 | {"Name":"pontiac astro", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":140, "Horsepower":78, "Weight_in_lbs":2592, "Acceleration":18.5, "Year":"1975-01-01", "Origin":"USA"} 179 | {"Name":"toyota corona", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":134, "Horsepower":96, "Weight_in_lbs":2702, "Acceleration":13.5, "Year":"1975-01-01", "Origin":"Japan"} 180 | {"Name":"volkswagen dasher", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":90, "Horsepower":71, "Weight_in_lbs":2223, "Acceleration":16.5, "Year":"1975-01-01", "Origin":"Europe"} 181 | {"Name":"datsun 710", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":119, "Horsepower":97, "Weight_in_lbs":2545, "Acceleration":17, "Year":"1975-01-01", "Origin":"Japan"} 182 | {"Name":"ford pinto", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":171, "Horsepower":97, "Weight_in_lbs":2984, "Acceleration":14.5, "Year":"1975-01-01", "Origin":"USA"} 183 | 
{"Name":"volkswagen rabbit", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":90, "Horsepower":70, "Weight_in_lbs":1937, "Acceleration":14, "Year":"1975-01-01", "Origin":"Europe"} 184 | {"Name":"amc pacer", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":232, "Horsepower":90, "Weight_in_lbs":3211, "Acceleration":17, "Year":"1975-01-01", "Origin":"USA"} 185 | {"Name":"audi 100ls", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":115, "Horsepower":95, "Weight_in_lbs":2694, "Acceleration":15, "Year":"1975-01-01", "Origin":"Europe"} 186 | {"Name":"peugeot 504", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":120, "Horsepower":88, "Weight_in_lbs":2957, "Acceleration":17, "Year":"1975-01-01", "Origin":"Europe"} 187 | {"Name":"volvo 244dl", "Miles_per_Gallon":22, "Cylinders":4, "Displacement":121, "Horsepower":98, "Weight_in_lbs":2945, "Acceleration":14.5, "Year":"1975-01-01", "Origin":"Europe"} 188 | {"Name":"saab 99le", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":121, "Horsepower":115, "Weight_in_lbs":2671, "Acceleration":13.5, "Year":"1975-01-01", "Origin":"Europe"} 189 | {"Name":"honda civic cvcc", "Miles_per_Gallon":33, "Cylinders":4, "Displacement":91, "Horsepower":53, "Weight_in_lbs":1795, "Acceleration":17.5, "Year":"1975-01-01", "Origin":"Japan"} 190 | {"Name":"fiat 131", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":107, "Horsepower":86, "Weight_in_lbs":2464, "Acceleration":15.5, "Year":"1976-01-01", "Origin":"Europe"} 191 | {"Name":"opel 1900", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":116, "Horsepower":81, "Weight_in_lbs":2220, "Acceleration":16.9, "Year":"1976-01-01", "Origin":"Europe"} 192 | {"Name":"capri ii", "Miles_per_Gallon":25, "Cylinders":4, "Displacement":140, "Horsepower":92, "Weight_in_lbs":2572, "Acceleration":14.9, "Year":"1976-01-01", "Origin":"USA"} 193 | {"Name":"dodge colt", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":98, "Horsepower":79, "Weight_in_lbs":2255, "Acceleration":17.7, 
"Year":"1976-01-01", "Origin":"USA"} 194 | {"Name":"renault 12tl", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":101, "Horsepower":83, "Weight_in_lbs":2202, "Acceleration":15.3, "Year":"1976-01-01", "Origin":"Europe"} 195 | {"Name":"chevrolet chevelle malibu classic", "Miles_per_Gallon":17.5, "Cylinders":8, "Displacement":305, "Horsepower":140, "Weight_in_lbs":4215, "Acceleration":13, "Year":"1976-01-01", "Origin":"USA"} 196 | {"Name":"dodge coronet brougham", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":4190, "Acceleration":13, "Year":"1976-01-01", "Origin":"USA"} 197 | {"Name":"amc matador", "Miles_per_Gallon":15.5, "Cylinders":8, "Displacement":304, "Horsepower":120, "Weight_in_lbs":3962, "Acceleration":13.9, "Year":"1976-01-01", "Origin":"USA"} 198 | {"Name":"ford gran torino", "Miles_per_Gallon":14.5, "Cylinders":8, "Displacement":351, "Horsepower":152, "Weight_in_lbs":4215, "Acceleration":12.8, "Year":"1976-01-01", "Origin":"USA"} 199 | {"Name":"plymouth valiant", "Miles_per_Gallon":22, "Cylinders":6, "Displacement":225, "Horsepower":100, "Weight_in_lbs":3233, "Acceleration":15.4, "Year":"1976-01-01", "Origin":"USA"} 200 | {"Name":"chevrolet nova", "Miles_per_Gallon":22, "Cylinders":6, "Displacement":250, "Horsepower":105, "Weight_in_lbs":3353, "Acceleration":14.5, "Year":"1976-01-01", "Origin":"USA"} 201 | {"Name":"ford maverick", "Miles_per_Gallon":24, "Cylinders":6, "Displacement":200, "Horsepower":81, "Weight_in_lbs":3012, "Acceleration":17.6, "Year":"1976-01-01", "Origin":"USA"} 202 | {"Name":"amc hornet", "Miles_per_Gallon":22.5, "Cylinders":6, "Displacement":232, "Horsepower":90, "Weight_in_lbs":3085, "Acceleration":17.6, "Year":"1976-01-01", "Origin":"USA"} 203 | {"Name":"chevrolet chevette", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":85, "Horsepower":52, "Weight_in_lbs":2035, "Acceleration":22.2, "Year":"1976-01-01", "Origin":"USA"} 204 | {"Name":"chevrolet woody", 
"Miles_per_Gallon":24.5, "Cylinders":4, "Displacement":98, "Horsepower":60, "Weight_in_lbs":2164, "Acceleration":22.1, "Year":"1976-01-01", "Origin":"USA"} 205 | {"Name":"vw rabbit", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":90, "Horsepower":70, "Weight_in_lbs":1937, "Acceleration":14.2, "Year":"1976-01-01", "Origin":"Europe"} 206 | {"Name":"honda civic", "Miles_per_Gallon":33, "Cylinders":4, "Displacement":91, "Horsepower":53, "Weight_in_lbs":1795, "Acceleration":17.4, "Year":"1976-01-01", "Origin":"Japan"} 207 | {"Name":"dodge aspen se", "Miles_per_Gallon":20, "Cylinders":6, "Displacement":225, "Horsepower":100, "Weight_in_lbs":3651, "Acceleration":17.7, "Year":"1976-01-01", "Origin":"USA"} 208 | {"Name":"ford granada ghia", "Miles_per_Gallon":18, "Cylinders":6, "Displacement":250, "Horsepower":78, "Weight_in_lbs":3574, "Acceleration":21, "Year":"1976-01-01", "Origin":"USA"} 209 | {"Name":"pontiac ventura sj", "Miles_per_Gallon":18.5, "Cylinders":6, "Displacement":250, "Horsepower":110, "Weight_in_lbs":3645, "Acceleration":16.2, "Year":"1976-01-01", "Origin":"USA"} 210 | {"Name":"amc pacer d/l", "Miles_per_Gallon":17.5, "Cylinders":6, "Displacement":258, "Horsepower":95, "Weight_in_lbs":3193, "Acceleration":17.8, "Year":"1976-01-01", "Origin":"USA"} 211 | {"Name":"volkswagen rabbit", "Miles_per_Gallon":29.5, "Cylinders":4, "Displacement":97, "Horsepower":71, "Weight_in_lbs":1825, "Acceleration":12.2, "Year":"1976-01-01", "Origin":"Europe"} 212 | {"Name":"datsun b-210", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":85, "Horsepower":70, "Weight_in_lbs":1990, "Acceleration":17, "Year":"1976-01-01", "Origin":"Japan"} 213 | {"Name":"toyota corolla", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":97, "Horsepower":75, "Weight_in_lbs":2155, "Acceleration":16.4, "Year":"1976-01-01", "Origin":"Japan"} 214 | {"Name":"ford pinto", "Miles_per_Gallon":26.5, "Cylinders":4, "Displacement":140, "Horsepower":72, "Weight_in_lbs":2565, "Acceleration":13.6, 
"Year":"1976-01-01", "Origin":"USA"} 215 | {"Name":"volvo 245", "Miles_per_Gallon":20, "Cylinders":4, "Displacement":130, "Horsepower":102, "Weight_in_lbs":3150, "Acceleration":15.7, "Year":"1976-01-01", "Origin":"Europe"} 216 | {"Name":"plymouth volare premier v8", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":3940, "Acceleration":13.2, "Year":"1976-01-01", "Origin":"USA"} 217 | {"Name":"peugeot 504", "Miles_per_Gallon":19, "Cylinders":4, "Displacement":120, "Horsepower":88, "Weight_in_lbs":3270, "Acceleration":21.9, "Year":"1976-01-01", "Origin":"Europe"} 218 | {"Name":"toyota mark ii", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":156, "Horsepower":108, "Weight_in_lbs":2930, "Acceleration":15.5, "Year":"1976-01-01", "Origin":"Japan"} 219 | {"Name":"mercedes-benz 280s", "Miles_per_Gallon":16.5, "Cylinders":6, "Displacement":168, "Horsepower":120, "Weight_in_lbs":3820, "Acceleration":16.7, "Year":"1976-01-01", "Origin":"Europe"} 220 | {"Name":"cadillac seville", "Miles_per_Gallon":16.5, "Cylinders":8, "Displacement":350, "Horsepower":180, "Weight_in_lbs":4380, "Acceleration":12.1, "Year":"1976-01-01", "Origin":"USA"} 221 | {"Name":"chevy c10", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":350, "Horsepower":145, "Weight_in_lbs":4055, "Acceleration":12, "Year":"1976-01-01", "Origin":"USA"} 222 | {"Name":"ford f108", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":302, "Horsepower":130, "Weight_in_lbs":3870, "Acceleration":15, "Year":"1976-01-01", "Origin":"USA"} 223 | {"Name":"dodge d100", "Miles_per_Gallon":13, "Cylinders":8, "Displacement":318, "Horsepower":150, "Weight_in_lbs":3755, "Acceleration":14, "Year":"1976-01-01", "Origin":"USA"} 224 | {"Name":"honda Accelerationord cvcc", "Miles_per_Gallon":31.5, "Cylinders":4, "Displacement":98, "Horsepower":68, "Weight_in_lbs":2045, "Acceleration":18.5, "Year":"1977-01-01", "Origin":"Japan"} 225 | {"Name":"buick opel isuzu deluxe", "Miles_per_Gallon":30, 
"Cylinders":4, "Displacement":111, "Horsepower":80, "Weight_in_lbs":2155, "Acceleration":14.8, "Year":"1977-01-01", "Origin":"USA"} 226 | {"Name":"renault 5 gtl", "Miles_per_Gallon":36, "Cylinders":4, "Displacement":79, "Horsepower":58, "Weight_in_lbs":1825, "Acceleration":18.6, "Year":"1977-01-01", "Origin":"Europe"} 227 | {"Name":"plymouth arrow gs", "Miles_per_Gallon":25.5, "Cylinders":4, "Displacement":122, "Horsepower":96, "Weight_in_lbs":2300, "Acceleration":15.5, "Year":"1977-01-01", "Origin":"USA"} 228 | {"Name":"datsun f-10 hatchback", "Miles_per_Gallon":33.5, "Cylinders":4, "Displacement":85, "Horsepower":70, "Weight_in_lbs":1945, "Acceleration":16.8, "Year":"1977-01-01", "Origin":"Japan"} 229 | {"Name":"chevrolet caprice classic", "Miles_per_Gallon":17.5, "Cylinders":8, "Displacement":305, "Horsepower":145, "Weight_in_lbs":3880, "Acceleration":12.5, "Year":"1977-01-01", "Origin":"USA"} 230 | {"Name":"oldsmobile cutlass supreme", "Miles_per_Gallon":17, "Cylinders":8, "Displacement":260, "Horsepower":110, "Weight_in_lbs":4060, "Acceleration":19, "Year":"1977-01-01", "Origin":"USA"} 231 | {"Name":"dodge monaco brougham", "Miles_per_Gallon":15.5, "Cylinders":8, "Displacement":318, "Horsepower":145, "Weight_in_lbs":4140, "Acceleration":13.7, "Year":"1977-01-01", "Origin":"USA"} 232 | {"Name":"mercury cougar brougham", "Miles_per_Gallon":15, "Cylinders":8, "Displacement":302, "Horsepower":130, "Weight_in_lbs":4295, "Acceleration":14.9, "Year":"1977-01-01", "Origin":"USA"} 233 | {"Name":"chevrolet concours", "Miles_per_Gallon":17.5, "Cylinders":6, "Displacement":250, "Horsepower":110, "Weight_in_lbs":3520, "Acceleration":16.4, "Year":"1977-01-01", "Origin":"USA"} 234 | {"Name":"buick skylark", "Miles_per_Gallon":20.5, "Cylinders":6, "Displacement":231, "Horsepower":105, "Weight_in_lbs":3425, "Acceleration":16.9, "Year":"1977-01-01", "Origin":"USA"} 235 | {"Name":"plymouth volare custom", "Miles_per_Gallon":19, "Cylinders":6, "Displacement":225, 
"Horsepower":100, "Weight_in_lbs":3630, "Acceleration":17.7, "Year":"1977-01-01", "Origin":"USA"} 236 | {"Name":"ford granada", "Miles_per_Gallon":18.5, "Cylinders":6, "Displacement":250, "Horsepower":98, "Weight_in_lbs":3525, "Acceleration":19, "Year":"1977-01-01", "Origin":"USA"} 237 | {"Name":"pontiac grand prix lj", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":400, "Horsepower":180, "Weight_in_lbs":4220, "Acceleration":11.1, "Year":"1977-01-01", "Origin":"USA"} 238 | {"Name":"chevrolet monte carlo landau", "Miles_per_Gallon":15.5, "Cylinders":8, "Displacement":350, "Horsepower":170, "Weight_in_lbs":4165, "Acceleration":11.4, "Year":"1977-01-01", "Origin":"USA"} 239 | {"Name":"chrysler cordoba", "Miles_per_Gallon":15.5, "Cylinders":8, "Displacement":400, "Horsepower":190, "Weight_in_lbs":4325, "Acceleration":12.2, "Year":"1977-01-01", "Origin":"USA"} 240 | {"Name":"ford thunderbird", "Miles_per_Gallon":16, "Cylinders":8, "Displacement":351, "Horsepower":149, "Weight_in_lbs":4335, "Acceleration":14.5, "Year":"1977-01-01", "Origin":"USA"} 241 | {"Name":"volkswagen rabbit custom", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":97, "Horsepower":78, "Weight_in_lbs":1940, "Acceleration":14.5, "Year":"1977-01-01", "Origin":"Europe"} 242 | {"Name":"pontiac sunbird coupe", "Miles_per_Gallon":24.5, "Cylinders":4, "Displacement":151, "Horsepower":88, "Weight_in_lbs":2740, "Acceleration":16, "Year":"1977-01-01", "Origin":"USA"} 243 | {"Name":"toyota corolla liftback", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":97, "Horsepower":75, "Weight_in_lbs":2265, "Acceleration":18.2, "Year":"1977-01-01", "Origin":"Japan"} 244 | {"Name":"ford mustang ii 2+2", "Miles_per_Gallon":25.5, "Cylinders":4, "Displacement":140, "Horsepower":89, "Weight_in_lbs":2755, "Acceleration":15.8, "Year":"1977-01-01", "Origin":"USA"} 245 | {"Name":"chevrolet chevette", "Miles_per_Gallon":30.5, "Cylinders":4, "Displacement":98, "Horsepower":63, "Weight_in_lbs":2051, 
"Acceleration":17, "Year":"1977-01-01", "Origin":"USA"} 246 | {"Name":"dodge colt m/m", "Miles_per_Gallon":33.5, "Cylinders":4, "Displacement":98, "Horsepower":83, "Weight_in_lbs":2075, "Acceleration":15.9, "Year":"1977-01-01", "Origin":"USA"} 247 | {"Name":"subaru dl", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":97, "Horsepower":67, "Weight_in_lbs":1985, "Acceleration":16.4, "Year":"1977-01-01", "Origin":"Japan"} 248 | {"Name":"volkswagen dasher", "Miles_per_Gallon":30.5, "Cylinders":4, "Displacement":97, "Horsepower":78, "Weight_in_lbs":2190, "Acceleration":14.1, "Year":"1977-01-01", "Origin":"Europe"} 249 | {"Name":"datsun 810", "Miles_per_Gallon":22, "Cylinders":6, "Displacement":146, "Horsepower":97, "Weight_in_lbs":2815, "Acceleration":14.5, "Year":"1977-01-01", "Origin":"Japan"} 250 | {"Name":"bmw 320i", "Miles_per_Gallon":21.5, "Cylinders":4, "Displacement":121, "Horsepower":110, "Weight_in_lbs":2600, "Acceleration":12.8, "Year":"1977-01-01", "Origin":"Europe"} 251 | {"Name":"mazda rx-4", "Miles_per_Gallon":21.5, "Cylinders":3, "Displacement":80, "Horsepower":110, "Weight_in_lbs":2720, "Acceleration":13.5, "Year":"1977-01-01", "Origin":"Japan"} 252 | {"Name":"volkswagen rabbit custom diesel", "Miles_per_Gallon":43.1, "Cylinders":4, "Displacement":90, "Horsepower":48, "Weight_in_lbs":1985, "Acceleration":21.5, "Year":"1978-01-01", "Origin":"Europe"} 253 | {"Name":"ford fiesta", "Miles_per_Gallon":36.1, "Cylinders":4, "Displacement":98, "Horsepower":66, "Weight_in_lbs":1800, "Acceleration":14.4, "Year":"1978-01-01", "Origin":"USA"} 254 | {"Name":"mazda glc deluxe", "Miles_per_Gallon":32.8, "Cylinders":4, "Displacement":78, "Horsepower":52, "Weight_in_lbs":1985, "Acceleration":19.4, "Year":"1978-01-01", "Origin":"Japan"} 255 | {"Name":"datsun b210 gx", "Miles_per_Gallon":39.4, "Cylinders":4, "Displacement":85, "Horsepower":70, "Weight_in_lbs":2070, "Acceleration":18.6, "Year":"1978-01-01", "Origin":"Japan"} 256 | {"Name":"honda civic cvcc", 
"Miles_per_Gallon":36.1, "Cylinders":4, "Displacement":91, "Horsepower":60, "Weight_in_lbs":1800, "Acceleration":16.4, "Year":"1978-01-01", "Origin":"Japan"} 257 | {"Name":"oldsmobile cutlass salon brougham", "Miles_per_Gallon":19.9, "Cylinders":8, "Displacement":260, "Horsepower":110, "Weight_in_lbs":3365, "Acceleration":15.5, "Year":"1978-01-01", "Origin":"USA"} 258 | {"Name":"dodge diplomat", "Miles_per_Gallon":19.4, "Cylinders":8, "Displacement":318, "Horsepower":140, "Weight_in_lbs":3735, "Acceleration":13.2, "Year":"1978-01-01", "Origin":"USA"} 259 | {"Name":"mercury monarch ghia", "Miles_per_Gallon":20.2, "Cylinders":8, "Displacement":302, "Horsepower":139, "Weight_in_lbs":3570, "Acceleration":12.8, "Year":"1978-01-01", "Origin":"USA"} 260 | {"Name":"pontiac phoenix lj", "Miles_per_Gallon":19.2, "Cylinders":6, "Displacement":231, "Horsepower":105, "Weight_in_lbs":3535, "Acceleration":19.2, "Year":"1978-01-01", "Origin":"USA"} 261 | {"Name":"chevrolet malibu", "Miles_per_Gallon":20.5, "Cylinders":6, "Displacement":200, "Horsepower":95, "Weight_in_lbs":3155, "Acceleration":18.2, "Year":"1978-01-01", "Origin":"USA"} 262 | {"Name":"ford fairmont (auto)", "Miles_per_Gallon":20.2, "Cylinders":6, "Displacement":200, "Horsepower":85, "Weight_in_lbs":2965, "Acceleration":15.8, "Year":"1978-01-01", "Origin":"USA"} 263 | {"Name":"ford fairmont (man)", "Miles_per_Gallon":25.1, "Cylinders":4, "Displacement":140, "Horsepower":88, "Weight_in_lbs":2720, "Acceleration":15.4, "Year":"1978-01-01", "Origin":"USA"} 264 | {"Name":"plymouth volare", "Miles_per_Gallon":20.5, "Cylinders":6, "Displacement":225, "Horsepower":100, "Weight_in_lbs":3430, "Acceleration":17.2, "Year":"1978-01-01", "Origin":"USA"} 265 | {"Name":"amc concord", "Miles_per_Gallon":19.4, "Cylinders":6, "Displacement":232, "Horsepower":90, "Weight_in_lbs":3210, "Acceleration":17.2, "Year":"1978-01-01", "Origin":"USA"} 266 | {"Name":"buick century special", "Miles_per_Gallon":20.6, "Cylinders":6, 
"Displacement":231, "Horsepower":105, "Weight_in_lbs":3380, "Acceleration":15.8, "Year":"1978-01-01", "Origin":"USA"} 267 | {"Name":"mercury zephyr", "Miles_per_Gallon":20.8, "Cylinders":6, "Displacement":200, "Horsepower":85, "Weight_in_lbs":3070, "Acceleration":16.7, "Year":"1978-01-01", "Origin":"USA"} 268 | {"Name":"dodge aspen", "Miles_per_Gallon":18.6, "Cylinders":6, "Displacement":225, "Horsepower":110, "Weight_in_lbs":3620, "Acceleration":18.7, "Year":"1978-01-01", "Origin":"USA"} 269 | {"Name":"amc concord d/l", "Miles_per_Gallon":18.1, "Cylinders":6, "Displacement":258, "Horsepower":120, "Weight_in_lbs":3410, "Acceleration":15.1, "Year":"1978-01-01", "Origin":"USA"} 270 | {"Name":"chevrolet monte carlo landau", "Miles_per_Gallon":19.2, "Cylinders":8, "Displacement":305, "Horsepower":145, "Weight_in_lbs":3425, "Acceleration":13.2, "Year":"1978-01-01", "Origin":"USA"} 271 | {"Name":"buick regal sport coupe (turbo)", "Miles_per_Gallon":17.7, "Cylinders":6, "Displacement":231, "Horsepower":165, "Weight_in_lbs":3445, "Acceleration":13.4, "Year":"1978-01-01", "Origin":"USA"} 272 | {"Name":"ford futura", "Miles_per_Gallon":18.1, "Cylinders":8, "Displacement":302, "Horsepower":139, "Weight_in_lbs":3205, "Acceleration":11.2, "Year":"1978-01-01", "Origin":"USA"} 273 | {"Name":"dodge magnum xe", "Miles_per_Gallon":17.5, "Cylinders":8, "Displacement":318, "Horsepower":140, "Weight_in_lbs":4080, "Acceleration":13.7, "Year":"1978-01-01", "Origin":"USA"} 274 | {"Name":"chevrolet chevette", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":98, "Horsepower":68, "Weight_in_lbs":2155, "Acceleration":16.5, "Year":"1978-01-01", "Origin":"USA"} 275 | {"Name":"toyota corona", "Miles_per_Gallon":27.5, "Cylinders":4, "Displacement":134, "Horsepower":95, "Weight_in_lbs":2560, "Acceleration":14.2, "Year":"1978-01-01", "Origin":"Japan"} 276 | {"Name":"datsun 510", "Miles_per_Gallon":27.2, "Cylinders":4, "Displacement":119, "Horsepower":97, "Weight_in_lbs":2300, 
"Acceleration":14.7, "Year":"1978-01-01", "Origin":"Japan"} 277 | {"Name":"dodge omni", "Miles_per_Gallon":30.9, "Cylinders":4, "Displacement":105, "Horsepower":75, "Weight_in_lbs":2230, "Acceleration":14.5, "Year":"1978-01-01", "Origin":"USA"} 278 | {"Name":"toyota celica gt liftback", "Miles_per_Gallon":21.1, "Cylinders":4, "Displacement":134, "Horsepower":95, "Weight_in_lbs":2515, "Acceleration":14.8, "Year":"1978-01-01", "Origin":"Japan"} 279 | {"Name":"plymouth sapporo", "Miles_per_Gallon":23.2, "Cylinders":4, "Displacement":156, "Horsepower":105, "Weight_in_lbs":2745, "Acceleration":16.7, "Year":"1978-01-01", "Origin":"USA"} 280 | {"Name":"oldsmobile starfire sx", "Miles_per_Gallon":23.8, "Cylinders":4, "Displacement":151, "Horsepower":85, "Weight_in_lbs":2855, "Acceleration":17.6, "Year":"1978-01-01", "Origin":"USA"} 281 | {"Name":"datsun 200-sx", "Miles_per_Gallon":23.9, "Cylinders":4, "Displacement":119, "Horsepower":97, "Weight_in_lbs":2405, "Acceleration":14.9, "Year":"1978-01-01", "Origin":"Japan"} 282 | {"Name":"audi 5000", "Miles_per_Gallon":20.3, "Cylinders":5, "Displacement":131, "Horsepower":103, "Weight_in_lbs":2830, "Acceleration":15.9, "Year":"1978-01-01", "Origin":"Europe"} 283 | {"Name":"volvo 264gl", "Miles_per_Gallon":17, "Cylinders":6, "Displacement":163, "Horsepower":125, "Weight_in_lbs":3140, "Acceleration":13.6, "Year":"1978-01-01", "Origin":"Europe"} 284 | {"Name":"saab 99gle", "Miles_per_Gallon":21.6, "Cylinders":4, "Displacement":121, "Horsepower":115, "Weight_in_lbs":2795, "Acceleration":15.7, "Year":"1978-01-01", "Origin":"Europe"} 285 | {"Name":"peugeot 604sl", "Miles_per_Gallon":16.2, "Cylinders":6, "Displacement":163, "Horsepower":133, "Weight_in_lbs":3410, "Acceleration":15.8, "Year":"1978-01-01", "Origin":"Europe"} 286 | {"Name":"volkswagen scirocco", "Miles_per_Gallon":31.5, "Cylinders":4, "Displacement":89, "Horsepower":71, "Weight_in_lbs":1990, "Acceleration":14.9, "Year":"1978-01-01", "Origin":"Europe"} 287 | {"Name":"honda 
Accelerationord lx", "Miles_per_Gallon":29.5, "Cylinders":4, "Displacement":98, "Horsepower":68, "Weight_in_lbs":2135, "Acceleration":16.6, "Year":"1978-01-01", "Origin":"Japan"} 288 | {"Name":"pontiac lemans v6", "Miles_per_Gallon":21.5, "Cylinders":6, "Displacement":231, "Horsepower":115, "Weight_in_lbs":3245, "Acceleration":15.4, "Year":"1979-01-01", "Origin":"USA"} 289 | {"Name":"mercury zephyr 6", "Miles_per_Gallon":19.8, "Cylinders":6, "Displacement":200, "Horsepower":85, "Weight_in_lbs":2990, "Acceleration":18.2, "Year":"1979-01-01", "Origin":"USA"} 290 | {"Name":"ford fairmont 4", "Miles_per_Gallon":22.3, "Cylinders":4, "Displacement":140, "Horsepower":88, "Weight_in_lbs":2890, "Acceleration":17.3, "Year":"1979-01-01", "Origin":"USA"} 291 | {"Name":"amc concord dl 6", "Miles_per_Gallon":20.2, "Cylinders":6, "Displacement":232, "Horsepower":90, "Weight_in_lbs":3265, "Acceleration":18.2, "Year":"1979-01-01", "Origin":"USA"} 292 | {"Name":"dodge aspen 6", "Miles_per_Gallon":20.6, "Cylinders":6, "Displacement":225, "Horsepower":110, "Weight_in_lbs":3360, "Acceleration":16.6, "Year":"1979-01-01", "Origin":"USA"} 293 | {"Name":"chevrolet caprice classic", "Miles_per_Gallon":17, "Cylinders":8, "Displacement":305, "Horsepower":130, "Weight_in_lbs":3840, "Acceleration":15.4, "Year":"1979-01-01", "Origin":"USA"} 294 | {"Name":"ford ltd landau", "Miles_per_Gallon":17.6, "Cylinders":8, "Displacement":302, "Horsepower":129, "Weight_in_lbs":3725, "Acceleration":13.4, "Year":"1979-01-01", "Origin":"USA"} 295 | {"Name":"mercury grand marquis", "Miles_per_Gallon":16.5, "Cylinders":8, "Displacement":351, "Horsepower":138, "Weight_in_lbs":3955, "Acceleration":13.2, "Year":"1979-01-01", "Origin":"USA"} 296 | {"Name":"dodge st. 
regis", "Miles_per_Gallon":18.2, "Cylinders":8, "Displacement":318, "Horsepower":135, "Weight_in_lbs":3830, "Acceleration":15.2, "Year":"1979-01-01", "Origin":"USA"} 297 | {"Name":"buick estate wagon (sw)", "Miles_per_Gallon":16.9, "Cylinders":8, "Displacement":350, "Horsepower":155, "Weight_in_lbs":4360, "Acceleration":14.9, "Year":"1979-01-01", "Origin":"USA"} 298 | {"Name":"ford country squire (sw)", "Miles_per_Gallon":15.5, "Cylinders":8, "Displacement":351, "Horsepower":142, "Weight_in_lbs":4054, "Acceleration":14.3, "Year":"1979-01-01", "Origin":"USA"} 299 | {"Name":"chevrolet malibu classic (sw)", "Miles_per_Gallon":19.2, "Cylinders":8, "Displacement":267, "Horsepower":125, "Weight_in_lbs":3605, "Acceleration":15, "Year":"1979-01-01", "Origin":"USA"} 300 | {"Name":"chrysler lebaron town @ country (sw)", "Miles_per_Gallon":18.5, "Cylinders":8, "Displacement":360, "Horsepower":150, "Weight_in_lbs":3940, "Acceleration":13, "Year":"1979-01-01", "Origin":"USA"} 301 | {"Name":"vw rabbit custom", "Miles_per_Gallon":31.9, "Cylinders":4, "Displacement":89, "Horsepower":71, "Weight_in_lbs":1925, "Acceleration":14, "Year":"1979-01-01", "Origin":"Europe"} 302 | {"Name":"maxda glc deluxe", "Miles_per_Gallon":34.1, "Cylinders":4, "Displacement":86, "Horsepower":65, "Weight_in_lbs":1975, "Acceleration":15.2, "Year":"1979-01-01", "Origin":"Japan"} 303 | {"Name":"dodge colt hatchback custom", "Miles_per_Gallon":35.7, "Cylinders":4, "Displacement":98, "Horsepower":80, "Weight_in_lbs":1915, "Acceleration":14.4, "Year":"1979-01-01", "Origin":"USA"} 304 | {"Name":"amc spirit dl", "Miles_per_Gallon":27.4, "Cylinders":4, "Displacement":121, "Horsepower":80, "Weight_in_lbs":2670, "Acceleration":15, "Year":"1979-01-01", "Origin":"USA"} 305 | {"Name":"mercedes benz 300d", "Miles_per_Gallon":25.4, "Cylinders":5, "Displacement":183, "Horsepower":77, "Weight_in_lbs":3530, "Acceleration":20.1, "Year":"1979-01-01", "Origin":"Europe"} 306 | {"Name":"cadillac eldorado", 
"Miles_per_Gallon":23, "Cylinders":8, "Displacement":350, "Horsepower":125, "Weight_in_lbs":3900, "Acceleration":17.4, "Year":"1979-01-01", "Origin":"USA"} 307 | {"Name":"peugeot 504", "Miles_per_Gallon":27.2, "Cylinders":4, "Displacement":141, "Horsepower":71, "Weight_in_lbs":3190, "Acceleration":24.8, "Year":"1979-01-01", "Origin":"Europe"} 308 | {"Name":"oldsmobile cutlass salon brougham", "Miles_per_Gallon":23.9, "Cylinders":8, "Displacement":260, "Horsepower":90, "Weight_in_lbs":3420, "Acceleration":22.2, "Year":"1979-01-01", "Origin":"USA"} 309 | {"Name":"plymouth horizon", "Miles_per_Gallon":34.2, "Cylinders":4, "Displacement":105, "Horsepower":70, "Weight_in_lbs":2200, "Acceleration":13.2, "Year":"1979-01-01", "Origin":"USA"} 310 | {"Name":"plymouth horizon tc3", "Miles_per_Gallon":34.5, "Cylinders":4, "Displacement":105, "Horsepower":70, "Weight_in_lbs":2150, "Acceleration":14.9, "Year":"1979-01-01", "Origin":"USA"} 311 | {"Name":"datsun 210", "Miles_per_Gallon":31.8, "Cylinders":4, "Displacement":85, "Horsepower":65, "Weight_in_lbs":2020, "Acceleration":19.2, "Year":"1979-01-01", "Origin":"Japan"} 312 | {"Name":"fiat strada custom", "Miles_per_Gallon":37.3, "Cylinders":4, "Displacement":91, "Horsepower":69, "Weight_in_lbs":2130, "Acceleration":14.7, "Year":"1979-01-01", "Origin":"Europe"} 313 | {"Name":"buick skylark limited", "Miles_per_Gallon":28.4, "Cylinders":4, "Displacement":151, "Horsepower":90, "Weight_in_lbs":2670, "Acceleration":16, "Year":"1979-01-01", "Origin":"USA"} 314 | {"Name":"chevrolet citation", "Miles_per_Gallon":28.8, "Cylinders":6, "Displacement":173, "Horsepower":115, "Weight_in_lbs":2595, "Acceleration":11.3, "Year":"1979-01-01", "Origin":"USA"} 315 | {"Name":"oldsmobile omega brougham", "Miles_per_Gallon":26.8, "Cylinders":6, "Displacement":173, "Horsepower":115, "Weight_in_lbs":2700, "Acceleration":12.9, "Year":"1979-01-01", "Origin":"USA"} 316 | {"Name":"pontiac phoenix", "Miles_per_Gallon":33.5, "Cylinders":4, 
"Displacement":151, "Horsepower":90, "Weight_in_lbs":2556, "Acceleration":13.2, "Year":"1979-01-01", "Origin":"USA"} 317 | {"Name":"vw rabbit", "Miles_per_Gallon":41.5, "Cylinders":4, "Displacement":98, "Horsepower":76, "Weight_in_lbs":2144, "Acceleration":14.7, "Year":"1980-01-01", "Origin":"Europe"} 318 | {"Name":"toyota corolla tercel", "Miles_per_Gallon":38.1, "Cylinders":4, "Displacement":89, "Horsepower":60, "Weight_in_lbs":1968, "Acceleration":18.8, "Year":"1980-01-01", "Origin":"Japan"} 319 | {"Name":"chevrolet chevette", "Miles_per_Gallon":32.1, "Cylinders":4, "Displacement":98, "Horsepower":70, "Weight_in_lbs":2120, "Acceleration":15.5, "Year":"1980-01-01", "Origin":"USA"} 320 | {"Name":"datsun 310", "Miles_per_Gallon":37.2, "Cylinders":4, "Displacement":86, "Horsepower":65, "Weight_in_lbs":2019, "Acceleration":16.4, "Year":"1980-01-01", "Origin":"Japan"} 321 | {"Name":"chevrolet citation", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":151, "Horsepower":90, "Weight_in_lbs":2678, "Acceleration":16.5, "Year":"1980-01-01", "Origin":"USA"} 322 | {"Name":"ford fairmont", "Miles_per_Gallon":26.4, "Cylinders":4, "Displacement":140, "Horsepower":88, "Weight_in_lbs":2870, "Acceleration":18.1, "Year":"1980-01-01", "Origin":"USA"} 323 | {"Name":"amc concord", "Miles_per_Gallon":24.3, "Cylinders":4, "Displacement":151, "Horsepower":90, "Weight_in_lbs":3003, "Acceleration":20.1, "Year":"1980-01-01", "Origin":"USA"} 324 | {"Name":"dodge aspen", "Miles_per_Gallon":19.1, "Cylinders":6, "Displacement":225, "Horsepower":90, "Weight_in_lbs":3381, "Acceleration":18.7, "Year":"1980-01-01", "Origin":"USA"} 325 | {"Name":"audi 4000", "Miles_per_Gallon":34.3, "Cylinders":4, "Displacement":97, "Horsepower":78, "Weight_in_lbs":2188, "Acceleration":15.8, "Year":"1980-01-01", "Origin":"Europe"} 326 | {"Name":"toyota corona liftback", "Miles_per_Gallon":29.8, "Cylinders":4, "Displacement":134, "Horsepower":90, "Weight_in_lbs":2711, "Acceleration":15.5, "Year":"1980-01-01", 
"Origin":"Japan"} 327 | {"Name":"mazda 626", "Miles_per_Gallon":31.3, "Cylinders":4, "Displacement":120, "Horsepower":75, "Weight_in_lbs":2542, "Acceleration":17.5, "Year":"1980-01-01", "Origin":"Japan"} 328 | {"Name":"datsun 510 hatchback", "Miles_per_Gallon":37, "Cylinders":4, "Displacement":119, "Horsepower":92, "Weight_in_lbs":2434, "Acceleration":15, "Year":"1980-01-01", "Origin":"Japan"} 329 | {"Name":"toyota corolla", "Miles_per_Gallon":32.2, "Cylinders":4, "Displacement":108, "Horsepower":75, "Weight_in_lbs":2265, "Acceleration":15.2, "Year":"1980-01-01", "Origin":"Japan"} 330 | {"Name":"mazda glc", "Miles_per_Gallon":46.6, "Cylinders":4, "Displacement":86, "Horsepower":65, "Weight_in_lbs":2110, "Acceleration":17.9, "Year":"1980-01-01", "Origin":"Japan"} 331 | {"Name":"dodge colt", "Miles_per_Gallon":27.9, "Cylinders":4, "Displacement":156, "Horsepower":105, "Weight_in_lbs":2800, "Acceleration":14.4, "Year":"1980-01-01", "Origin":"USA"} 332 | {"Name":"datsun 210", "Miles_per_Gallon":40.8, "Cylinders":4, "Displacement":85, "Horsepower":65, "Weight_in_lbs":2110, "Acceleration":19.2, "Year":"1980-01-01", "Origin":"Japan"} 333 | {"Name":"vw rabbit c (diesel)", "Miles_per_Gallon":44.3, "Cylinders":4, "Displacement":90, "Horsepower":48, "Weight_in_lbs":2085, "Acceleration":21.7, "Year":"1980-01-01", "Origin":"Europe"} 334 | {"Name":"vw dasher (diesel)", "Miles_per_Gallon":43.4, "Cylinders":4, "Displacement":90, "Horsepower":48, "Weight_in_lbs":2335, "Acceleration":23.7, "Year":"1980-01-01", "Origin":"Europe"} 335 | {"Name":"audi 5000s (diesel)", "Miles_per_Gallon":36.4, "Cylinders":5, "Displacement":121, "Horsepower":67, "Weight_in_lbs":2950, "Acceleration":19.9, "Year":"1980-01-01", "Origin":"Europe"} 336 | {"Name":"mercedes-benz 240d", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":146, "Horsepower":67, "Weight_in_lbs":3250, "Acceleration":21.8, "Year":"1980-01-01", "Origin":"Europe"} 337 | {"Name":"honda civic 1500 gl", "Miles_per_Gallon":44.6, 
"Cylinders":4, "Displacement":91, "Horsepower":67, "Weight_in_lbs":1850, "Acceleration":13.8, "Year":"1980-01-01", "Origin":"Japan"} 338 | {"Name":"renault lecar deluxe", "Miles_per_Gallon":40.9, "Cylinders":4, "Displacement":85, "Horsepower":null, "Weight_in_lbs":1835, "Acceleration":17.3, "Year":"1980-01-01", "Origin":"Europe"} 339 | {"Name":"subaru dl", "Miles_per_Gallon":33.8, "Cylinders":4, "Displacement":97, "Horsepower":67, "Weight_in_lbs":2145, "Acceleration":18, "Year":"1980-01-01", "Origin":"Japan"} 340 | {"Name":"vokswagen rabbit", "Miles_per_Gallon":29.8, "Cylinders":4, "Displacement":89, "Horsepower":62, "Weight_in_lbs":1845, "Acceleration":15.3, "Year":"1980-01-01", "Origin":"Europe"} 341 | {"Name":"datsun 280-zx", "Miles_per_Gallon":32.7, "Cylinders":6, "Displacement":168, "Horsepower":132, "Weight_in_lbs":2910, "Acceleration":11.4, "Year":"1980-01-01", "Origin":"Japan"} 342 | {"Name":"mazda rx-7 gs", "Miles_per_Gallon":23.7, "Cylinders":3, "Displacement":70, "Horsepower":100, "Weight_in_lbs":2420, "Acceleration":12.5, "Year":"1980-01-01", "Origin":"Japan"} 343 | {"Name":"triumph tr7 coupe", "Miles_per_Gallon":35, "Cylinders":4, "Displacement":122, "Horsepower":88, "Weight_in_lbs":2500, "Acceleration":15.1, "Year":"1980-01-01", "Origin":"Europe"} 344 | {"Name":"ford mustang cobra", "Miles_per_Gallon":23.6, "Cylinders":4, "Displacement":140, "Horsepower":null, "Weight_in_lbs":2905, "Acceleration":14.3, "Year":"1980-01-01", "Origin":"USA"} 345 | {"Name":"honda Accelerationord", "Miles_per_Gallon":32.4, "Cylinders":4, "Displacement":107, "Horsepower":72, "Weight_in_lbs":2290, "Acceleration":17, "Year":"1980-01-01", "Origin":"Japan"} 346 | {"Name":"plymouth reliant", "Miles_per_Gallon":27.2, "Cylinders":4, "Displacement":135, "Horsepower":84, "Weight_in_lbs":2490, "Acceleration":15.7, "Year":"1982-01-01", "Origin":"USA"} 347 | {"Name":"buick skylark", "Miles_per_Gallon":26.6, "Cylinders":4, "Displacement":151, "Horsepower":84, "Weight_in_lbs":2635, 
"Acceleration":16.4, "Year":"1982-01-01", "Origin":"USA"} 348 | {"Name":"dodge aries wagon (sw)", "Miles_per_Gallon":25.8, "Cylinders":4, "Displacement":156, "Horsepower":92, "Weight_in_lbs":2620, "Acceleration":14.4, "Year":"1982-01-01", "Origin":"USA"} 349 | {"Name":"chevrolet citation", "Miles_per_Gallon":23.5, "Cylinders":6, "Displacement":173, "Horsepower":110, "Weight_in_lbs":2725, "Acceleration":12.6, "Year":"1982-01-01", "Origin":"USA"} 350 | {"Name":"plymouth reliant", "Miles_per_Gallon":30, "Cylinders":4, "Displacement":135, "Horsepower":84, "Weight_in_lbs":2385, "Acceleration":12.9, "Year":"1982-01-01", "Origin":"USA"} 351 | {"Name":"toyota starlet", "Miles_per_Gallon":39.1, "Cylinders":4, "Displacement":79, "Horsepower":58, "Weight_in_lbs":1755, "Acceleration":16.9, "Year":"1982-01-01", "Origin":"Japan"} 352 | {"Name":"plymouth champ", "Miles_per_Gallon":39, "Cylinders":4, "Displacement":86, "Horsepower":64, "Weight_in_lbs":1875, "Acceleration":16.4, "Year":"1982-01-01", "Origin":"USA"} 353 | {"Name":"honda civic 1300", "Miles_per_Gallon":35.1, "Cylinders":4, "Displacement":81, "Horsepower":60, "Weight_in_lbs":1760, "Acceleration":16.1, "Year":"1982-01-01", "Origin":"Japan"} 354 | {"Name":"subaru", "Miles_per_Gallon":32.3, "Cylinders":4, "Displacement":97, "Horsepower":67, "Weight_in_lbs":2065, "Acceleration":17.8, "Year":"1982-01-01", "Origin":"Japan"} 355 | {"Name":"datsun 210", "Miles_per_Gallon":37, "Cylinders":4, "Displacement":85, "Horsepower":65, "Weight_in_lbs":1975, "Acceleration":19.4, "Year":"1982-01-01", "Origin":"Japan"} 356 | {"Name":"toyota tercel", "Miles_per_Gallon":37.7, "Cylinders":4, "Displacement":89, "Horsepower":62, "Weight_in_lbs":2050, "Acceleration":17.3, "Year":"1982-01-01", "Origin":"Japan"} 357 | {"Name":"mazda glc 4", "Miles_per_Gallon":34.1, "Cylinders":4, "Displacement":91, "Horsepower":68, "Weight_in_lbs":1985, "Acceleration":16, "Year":"1982-01-01", "Origin":"Japan"} 358 | {"Name":"plymouth horizon 4", 
"Miles_per_Gallon":34.7, "Cylinders":4, "Displacement":105, "Horsepower":63, "Weight_in_lbs":2215, "Acceleration":14.9, "Year":"1982-01-01", "Origin":"USA"} 359 | {"Name":"ford escort 4w", "Miles_per_Gallon":34.4, "Cylinders":4, "Displacement":98, "Horsepower":65, "Weight_in_lbs":2045, "Acceleration":16.2, "Year":"1982-01-01", "Origin":"USA"} 360 | {"Name":"ford escort 2h", "Miles_per_Gallon":29.9, "Cylinders":4, "Displacement":98, "Horsepower":65, "Weight_in_lbs":2380, "Acceleration":20.7, "Year":"1982-01-01", "Origin":"USA"} 361 | {"Name":"volkswagen jetta", "Miles_per_Gallon":33, "Cylinders":4, "Displacement":105, "Horsepower":74, "Weight_in_lbs":2190, "Acceleration":14.2, "Year":"1982-01-01", "Origin":"Europe"} 362 | {"Name":"renault 18i", "Miles_per_Gallon":34.5, "Cylinders":4, "Displacement":100, "Horsepower":null, "Weight_in_lbs":2320, "Acceleration":15.8, "Year":"1982-01-01", "Origin":"Europe"} 363 | {"Name":"honda prelude", "Miles_per_Gallon":33.7, "Cylinders":4, "Displacement":107, "Horsepower":75, "Weight_in_lbs":2210, "Acceleration":14.4, "Year":"1982-01-01", "Origin":"Japan"} 364 | {"Name":"toyota corolla", "Miles_per_Gallon":32.4, "Cylinders":4, "Displacement":108, "Horsepower":75, "Weight_in_lbs":2350, "Acceleration":16.8, "Year":"1982-01-01", "Origin":"Japan"} 365 | {"Name":"datsun 200sx", "Miles_per_Gallon":32.9, "Cylinders":4, "Displacement":119, "Horsepower":100, "Weight_in_lbs":2615, "Acceleration":14.8, "Year":"1982-01-01", "Origin":"Japan"} 366 | {"Name":"mazda 626", "Miles_per_Gallon":31.6, "Cylinders":4, "Displacement":120, "Horsepower":74, "Weight_in_lbs":2635, "Acceleration":18.3, "Year":"1982-01-01", "Origin":"Japan"} 367 | {"Name":"peugeot 505s turbo diesel", "Miles_per_Gallon":28.1, "Cylinders":4, "Displacement":141, "Horsepower":80, "Weight_in_lbs":3230, "Acceleration":20.4, "Year":"1982-01-01", "Origin":"Europe"} 368 | {"Name":"saab 900s", "Miles_per_Gallon":null, "Cylinders":4, "Displacement":121, "Horsepower":110, 
"Weight_in_lbs":2800, "Acceleration":15.4, "Year":"1982-01-01", "Origin":"Europe"} 369 | {"Name":"volvo diesel", "Miles_per_Gallon":30.7, "Cylinders":6, "Displacement":145, "Horsepower":76, "Weight_in_lbs":3160, "Acceleration":19.6, "Year":"1982-01-01", "Origin":"Europe"} 370 | {"Name":"toyota cressida", "Miles_per_Gallon":25.4, "Cylinders":6, "Displacement":168, "Horsepower":116, "Weight_in_lbs":2900, "Acceleration":12.6, "Year":"1982-01-01", "Origin":"Japan"} 371 | {"Name":"datsun 810 maxima", "Miles_per_Gallon":24.2, "Cylinders":6, "Displacement":146, "Horsepower":120, "Weight_in_lbs":2930, "Acceleration":13.8, "Year":"1982-01-01", "Origin":"Japan"} 372 | {"Name":"buick century", "Miles_per_Gallon":22.4, "Cylinders":6, "Displacement":231, "Horsepower":110, "Weight_in_lbs":3415, "Acceleration":15.8, "Year":"1982-01-01", "Origin":"USA"} 373 | {"Name":"oldsmobile cutlass ls", "Miles_per_Gallon":26.6, "Cylinders":8, "Displacement":350, "Horsepower":105, "Weight_in_lbs":3725, "Acceleration":19, "Year":"1982-01-01", "Origin":"USA"} 374 | {"Name":"ford granada gl", "Miles_per_Gallon":20.2, "Cylinders":6, "Displacement":200, "Horsepower":88, "Weight_in_lbs":3060, "Acceleration":17.1, "Year":"1982-01-01", "Origin":"USA"} 375 | {"Name":"chrysler lebaron salon", "Miles_per_Gallon":17.6, "Cylinders":6, "Displacement":225, "Horsepower":85, "Weight_in_lbs":3465, "Acceleration":16.6, "Year":"1982-01-01", "Origin":"USA"} 376 | {"Name":"chevrolet cavalier", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":112, "Horsepower":88, "Weight_in_lbs":2605, "Acceleration":19.6, "Year":"1982-01-01", "Origin":"USA"} 377 | {"Name":"chevrolet cavalier wagon", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":112, "Horsepower":88, "Weight_in_lbs":2640, "Acceleration":18.6, "Year":"1982-01-01", "Origin":"USA"} 378 | {"Name":"chevrolet cavalier 2-door", "Miles_per_Gallon":34, "Cylinders":4, "Displacement":112, "Horsepower":88, "Weight_in_lbs":2395, "Acceleration":18, "Year":"1982-01-01", 
"Origin":"USA"} 379 | {"Name":"pontiac j2000 se hatchback", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":112, "Horsepower":85, "Weight_in_lbs":2575, "Acceleration":16.2, "Year":"1982-01-01", "Origin":"USA"} 380 | {"Name":"dodge aries se", "Miles_per_Gallon":29, "Cylinders":4, "Displacement":135, "Horsepower":84, "Weight_in_lbs":2525, "Acceleration":16, "Year":"1982-01-01", "Origin":"USA"} 381 | {"Name":"pontiac phoenix", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":151, "Horsepower":90, "Weight_in_lbs":2735, "Acceleration":18, "Year":"1982-01-01", "Origin":"USA"} 382 | {"Name":"ford fairmont futura", "Miles_per_Gallon":24, "Cylinders":4, "Displacement":140, "Horsepower":92, "Weight_in_lbs":2865, "Acceleration":16.4, "Year":"1982-01-01", "Origin":"USA"} 383 | {"Name":"amc concord dl", "Miles_per_Gallon":23, "Cylinders":4, "Displacement":151, "Horsepower":null, "Weight_in_lbs":3035, "Acceleration":20.5, "Year":"1982-01-01", "Origin":"USA"} 384 | {"Name":"volkswagen rabbit l", "Miles_per_Gallon":36, "Cylinders":4, "Displacement":105, "Horsepower":74, "Weight_in_lbs":1980, "Acceleration":15.3, "Year":"1982-01-01", "Origin":"Europe"} 385 | {"Name":"mazda glc custom l", "Miles_per_Gallon":37, "Cylinders":4, "Displacement":91, "Horsepower":68, "Weight_in_lbs":2025, "Acceleration":18.2, "Year":"1982-01-01", "Origin":"Japan"} 386 | {"Name":"mazda glc custom", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":91, "Horsepower":68, "Weight_in_lbs":1970, "Acceleration":17.6, "Year":"1982-01-01", "Origin":"Japan"} 387 | {"Name":"plymouth horizon miser", "Miles_per_Gallon":38, "Cylinders":4, "Displacement":105, "Horsepower":63, "Weight_in_lbs":2125, "Acceleration":14.7, "Year":"1982-01-01", "Origin":"USA"} 388 | {"Name":"mercury lynx l", "Miles_per_Gallon":36, "Cylinders":4, "Displacement":98, "Horsepower":70, "Weight_in_lbs":2125, "Acceleration":17.3, "Year":"1982-01-01", "Origin":"USA"} 389 | {"Name":"nissan stanza xe", "Miles_per_Gallon":36, "Cylinders":4, 
"Displacement":120, "Horsepower":88, "Weight_in_lbs":2160, "Acceleration":14.5, "Year":"1982-01-01", "Origin":"Japan"} 390 | {"Name":"honda Accelerationord", "Miles_per_Gallon":36, "Cylinders":4, "Displacement":107, "Horsepower":75, "Weight_in_lbs":2205, "Acceleration":14.5, "Year":"1982-01-01", "Origin":"Japan"} 391 | {"Name":"toyota corolla", "Miles_per_Gallon":34, "Cylinders":4, "Displacement":108, "Horsepower":70, "Weight_in_lbs":2245, "Acceleration":16.9, "Year":"1982-01-01", "Origin":"Japan"} 392 | {"Name":"honda civic", "Miles_per_Gallon":38, "Cylinders":4, "Displacement":91, "Horsepower":67, "Weight_in_lbs":1965, "Acceleration":15, "Year":"1982-01-01", "Origin":"Japan"} 393 | {"Name":"honda civic (auto)", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":91, "Horsepower":67, "Weight_in_lbs":1965, "Acceleration":15.7, "Year":"1982-01-01", "Origin":"Japan"} 394 | {"Name":"datsun 310 gx", "Miles_per_Gallon":38, "Cylinders":4, "Displacement":91, "Horsepower":67, "Weight_in_lbs":1995, "Acceleration":16.2, "Year":"1982-01-01", "Origin":"Japan"} 395 | {"Name":"buick century limited", "Miles_per_Gallon":25, "Cylinders":6, "Displacement":181, "Horsepower":110, "Weight_in_lbs":2945, "Acceleration":16.4, "Year":"1982-01-01", "Origin":"USA"} 396 | {"Name":"oldsmobile cutlass ciera (diesel)", "Miles_per_Gallon":38, "Cylinders":6, "Displacement":262, "Horsepower":85, "Weight_in_lbs":3015, "Acceleration":17, "Year":"1982-01-01", "Origin":"USA"} 397 | {"Name":"chrysler lebaron medallion", "Miles_per_Gallon":26, "Cylinders":4, "Displacement":156, "Horsepower":92, "Weight_in_lbs":2585, "Acceleration":14.5, "Year":"1982-01-01", "Origin":"USA"} 398 | {"Name":"ford granada l", "Miles_per_Gallon":22, "Cylinders":6, "Displacement":232, "Horsepower":112, "Weight_in_lbs":2835, "Acceleration":14.7, "Year":"1982-01-01", "Origin":"USA"} 399 | {"Name":"toyota celica gt", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":144, "Horsepower":96, "Weight_in_lbs":2665, 
"Acceleration":13.9, "Year":"1982-01-01", "Origin":"Japan"} 400 | {"Name":"dodge charger 2.2", "Miles_per_Gallon":36, "Cylinders":4, "Displacement":135, "Horsepower":84, "Weight_in_lbs":2370, "Acceleration":13, "Year":"1982-01-01", "Origin":"USA"} 401 | {"Name":"chevrolet camaro", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":151, "Horsepower":90, "Weight_in_lbs":2950, "Acceleration":17.3, "Year":"1982-01-01", "Origin":"USA"} 402 | {"Name":"ford mustang gl", "Miles_per_Gallon":27, "Cylinders":4, "Displacement":140, "Horsepower":86, "Weight_in_lbs":2790, "Acceleration":15.6, "Year":"1982-01-01", "Origin":"USA"} 403 | {"Name":"vw pickup", "Miles_per_Gallon":44, "Cylinders":4, "Displacement":97, "Horsepower":52, "Weight_in_lbs":2130, "Acceleration":24.6, "Year":"1982-01-01", "Origin":"Europe"} 404 | {"Name":"dodge rampage", "Miles_per_Gallon":32, "Cylinders":4, "Displacement":135, "Horsepower":84, "Weight_in_lbs":2295, "Acceleration":11.6, "Year":"1982-01-01", "Origin":"USA"} 405 | {"Name":"ford ranger", "Miles_per_Gallon":28, "Cylinders":4, "Displacement":120, "Horsepower":79, "Weight_in_lbs":2625, "Acceleration":18.6, "Year":"1982-01-01", "Origin":"USA"} 406 | {"Name":"chevy s-10", "Miles_per_Gallon":31, "Cylinders":4, "Displacement":119, "Horsepower":82, "Weight_in_lbs":2720, "Acceleration":19.4, "Year":"1982-01-01", "Origin":"USA"} -------------------------------------------------------------------------------- /src/main/resources/data/employees/employees.csv: -------------------------------------------------------------------------------- 1 | 7584740,Devin,Jeramy,Vedenichev,103,1951-12-31T05:00:00.000Z,958-67-2937,55214 2 | 7677553,Marcus,Arlie,Tibb,103,1951-12-31T05:00:00.000Z,999-90-6698,47746 3 | 7736171,Pat,Johnie,De Keep,103,1951-12-31T05:00:00.000Z,960-92-7355,98868 4 | 7813417,Brooks,Dannie,Lemmens,103,1951-12-31T05:00:00.000Z,914-82-1490,69065 5 | 7929094,Milan,Alex,Setterfield,103,1951-12-31T05:00:00.000Z,989-42-1725,79877 6 | 
8037265,Dexter,Otis,Brahmer,103,1951-12-31T05:00:00.000Z,959-50-1621,91473 7 | 8050195,Anibal,Nicky,Springford,103,1951-12-31T05:00:00.000Z,998-90-1376,62784 8 | 8053263,Karl,Horace,Thompson,103,1951-12-31T05:00:00.000Z,930-70-7868,90481 9 | 8125007,Quinton,Eduardo,Rooze,103,1951-12-31T05:00:00.000Z,903-21-6089,87389 10 | 8169581,Jc,Edison,Fruchter,103,1951-12-31T05:00:00.000Z,905-35-8052,58120 11 | 8206921,Sylvester,Faustino,Glasgow,103,1951-12-31T05:00:00.000Z,907-30-2443,43279 12 | 8230707,Juan,Dewayne,Leggitt,103,1951-12-31T05:00:00.000Z,965-56-5537,74107 13 | 8353995,Bill,Dirk,Negri,103,1951-12-31T05:00:00.000Z,905-76-4894,79389 14 | 8430873,Oren,August,Lachaize,103,1951-12-31T05:00:00.000Z,990-14-4688,82777 15 | 8448144,Dewayne,Hubert,Sturton,103,1951-12-31T05:00:00.000Z,917-37-8590,91109 16 | 8479988,Courtney,Long,Bellin,103,1951-12-31T05:00:00.000Z,906-26-2150,51099 17 | 8481731,Sung,Jamie,Haslin,103,1951-12-31T05:00:00.000Z,982-29-1779,75742 18 | 8596468,Walker,Francesco,O'Fallone,103,1951-12-31T05:00:00.000Z,962-42-6521,107650 19 | 8650756,Clyde,Royce,Readshall,103,1951-12-31T05:00:00.000Z,983-38-1342,88851 20 | 5073404,Dale,Brice,Casolla,105,1951-12-31T05:00:00.000Z,963-52-1303,102445 21 | 5106546,Lupe,Rod,Bullard,105,1951-12-31T05:00:00.000Z,901-76-7197,84274 22 | 5178737,Douglass,Seth,Cummine,105,1951-12-31T05:00:00.000Z,958-74-4222,81743 23 | 5265931,Abram,Eddie,Izzett,105,1951-12-31T05:00:00.000Z,962-45-3424,63223 24 | 5268793,Wilson,Landon,Chellam,105,1951-12-31T05:00:00.000Z,911-11-7847,88966 25 | 5371143,Kieth,Malcolm,Formigli,105,1951-12-31T05:00:00.000Z,666-37-6950,70655 26 | 5407756,Ted,Pablo,Lidgely,105,1951-12-31T05:00:00.000Z,994-52-7326,63100 27 | 5413070,Reid,Reyes,Kabos,105,1951-12-31T05:00:00.000Z,918-28-4915,90488 28 | 5422998,Buford,Leonel,Stanbro,105,1951-12-31T05:00:00.000Z,927-29-6346,100472 29 | 5530003,Milford,Samual,Manis,105,1951-12-31T05:00:00.000Z,907-28-9627,40950 30 | 
5550873,Wm,Fredrick,Filshin,105,1951-12-31T05:00:00.000Z,934-69-5826,39809 31 | 5597718,Jeromy,Von,Daley,105,1951-12-31T05:00:00.000Z,991-24-2641,29644 32 | 5889924,Tyrell,Alonso,Hagyard,105,1951-12-31T05:00:00.000Z,903-55-7969,39093 33 | 5890431,George,Moises,Shurville,105,1951-12-31T05:00:00.000Z,956-30-5990,44500 34 | 6130212,Percy,Les,Lafontaine,105,1951-12-31T05:00:00.000Z,922-95-3154,73006 35 | -------------------------------------------------------------------------------- /src/main/resources/data/employees_headers/employees_headers.csv: -------------------------------------------------------------------------------- 1 | id,firstName,middleName,lastName,dept,birthDate,ssn,salary 2 | 7584740,Devin,Jeramy,Vedenichev,103,1951-12-31T05:00:00.000Z,958-67-2937,55214 3 | 7677553,Marcus,Arlie,Tibb,103,1951-12-31T05:00:00.000Z,999-90-6698,47746 4 | 7736171,Pat,Johnie,De Keep,103,1951-12-31T05:00:00.000Z,960-92-7355,98868 5 | 7813417,Brooks,Dannie,Lemmens,103,1951-12-31T05:00:00.000Z,914-82-1490,69065 6 | 7929094,Milan,Alex,Setterfield,103,1951-12-31T05:00:00.000Z,989-42-1725,79877 7 | 8037265,Dexter,Otis,Brahmer,103,1951-12-31T05:00:00.000Z,959-50-1621,91473 8 | 8050195,Anibal,Nicky,Springford,103,1951-12-31T05:00:00.000Z,998-90-1376,62784 9 | 8053263,Karl,Horace,Thompson,103,1951-12-31T05:00:00.000Z,930-70-7868,90481 10 | 8125007,Quinton,Eduardo,Rooze,103,1951-12-31T05:00:00.000Z,903-21-6089,87389 11 | 8169581,Jc,Edison,Fruchter,103,1951-12-31T05:00:00.000Z,905-35-8052,58120 12 | 8206921,Sylvester,Faustino,Glasgow,103,1951-12-31T05:00:00.000Z,907-30-2443,43279 13 | 8230707,Juan,Dewayne,Leggitt,103,1951-12-31T05:00:00.000Z,965-56-5537,74107 14 | 8353995,Bill,Dirk,Negri,103,1951-12-31T05:00:00.000Z,905-76-4894,79389 15 | 8430873,Oren,August,Lachaize,103,1951-12-31T05:00:00.000Z,990-14-4688,82777 16 | 8448144,Dewayne,Hubert,Sturton,103,1951-12-31T05:00:00.000Z,917-37-8590,91109 17 | 8479988,Courtney,Long,Bellin,103,1951-12-31T05:00:00.000Z,906-26-2150,51099 18 | 
8481731,Sung,Jamie,Haslin,103,1951-12-31T05:00:00.000Z,982-29-1779,75742 19 | 8596468,Walker,Francesco,O'Fallone,103,1951-12-31T05:00:00.000Z,962-42-6521,107650 20 | 8650756,Clyde,Royce,Readshall,103,1951-12-31T05:00:00.000Z,983-38-1342,88851 21 | 5073404,Dale,Brice,Casolla,105,1951-12-31T05:00:00.000Z,963-52-1303,102445 22 | 5106546,Lupe,Rod,Bullard,105,1951-12-31T05:00:00.000Z,901-76-7197,84274 23 | 5178737,Douglass,Seth,Cummine,105,1951-12-31T05:00:00.000Z,958-74-4222,81743 24 | 5265931,Abram,Eddie,Izzett,105,1951-12-31T05:00:00.000Z,962-45-3424,63223 25 | 5268793,Wilson,Landon,Chellam,105,1951-12-31T05:00:00.000Z,911-11-7847,88966 26 | 5371143,Kieth,Malcolm,Formigli,105,1951-12-31T05:00:00.000Z,666-37-6950,70655 27 | 5407756,Ted,Pablo,Lidgely,105,1951-12-31T05:00:00.000Z,994-52-7326,63100 28 | 5413070,Reid,Reyes,Kabos,105,1951-12-31T05:00:00.000Z,918-28-4915,90488 29 | 5422998,Buford,Leonel,Stanbro,105,1951-12-31T05:00:00.000Z,927-29-6346,100472 30 | 5530003,Milford,Samual,Manis,105,1951-12-31T05:00:00.000Z,907-28-9627,40950 31 | 5550873,Wm,Fredrick,Filshin,105,1951-12-31T05:00:00.000Z,934-69-5826,39809 32 | 5597718,Jeromy,Von,Daley,105,1951-12-31T05:00:00.000Z,991-24-2641,29644 33 | 5889924,Tyrell,Alonso,Hagyard,105,1951-12-31T05:00:00.000Z,903-55-7969,39093 34 | 5890431,George,Moises,Shurville,105,1951-12-31T05:00:00.000Z,956-30-5990,44500 35 | 6130212,Percy,Les,Lafontaine,105,1951-12-31T05:00:00.000Z,922-95-3154,73006 36 | -------------------------------------------------------------------------------- /src/main/resources/data/guitarPlayers/guitarPlayers.json: -------------------------------------------------------------------------------- 1 | {"id":0,"name":"Jimmy Page","guitars":[0],"band":0} 2 | {"id":1,"name":"Angus Young","guitars":[1],"band":1} 3 | {"id":2,"name":"Eric Clapton","guitars":[1,5],"band":2} 4 | {"id":3,"name":"Kirk Hammett","guitars":[3],"band":3} 5 | -------------------------------------------------------------------------------- 
/src/main/resources/data/guitars/guitars.json: -------------------------------------------------------------------------------- 1 | {"id":0,"model":"EDS-1275","make":"Gibson","guitarType":"Electric double-necked"} 2 | {"id":5,"model":"Stratocaster","make":"Fender","guitarType":"Electric"} 3 | {"id":1,"model":"SG","make":"Gibson","guitarType":"Electric"} 4 | {"id":2,"model":"914","make":"Taylor","guitarType":"Acoustic"} 5 | {"id":3,"model":"M-II","make":"ESP","guitarType":"Electric"} 6 | -------------------------------------------------------------------------------- /src/main/resources/data/lipsum/words.txt: -------------------------------------------------------------------------------- 1 | elit 2 | est 3 | consequat 4 | pulvinar 5 | tortor 6 | fringilla 7 | consectetur 8 | sed 9 | aliquet 10 | taciti 11 | in 12 | accumsan 13 | sapien 14 | sagittis 15 | torquent 16 | molestie 17 | volutpat 18 | dui 19 | auctor 20 | eu 21 | ultricies 22 | nam 23 | aliquam 24 | nec 25 | justo 26 | laoreet 27 | sit 28 | mattis 29 | quis 30 | ultrices 31 | vitae 32 | risus 33 | fusce 34 | dapibus 35 | ipsum 36 | felis 37 | cubilia 38 | conubia 39 | vel 40 | ligula 41 | per 42 | mollis 43 | tellus 44 | orci 45 | aenean 46 | purus 47 | scelerisque 48 | malesuada 49 | inceptos 50 | luctus 51 | himenaeos 52 | curabitur 53 | potenti 54 | cursus 55 | suspendisse 56 | nisl 57 | lorem 58 | a 59 | eget 60 | convallis 61 | metus 62 | amet 63 | nullam 64 | enim 65 | praesent 66 | primis 67 | cras 68 | consectetuer 69 | commodo 70 | vestibulum 71 | condimentum 72 | blandit 73 | ut 74 | neque 75 | fermentum 76 | viverra 77 | ante 78 | et 79 | faucibus 80 | massa 81 | egestas 82 | porttitor 83 | facilisi 84 | sodales 85 | magna 86 | suscipit 87 | iaculis 88 | dolor 89 | at 90 | nisi 91 | sem 92 | semper 93 | id 94 | arcu 95 | dignissim 96 | ac 97 | nostra 98 | nunc 99 | lacus 100 | euismod 101 | pharetra 102 | aptent 103 | tristique 104 | posuere 105 | proin 106 | nibh 107 | pede 108 | facilisis 
109 | etiam 110 | morbi 111 | nulla 112 | ad 113 | turpis 114 | class 115 | curae 116 | sollicitudin 117 | venenatis 118 | ullamcorper 119 | litora 120 | lectus 121 | integer 122 | mi 123 | quam 124 | vivamus 125 | pretium 126 | imperdiet 127 | odio 128 | porta 129 | mauris 130 | lacinia 131 | donec 132 | pellentesque 133 | duis 134 | quisque 135 | maecenas 136 | augue 137 | velit 138 | congue 139 | diam 140 | tincidunt 141 | libero 142 | interdum 143 | non 144 | urna 145 | sociosqu 146 | feugiat 147 | adipiscing 148 | elementum -------------------------------------------------------------------------------- /src/main/scala/common/DataGenerator.scala: --------------------------------------------------------------------------------
package common

import scala.util.Random

/**
  * Random test-data helpers: general-purpose primitives plus generators for the
  * `Guitar` / `GuitarSale` records used in the "fixing skewed data" lecture.
  */
object DataGenerator {

  // Single unseeded generator shared by every helper below.
  val random = new Random()

  /////////////////////////////////////////////////////////////////////////////////
  // General data generation
  /////////////////////////////////////////////////////////////////////////////////

  /** Returns `random.nextDouble()` scaled by `limit`; `limit` must not be negative. */
  def randomDouble(limit: Double): Double = {
    assert(limit >= 0)
    random.nextDouble() * limit
  }

  /**
    * Returns a random long in `[0, limit)`.
    *
    * `Math.floorMod` is used instead of `Math.abs(...) % limit` because
    * `Math.abs(Long.MinValue)` is still negative and would leak a negative result.
    *
    * @param limit exclusive upper bound; must be strictly positive (zero would divide by zero)
    */
  def randomLong(limit: Long = Long.MaxValue): Long = {
    assert(limit > 0, "limit must be positive")
    Math.floorMod(random.nextLong(), limit)
  }

  /**
    * Returns a random int in `[0, limit)`.
    *
    * @param limit exclusive upper bound; must be strictly positive because `nextInt` rejects 0
    */
  def randomInt(limit: Int = Int.MaxValue): Int = {
    assert(limit > 0, "limit must be positive")
    random.nextInt(limit)
  }

  /**
    * Returns a random int in `[low, high)`, or `low` itself when the range is empty (`low == high`).
    *
    * The width is computed as a Long so that very wide ranges (wider than `Int.MaxValue`)
    * do not overflow into a negative bound.
    */
  def randomIntBetween(low: Int, high: Int): Int = {
    assert(low <= high)
    val span = high.toLong - low.toLong
    if (span == 0L) low
    else if (span <= Int.MaxValue) low + random.nextInt(span.toInt)
    else (low + Math.floorMod(random.nextLong(), span)).toInt
  }

  /** Returns a random alphanumeric string of length `n`. */
  def randomString(n: Int): String = random.alphanumeric.take(n).mkString("")

  /** Returns a randomly chosen element of a non-empty sequence. */
  def pickFrom[T](seq: Seq[T]): T = {
    assert(seq.nonEmpty)
    seq(randomInt(seq.length))
  }


  /////////////////////////////////////////////////////////////////////////////////
  // Guitars generation - fixing skewed data lecture
  /////////////////////////////////////////////////////////////////////////////////

  // (make, model) pairs; the first entry is the one favoured by the skewed distribution.
  val guitarModelSet: Seq[(String, String)] = Seq(
    ("Taylor", "914"),
    ("Martin", "D-18"),
    ("Takamine", "P7D"),
    ("Gibson", "L-00"),
    ("Tanglewood", "TW45"),
    ("Fender", "CD-280S"),
    ("Yamaha", "LJ16BC")
  )

  /**
    * Generates a tuple of (make, model) from the available set.
    *
    * If `uniform` is false, a coin flip selects the first pair half of the time; the other half
    * draws uniformly from the whole set (first pair included), so the output is skewed towards
    * the first pair. If `uniform` is true every pair is equally likely.
    */
  def randomGuitarModel(uniform: Boolean = false): (String, String) = {
    val makeModelIndex = if (!uniform && random.nextBoolean()) 0 else random.nextInt(guitarModelSet.size)
    guitarModelSet(makeModelIndex)
  }

  /** Returns a score from 4.0 to 4.8 in steps of 0.1 (`nextInt(9)` yields the digit 0..8). */
  def randomSoundQuality(): Double = s"4.${random.nextInt(9)}".toDouble

  /** Returns an 8-character alphanumeric registration. */
  def randomGuitarRegistration(): String = randomString(8)

  /** Returns a configuration id shaped like `xxxx-xxxx` (two 4-character alphanumeric halves). */
  def randomGuitarModelType(): String = s"${randomString(4)}-${randomString(4)}"

  /** Returns a price from 500 to 1999. */
  def randomGuitarPrice(): Int = 500 + random.nextInt(1500)

  /** Builds a random `Guitar`; `uniformDist` is forwarded to [[randomGuitarModel]]. */
  def randomGuitar(uniformDist: Boolean = false): Guitar = {
    val (make, model) = randomGuitarModel(uniformDist)
    Guitar(randomGuitarModelType(), make, model, randomSoundQuality())
  }

  /** Builds a random `GuitarSale`; `uniformDist` is forwarded to [[randomGuitarModel]]. */
  def randomGuitarSale(uniformDist: Boolean = false): GuitarSale = {
    val (make, model) = randomGuitarModel(uniformDist)
    GuitarSale(randomGuitarRegistration(), make, model, randomSoundQuality(), randomGuitarPrice())
  }
}
-------------------------------------------------------------------------------- /src/main/scala/common/GuitarsDomain.scala: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | // Scenario: managing the data of a factory of handmade guitars like Taylor, Martin or Tanglewood 4 | // Factories produce lots of guitars, and you can customize everything: wood types, decorations, materials, bracing etc 5 | // Every instance of Guitar is a reference description of a guitar with certain features and a sound score from 0 to 5,
considered "objective"
/**
  * Reference description of a guitar configuration.
  *
  * @param configurationId identifier of this configuration
  * @param make            manufacturer
  * @param model           model name
  * @param soundScore      factory ("new") score
  */
case class Guitar(
  configurationId: String,
  make: String,
  model: String,
  /* you can add additional fields here, I left this space blank for brevity */
  soundScore: Double // factory ("new") score
)

/**
  * A GuitarSale object describes a certain guitar (identified by its registration)
  * with a sound score (again, "objective") and the sale price on the store.
  *
  * @param soundScore can be higher or lower than the "new" score
  *                   (properly aged wood or badly treated guitar)
  */
case class GuitarSale(
  registration: String,
  make: String,
  model: String,
  soundScore: Double, // this can be higher or lower than "new" score (properly aged wood or badly treated guitar)
  salePrice: Double
)
-------------------------------------------------------------------------------- /src/main/scala/part1recap/ScalaRecap.scala: --------------------------------------------------------------------------------
package part1recap

import scala.concurrent.Future
import scala.util.{Failure, Success}

/**
  * A quick tour of core Scala features. The body runs top to bottom as an `App`,
  * so the `println` calls below execute when the program is started.
  */
object ScalaRecap extends App {

  // values and variables
  val aBoolean: Boolean = false

  // expressions
  val anIfExpression = if (2 > 3) "bigger" else "smaller"

  // instructions vs expressions
  val theUnit = println("Hello, Scala") // Unit = "no meaningful value" = void in other languages

  // functions
  def myFunction(x: Int): Int = 42

  // OOP
  class Animal
  class Cat extends Animal
  trait Carnivore {
    def eat(animal: Animal): Unit
  }

  class Crocodile extends Animal with Carnivore {
    override def eat(animal: Animal): Unit = println("Crunch!")
  }

  // singleton pattern
  object MySingleton

  // companions
  object Carnivore

  // generics
  trait MyList[A]

  // method notation
  val x = 1 + 2
  val y = 1.+(2)

  // Functional Programming
  val incrementer: Int => Int = x => x + 1
  val incremented = incrementer(42)

  // map, flatMap, filter
  val processedList = List(1, 2, 3).map(incrementer)

  // Pattern Matching
  val unknown: Any = 45
  val ordinal = unknown match {
    case 1 => "first"
    case 2 => "second"
    case _ => "unknown"
  }

  // try-catch
  // NonFatal lets fatal errors (OutOfMemoryError and the like) propagate instead of being swallowed
  import scala.util.control.NonFatal
  try {
    throw new NullPointerException
  } catch {
    case _: NullPointerException => "some returned value"
    case NonFatal(_) => "something else"
  }

  // Future
  import scala.concurrent.ExecutionContext.Implicits.global
  val aFuture = Future {
    // some expensive computation, runs on another thread
    42
  }

  aFuture.onComplete {
    case Success(meaningOfLife) => println(s"I've found $meaningOfLife")
    case Failure(ex) => println(s"I have failed: $ex")
  }

  // Partial functions
  // (the final `case _` makes this one defined for every Int)
  val aPartialFunction: PartialFunction[Int, Int] = {
    case 1 => 43
    case 8 => 56
    case _ => 999
  }

  // Implicits

  // auto-injection by the compiler
  def methodWithImplicitArgument(implicit x: Int): Int = x + 43
  // implicit definitions carry an explicit type so resolution does not depend on inference
  implicit val implicitInt: Int = 67
  val implicitCall = methodWithImplicitArgument

  // implicit conversions - implicit defs
  case class Person(name: String) {
    def greet: Unit = println(s"Hi, my name is $name")
  }

  implicit def fromStringToPerson(name: String): Person = Person(name)
  "Bob".greet // fromStringToPerson("Bob").greet

  // implicit conversion - implicit classes
  implicit class Dog(name: String) {
    def bark: Unit = println("Bark!")
  }
  "Lassie".bark

  /*
    - local scope
    - imported scope
    - companion objects of the types involved in the method call
   */

}
-------------------------------------------------------------------------------- /src/main/scala/part1recap/SparkRecap.scala: -------------------------------------------------------------------------------- 1 | package part1recap 2 | 3 | import org.apache.spark.rdd.RDD 4 | import
org.apache.spark.sql.SparkSession 5 | import org.apache.spark.sql.functions._ 6 | 7 | object SparkRecap { 8 | 9 | // the entry point to the Spark structured API 10 | val spark = SparkSession.builder() 11 | .appName("Spark Recap") 12 | .master("local[2]") 13 | .getOrCreate() 14 | 15 | // read a DF 16 | val cars = spark.read 17 | .format("json") 18 | .option("inferSchema", "true") 19 | .load("src/main/resources/data/cars") 20 | 21 | import spark.implicits._ 22 | 23 | // select 24 | val usefulCarsData = cars.select( 25 | col("Name"), // column object 26 | $"Year", // another column object (needs spark implicits) 27 | (col("Weight_in_lbs") / 2.2).as("Weight_in_kg"), 28 | expr("Weight_in_lbs / 2.2").as("Weight_in_kg_2") 29 | ) 30 | 31 | val carsWeights = cars.selectExpr("Weight_in_lbs / 2.2") 32 | 33 | // filter 34 | val europeanCars = cars.where(col("Origin") =!= "USA") 35 | 36 | // aggregations 37 | val averageHP = cars.select(avg(col("Horsepower")).as("average_hp")) // sum, mean, stddev, min, max 38 | 39 | // grouping 40 | val countByOrigin = cars 41 | .groupBy(col("Origin")) // a RelationalGroupedDataset 42 | .count() 43 | 44 | // joining 45 | val guitarPlayers = spark.read 46 | .option("inferSchema", "true") 47 | .json("src/main/resources/data/guitarPlayers") 48 | 49 | val bands = spark.read 50 | .option("inferSchema", "true") 51 | .json("src/main/resources/data/bands") 52 | 53 | val guitaristsBands = guitarPlayers.join(bands, guitarPlayers.col("band") === bands.col("id")) 54 | /* 55 | join types 56 | - inner: only the matching rows are kept 57 | - left/right/full outer join 58 | - semi/anti 59 | */ 60 | 61 | // datasets = typed distributed collection of objects 62 | case class GuitarPlayer(id: Long, name: String, guitars: Seq[Long], band: Long) 63 | val guitarPlayersDS = guitarPlayers.as[GuitarPlayer] // needs spark.implicits 64 | guitarPlayersDS.map(_.name) 65 | 66 | // Spark SQL 67 | cars.createOrReplaceTempView("cars") 68 | val americanCars = spark.sql( 69 | 
""" 70 | |select Name from cars where Origin = 'USA' 71 | """.stripMargin 72 | ) 73 | 74 | // low-level API: RDDs 75 | val sc = spark.sparkContext 76 | val numbersRDD: RDD[Int] = sc.parallelize(1 to 1000000) 77 | 78 | // functional operators 79 | val doubles = numbersRDD.map(_ * 2) 80 | 81 | // RDD -> DF 82 | val numbersDF = numbersRDD.toDF("number") // you lose type info, you get SQL capability 83 | 84 | // RDD -> DS 85 | val numbersDS = spark.createDataset(numbersRDD) 86 | 87 | // DS -> RDD 88 | val guitarPlayersRDD = guitarPlayersDS.rdd 89 | 90 | // DF -> RDD 91 | val carsRDD = cars.rdd // RDD[Row] 92 | 93 | def main(args: Array[String]): Unit = { 94 | // showing a DF to the console 95 | cars.show() 96 | cars.printSchema() 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/part2foundations/CatalystDemo.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.{DataFrame, SparkSession} 4 | 5 | object CatalystDemo { 6 | 7 | val spark = SparkSession.builder() 8 | .appName("Catalyst Demo") 9 | .master("local") 10 | .getOrCreate() 11 | 12 | import spark.implicits._ 13 | 14 | val flights = spark.read 15 | .option("inferSchema", "true") 16 | .json("src/main/resources/data/flights") 17 | 18 | // Catalyst plays nice with chained filters 19 | val notFromHere = flights 20 | .where($"origin" =!= "LGA") 21 | .where($"origin" =!= "ORD") 22 | .where($"origin" =!= "SFO") 23 | .where($"origin" =!= "DEN") 24 | .where($"origin" =!= "BOS") 25 | .where($"origin" =!= "EWR") 26 | 27 | notFromHere.explain(true) 28 | 29 | // sometimes we do something redundant, out of ignorance or lack of communication with the rest of our team 30 | def filterTeam1(flights: DataFrame) = flights.where($"origin" =!= "LGA").where($"dest" === "DEN") 31 | def filterTeam2(flights: DataFrame) = flights.where($"origin" =!= "EWR").where($"dest" === "DEN") 
32 | 33 | val filterBoth = filterTeam1(filterTeam2(flights)) 34 | filterBoth.explain(true) 35 | 36 | // pushing down filters all the way to the data source - do not read records in the first place 37 | flights.write.save("src/main/resources/data/flights_parquet") 38 | 39 | val notFromLGA = spark.read.load("src/main/resources/data/flights_parquet") 40 | .where($"origin" =!= "LGA") 41 | 42 | notFromLGA.explain 43 | 44 | def main(args: Array[String]): Unit = { 45 | // empty 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/part2foundations/ReadingDAGs.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object ReadingDAGs { 6 | 7 | ///////////////////////////////////////////////////////////////////// Boilerplate 8 | // you don't need this code in the Spark shell 9 | // this code is needed if you want to run it locally in IntelliJ 10 | 11 | val spark = SparkSession.builder() 12 | .config("spark.master", "local") 13 | .appName("Reading Query Plans") 14 | .getOrCreate() 15 | 16 | val sc = spark.sparkContext 17 | 18 | ///////////////////////////////////////////////////////////////////// Boilerplate 19 | 20 | // job 1 21 | sc.parallelize(1 to 1000000).count() 22 | // DAG with a single "box" - the creation of the RDD 23 | 24 | val rdd1 = sc.parallelize(1 to 1000000) 25 | 26 | // job 2 27 | rdd1.map(_ * 2).count() 28 | // DAG with one stage and two "boxes": one for creating the RDD and one for the map 29 | 30 | // job 3 31 | rdd1.repartition(23).count() 32 | // DAG with two stages: 33 | // stage 1 - the creation of the RDD + exchange 34 | // stage 2 - computation of the count 35 | 36 | // job 4 - same as query plans: 37 | val ds1 = spark.range(1, 10000000) 38 | val ds2 = spark.range(1, 20000000, 2) 39 | val ds3 = ds1.repartition(7) 40 | val ds4 = ds2.repartition(9) 41 | val ds5 = 
ds3.selectExpr("id * 3 as id") 42 | val joined = ds5.join(ds4, "id") 43 | val sum = joined.selectExpr("sum(id)") 44 | // complex DAG 45 | 46 | /** 47 | * Takeaway: the DAG is a visual representation of the steps Spark will perform to run a job. 48 | * It's the "drawing" version of the physical query plan. 49 | * Unlike query plans, which are only available for DataFrames/Spark SQL, DAGs show up for ANY job. 50 | */ 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/part2foundations/ReadingQueryPlans.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object ReadingQueryPlans { 6 | ///////////////////////////////////////////////////////////////////// Boilerplate 7 | // you don't need this code in the Spark shell 8 | // this code is needed if you want to run it locally in IntelliJ 9 | 10 | val spark = SparkSession.builder() 11 | .config("spark.master", "local") 12 | .appName("Reading Query Plans") 13 | .getOrCreate() 14 | 15 | val sc = spark.sparkContext 16 | 17 | ///////////////////////////////////////////////////////////////////// Boilerplate 18 | 19 | // plan 1 - a simple transformation 20 | val simpleNumbers = spark.range(1, 1000000) 21 | val times5 = simpleNumbers.selectExpr("id * 5 as id") 22 | times5.explain() // this is how you show a query plan 23 | /* 24 | == Physical Plan == 25 | *(1) Project [(id#0L * 5) AS id#2L] 26 | +- *(1) Range (1, 1000000, step=1, splits=6) 27 | */ 28 | 29 | // plan 2 - a shuffle 30 | val moreNumbers = spark.range(1, 1000000, 2) 31 | val split7 = moreNumbers.repartition(7) 32 | 33 | split7.explain() 34 | /* 35 | == Physical Plan == 36 | Exchange RoundRobinPartitioning(7), false, [id=#16] 37 | +- *(1) Range (1, 1000000, step=2, splits=6) 38 | */ 39 | 40 | // plan 3 - shuffle + transformation 41 | split7.selectExpr("id * 5 as id").explain() 42 | /* 43 | 
== Physical Plan == 44 | *(2) Project [(id#4L * 5) AS id#8L] 45 | +- Exchange RoundRobinPartitioning(7), false, [id=#29] 46 | +- *(1) Range (1, 1000000, step=2, splits=6) 47 | */ 48 | 49 | 50 | // plan 4 - a more complex job with a join 51 | val ds1 = spark.range(1, 10000000) 52 | val ds2 = spark.range(1, 20000000, 2) 53 | val ds3 = ds1.repartition(7) 54 | val ds4 = ds2.repartition(9) 55 | val ds5 = ds3.selectExpr("id * 3 as id") 56 | val joined = ds5.join(ds4, "id") 57 | val sum = joined.selectExpr("sum(id)") 58 | sum.explain() 59 | /* 60 | 61 | == Physical Plan == 62 | *(7) HashAggregate(keys=[], functions=[sum(id#18L)]) 63 | +- Exchange SinglePartition, true, [id=#99] 64 | +- *(6) HashAggregate(keys=[], functions=[partial_sum(id#18L)]) 65 | +- *(6) Project [id#18L] 66 | +- *(6) SortMergeJoin [id#18L], [id#12L], Inner 67 | :- *(3) Sort [id#18L ASC NULLS FIRST], false, 0 68 | : +- Exchange hashpartitioning(id#18L, 200), true, [id=#83] 69 | : +- *(2) Project [(id#10L * 3) AS id#18L] 70 | : +- Exchange RoundRobinPartitioning(7), false, [id=#79] 71 | : +- *(1) Range (1, 10000000, step=1, splits=6) 72 | +- *(5) Sort [id#12L ASC NULLS FIRST], false, 0 73 | +- Exchange hashpartitioning(id#12L, 200), true, [id=#90] 74 | +- Exchange RoundRobinPartitioning(9), false, [id=#89] 75 | +- *(4) Range (1, 20000000, step=2, splits=6) 76 | */ 77 | 78 | /** 79 | * Exercises - read the Query Plans and try to understand the code that generated them. 
80 | */ 81 | 82 | // exercise 1 83 | /* 84 | == Physical Plan == 85 | *(1) Project [firstName#153, lastName#155, (cast(salary#159 as double) / 1.1) AS salary_EUR#168] 86 | +- *(1) FileScan csv [firstName#153,lastName#155,salary#159] Batched: false, Format: CSV, Location: InMemoryFileIndex[file:/tmp/employees_headers.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct 87 | */ 88 | val employeesDF = spark.read.option("header", true).csv("/tmp/employees_headers.csv") 89 | val empEur = employeesDF.selectExpr("firstName", "lastName", "salary / 1.1 as salary_EUR") 90 | 91 | // exercise 2 92 | /* 93 | == Physical Plan == 94 | *(2) HashAggregate(keys=[dept#156], functions=[avg(cast(salary#181 as bigint))]) 95 | +- Exchange hashpartitioning(dept#156, 200) 96 | +- *(1) HashAggregate(keys=[dept#156], functions=[partial_avg(cast(salary#181 as bigint))]) 97 | +- *(1) Project [dept#156, cast(salary#159 as int) AS salary#181] 98 | +- *(1) FileScan csv [dept#156,salary#159] Batched: false, Format: CSV, Location: InMemoryFileIndex[file:/tmp/employees_headers.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct 99 | */ 100 | val avgSals = employeesDF 101 | .selectExpr("dept", "cast(salary as int) as salary") 102 | .groupBy("dept") 103 | .avg("salary") 104 | 105 | 106 | // exercise 3 107 | /* 108 | == Physical Plan == 109 | *(5) Project [id#195L] 110 | +- *(5) SortMergeJoin [id#195L], [id#197L], Inner 111 | :- *(2) Sort [id#195L ASC NULLS FIRST], false, 0 112 | : +- Exchange hashpartitioning(id#195L, 200) 113 | : +- *(1) Range (1, 10000000, step=3, splits=6) 114 | +- *(4) Sort [id#197L ASC NULLS FIRST], false, 0 115 | +- Exchange hashpartitioning(id#197L, 200) 116 | +- *(3) Range (1, 10000000, step=5, splits=6) 117 | */ 118 | val d1 = spark.range(1, 10000000, 3) 119 | val d2 = spark.range(1, 10000000, 5) 120 | val j1 = d1.join(d2, "id") 121 | 122 | } 123 | -------------------------------------------------------------------------------- 
/src/main/scala/part2foundations/SparkAPIs.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object SparkAPIs { 6 | 7 | /** 8 | * This application contains the code we wrote during the "Different Spark APIs" video. 9 | */ 10 | 11 | val spark = SparkSession.builder() 12 | .config("spark.master", "local") 13 | .appName("Different Spark APIs") 14 | .getOrCreate() 15 | 16 | // for toDF 17 | import spark.implicits._ 18 | 19 | val sc = spark.sparkContext 20 | 21 | // small count comparison 22 | val numbers = 1 to 1000000000 23 | val rdd = sc.parallelize(1 to 1000000000) 24 | rdd.count() // ~10s on camera - might vary on your PC 25 | 26 | val df = rdd.toDF("id") 27 | df.count() // ~16s - might vary 28 | val dfCount = df.selectExpr("count(*)") // same 29 | // look at the Spark UI - there's a wholestagecodegen step in the stage - that's Spark generating the appropriate bytecode to process RDDs behind the scenes 30 | // most of the time taken is just the RDD transformation - look at the time taken in stage 1 31 | 32 | val ds = spark.range(1, 1000000000) 33 | ds.count() // instant, 0.1s 34 | val dsCount = ds.selectExpr("count(*)") 35 | dsCount.show() // same 36 | ds.toDF("value").count() // same 37 | 38 | ds.rdd.count() // ~25s 39 | // cmd-click on the `rdd` implementation to see why this is so slow. 40 | 41 | /** 42 | * Notice that inside the same "realm", i.e. RDDs or DFs, the computation time is small. 43 | * Converting between them takes a long time. 44 | * That's because each row is processed individually. 45 | * Conversions are particularly bad in Python, because the data needs to go from the Python interpreter to the JVM AND back. 46 | * 47 | * Lesson 1: once decided on the API level, STAY THERE. 
48 | */ 49 | 50 | val rddTimes5 = rdd.map(_ * 5) 51 | rddTimes5.count() // ~20s 52 | // one stage 53 | 54 | val dfTimes5 = df.selectExpr("id * 5 as id") // selectExpr parses the SQL expression; select(String) would look up a column literally named "id * 5 as id" 55 | val dfTimes5Count = dfTimes5.selectExpr("count(*)") 56 | dfTimes5Count.show() // still 11-12s 57 | /* 58 | Notice there's no difference in the time taken, comparing with the original count. 59 | The RDD version multiplied every single row, but here, the multiplication is instant. 60 | Or is it? 61 | 62 | WHY? 63 | 64 | scala> dfTimes5Count.explain 65 | == Physical Plan == 66 | *(2) HashAggregate(keys=[], functions=[count(1)]) 67 | +- Exchange SinglePartition 68 | +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) 69 | +- *(1) Project 70 | +- *(1) SerializeFromObject [input[0, int, false] AS value#2] 71 | +- Scan[obj#1] 72 | 73 | scala> dfCount.explain 74 | == Physical Plan == 75 | *(2) HashAggregate(keys=[], functions=[count(1)]) 76 | +- Exchange SinglePartition 77 | +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) 78 | +- *(1) Project 79 | +- *(1) SerializeFromObject [input[0, int, false] AS value#2] 80 | +- Scan[obj#1] 81 | 82 | Same query plan! Spark removed the select altogether. 83 | */ 84 | 85 | /** 86 | * Exercise: measure the time it takes to count the number of elements from the DS, multiplied by 5. 87 | * Try to explain the difference. It's ok if you have like an 80% explanation. 88 | */ 89 | val dsTimes5 = ds.map(_ * 5) 90 | val dsTimes5Count = dsTimes5.selectExpr("count(*)") 91 | dsTimes5Count.show() 92 | /* 93 | 7 seconds from 0.1 seconds! That's a 70x time increase. 
94 | Let's explain: 95 | 96 | scala> dsCount.explain 97 | == Physical Plan == 98 | *(2) HashAggregate(keys=[], functions=[count(1)]) 99 | +- Exchange SinglePartition 100 | +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) 101 | +- *(1) Project 102 | +- *(1) Range (1, 1000000000, step=1, splits=6) 103 | 104 | scala> dsTimes5Count.explain 105 | == Physical Plan == 106 | *(2) HashAggregate(keys=[], functions=[count(1)]) 107 | +- Exchange SinglePartition 108 | +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) 109 | +- *(1) Project 110 | +- *(1) SerializeFromObject [input[0, bigint, false] AS value#71L] 111 | +- *(1) MapElements , obj#70: bigint 112 | +- *(1) DeserializeToObject staticinvoke(class java.lang.Long, ObjectType(class java.lang.Long), valueOf, id#13L, true, false), obj#69: java.lang.Long 113 | +- *(1) Range (1, 1000000000, step=1, splits=6) 114 | 115 | Different query plans. Because we're using a lambda there, Spark can't optimize it. 116 | So Spark has to "deserializeObject" by invoking Long.valueOf on each element in the DS, then map each element with the function, then serialize it back as a DS. 117 | 118 | The reason why Spark has to do that is that Spark doesn't have any information on the lambda, and thus is forced to apply it to each element. 119 | */ 120 | 121 | /** 122 | * Lesson 2: use DFs most of the time. Spark optimizes most stuff away. 123 | * Lesson 3: Lambdas are impossible to optimize. 
124 | */ 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/main/scala/part2foundations/SparkJobAnatomy.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | 6 | object SparkJobAnatomy { 7 | 8 | ///////////////////////////////////////////////////////////////////// Boilerplate 9 | // you don't need this code in the Spark shell 10 | // this code is needed if you want to run it locally in IntelliJ 11 | 12 | val spark = SparkSession.builder() 13 | .config("spark.master", "local") 14 | .appName("Spark Job Anatomy") 15 | .getOrCreate() 16 | 17 | val sc = spark.sparkContext 18 | 19 | ///////////////////////////////////////////////////////////////////// Boilerplate 20 | 21 | /** 22 | * Cluster prep 23 | * 24 | * 1. Navigate to the spark-optimization folder, go to spark-cluster/ 25 | * 2. docker-compose up --scale spark-worker=3 26 | * 3. In another terminal: 27 | * - docker exec -it spark-cluster_spark-master_1 bash 28 | * - cd spark/bin 29 | * - ./spark-shell 30 | * 4. In (yet) another terminal: 31 | * - go to spark-optimization 32 | * - docker cp (the data folder) spark-cluster_spark-master_1:/tmp 33 | * 5. 
Open http://localhost:4040 for the Spark UI 34 | */ 35 | 36 | // job 1 - a count 37 | val rdd1 = sc.parallelize(1 to 1000000) 38 | rdd1.count 39 | // inspect the UI, one stage with 6 tasks 40 | // task = a unit of computation applied to a unit of data (a partition) 41 | 42 | // job 2 - a count with a small transformation 43 | rdd1.map(_ * 2).count 44 | // inspect the UI, another job with (still) one stage, 6 tasks 45 | // all parallelizable computations (like maps) are done in a single stage 46 | 47 | // job 3 - a count with a shuffle 48 | rdd1.repartition(23).count 49 | // UI: 2 stages, one with 6 tasks, one with 23 tasks 50 | // each stage is delimited by shuffles 51 | 52 | // job 4, a more complex computation: load a file and compute the average salary of the employees by department 53 | val employees = sc.textFile("/tmp/employees.csv") 54 | // process the lines 55 | val empTokens = employees.map(line => line.split(",")) 56 | // extract relevant data 57 | val empDetails = empTokens.map(tokens => (tokens(4), tokens(7))) 58 | // group the elements 59 | val empGroups = empDetails.groupByKey(2) 60 | // process the values associated to each group 61 | val avgSalaries = empGroups.mapValues(salaries => salaries.map(_.toInt).sum / salaries.size) 62 | // show the result 63 | avgSalaries 64 | .collect() // this is an action 65 | .foreach(println) 66 | 67 | // look at the Spark UI: one job, 2 stages 68 | // the groupByKey triggers a shuffle, and thus the beginning of another stage 69 | // all other computations (maps, mapValues) are done in their respective stage 70 | // the number of tasks = the number of partitions processed in a given stage 71 | } -------------------------------------------------------------------------------- /src/main/scala/part2foundations/TestDeployApp.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.{SaveMode, SparkSession} 4 | 5 | object 
TestDeployApp { 6 | 7 | // TestDeployApp inputFile outputFile 8 | def main(args: Array[String]): Unit = { 9 | 10 | if (args.length != 2) { 11 | println("Need input file and output file") 12 | System.exit(1) 13 | } 14 | 15 | val spark = SparkSession.builder() 16 | .appName("Test Deploy App") 17 | // method 1 18 | .config("spark.executor.memory", "1g") 19 | .getOrCreate() 20 | 21 | import spark.implicits._ 22 | 23 | val moviesDF = spark.read 24 | .option("inferSchema", "true") 25 | .json(args(0)) 26 | 27 | val goodComediesDF = moviesDF.select( 28 | $"Title", 29 | $"IMDB_Rating".as("Rating"), 30 | $"Release_Date".as("Release") 31 | ) 32 | .where(($"Major_Genre" === "Comedy") and ($"IMDB_Rating" > 6.5)) 33 | .orderBy($"Rating".desc_nulls_last) 34 | 35 | // method 2 36 | spark.conf.set("spark.executor.memory", "1g") // warning - not all configurations available 37 | 38 | /* 39 | method 3: pass configs as command line arguments: 40 | 41 | spark-submit ... --conf spark.executor.memory=1g 42 | 43 | You can also use dedicated command line arguments for certain configurations: 44 | --master = spark.master 45 | --executor-memory = spark.executor.memory 46 | --driver-memory = spark.driver.memory 47 | 48 | and many more. 
49 | */ 50 | goodComediesDF.show() 51 | 52 | goodComediesDF.write 53 | .mode(SaveMode.Overwrite) 54 | .format("json") 55 | .save(args(1)) 56 | 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/part2foundations/TungstenDemo.scala: -------------------------------------------------------------------------------- 1 | package part2foundations 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object TungstenDemo { 6 | 7 | val spark = SparkSession.builder() 8 | .appName("Tungsten Demo") 9 | .master("local") 10 | .getOrCreate() 11 | 12 | val sc = spark.sparkContext 13 | 14 | val numbersRDD = sc.parallelize(1 to 10000000).cache() 15 | numbersRDD.count() 16 | numbersRDD.count() // much faster 17 | 18 | import spark.implicits._ 19 | val numbersDF = numbersRDD.toDF("value").cache() // cached with Tungsten 20 | numbersDF.count() 21 | numbersDF.count() // much faster 22 | 23 | // Tungsten is active in WholeStageCodegen 24 | 25 | /* 26 | == Physical Plan == 27 | HashAggregate(keys=[], functions=[sum(id#54L)]) 28 | +- HashAggregate(keys=[], functions=[partial_sum(id#54L)]) 29 | +- Range (0, 1000000, step=1, splits=1) 30 | */ 31 | spark.conf.set("spark.sql.codegen.wholeStage", "false") 32 | val noWholeStageSum = spark.range(1000000).selectExpr("sum(id)") 33 | noWholeStageSum.explain() 34 | noWholeStageSum.show() 35 | 36 | /* 37 | == Physical Plan == 38 | *(1) HashAggregate(keys=[], functions=[sum(id#67L)]) 39 | +- *(1) HashAggregate(keys=[], functions=[partial_sum(id#67L)]) 40 | +- *(1) Range (0, 1000000, step=1, splits=1) 41 | 42 | * means that Tungsten is present! 
43 | */ 44 | spark.conf.set("spark.sql.codegen.wholeStage", "true") 45 | val wholeStageSum = spark.range(1000000).selectExpr("sum(id)") 46 | wholeStageSum.explain() 47 | wholeStageSum.show() 48 | 49 | 50 | 51 | def main(args: Array[String]): Unit = { 52 | Thread.sleep(1000000) 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/part3caching/Caching.scala: -------------------------------------------------------------------------------- 1 | package part3caching 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import org.apache.spark.storage.StorageLevel 5 | 6 | object Caching { 7 | 8 | val spark = SparkSession.builder() 9 | .appName("Caching") 10 | .config("spark.memory.offHeap.enabled", "true") 11 | .config("spark.memory.offHeap.size", 10000000) 12 | .master("local") 13 | .getOrCreate() 14 | 15 | val flightsDF = spark.read 16 | .option("inferSchema", "true") 17 | .json("src/main/resources/data/flights") 18 | 19 | flightsDF.count() 20 | 21 | // simulate an "expensive" operation 22 | val orderedFlightsDF = flightsDF.orderBy("dist") 23 | 24 | // scenario: use this DF multiple times 25 | 26 | orderedFlightsDF.persist( 27 | // no argument = MEMORY_AND_DISK 28 | // StorageLevel.MEMORY_ONLY // cache the DF in memory EXACTLY - CPU efficient, memory expensive 29 | // StorageLevel.DISK_ONLY // cache the DF to DISK - CPU efficient and mem efficient, but slower 30 | // StorageLevel.MEMORY_AND_DISK // cache this DF to both the heap AND the disk - first caches to memory, but if the DF is EVICTED, will be written to disk 31 | 32 | /* modifiers: */ 33 | // StorageLevel.MEMORY_ONLY_SER // memory only, serialized - more CPU intensive, memory saving - more impactful for RDDs 34 | // StorageLevel.MEMORY_ONLY_2 // memory only, replicated twice - for resiliency, 2x memory usage 35 | // StorageLevel.MEMORY_ONLY_SER_2 // memory only, serialized, replicated 2x 36 | 37 | /* off-heap */ 38 | StorageLevel.OFF_HEAP // 
cache outside the JVM, done with Tungsten, still stored on the machine RAM, needs to be configured, CPU efficient and memory efficient 39 | ) 40 | 41 | orderedFlightsDF.count() 42 | orderedFlightsDF.count() 43 | 44 | // remove from cache 45 | orderedFlightsDF.unpersist() // remove this DF from cache 46 | 47 | // change cache name 48 | orderedFlightsDF.createOrReplaceTempView("orderedFlights") 49 | spark.catalog.cacheTable("orderedFlights") 50 | orderedFlightsDF.count() 51 | 52 | // RDDs 53 | val flightsRDD = orderedFlightsDF.rdd 54 | flightsRDD.persist(StorageLevel.MEMORY_ONLY_SER) 55 | flightsRDD.count() 56 | 57 | def main(args: Array[String]): Unit = { 58 | Thread.sleep(1000000) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/part3caching/Checkpointing.scala: -------------------------------------------------------------------------------- 1 | package part3caching 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import org.apache.spark.storage.StorageLevel 5 | 6 | object Checkpointing { 7 | 8 | val spark = SparkSession.builder() 9 | .appName("Checkpointing") 10 | .master("local") 11 | .getOrCreate() 12 | 13 | val sc = spark.sparkContext 14 | 15 | def demoCheckpoint() = { 16 | val flightsDF = spark.read 17 | .option("inferSchema", "true") 18 | .json("src/main/resources/data/flights") 19 | 20 | // do some expensive computation 21 | val orderedFlights = flightsDF.orderBy("dist") 22 | 23 | // checkpointing is used to avoid failure in computations 24 | // needs to be configured 25 | sc.setCheckpointDir("spark-warehouse") 26 | 27 | // checkpoint a DF = save the DF to disk 28 | val checkpointedFlights = orderedFlights.checkpoint() // an action 29 | 30 | // query plan difference with checkpointed DFs 31 | /* 32 | == Physical Plan == 33 | *(1) Sort [dist#16 ASC NULLS FIRST], true, 0 34 | +- *(1) Project [_id#7, arrdelay#8, carrier#9, crsarrtime#10L, crsdephour#11L, crsdeptime#12L, crselapsedtime#13, 
depdelay#14, dest#15, dist#16, dofW#17L, origin#18] 35 | +- BatchScan[_id#7, arrdelay#8, carrier#9, crsarrtime#10L, crsdephour#11L, crsdeptime#12L, crselapsedtime#13, depdelay#14, dest#15, dist#16, dofW#17L, origin#18] JsonScan Location: InMemoryFileIndex[file:/Users/daniel/dev/rockthejvm/courses/spark-optimization-2/src/main/resourc..., ReadSchema: struct<_id:string,arrdelay:double,carrier:string,crsarrtime:bigint,crsdephour:bigint,crsdeptime:b... 36 | */ 37 | orderedFlights.explain() 38 | /* 39 | == Physical Plan == 40 | *(1) Scan ExistingRDD[_id#7,arrdelay#8,carrier#9,crsarrtime#10L,crsdephour#11L,crsdeptime#12L,crselapsedtime#13,depdelay#14,dest#15,dist#16,dofW#17L,origin#18] 41 | */ 42 | checkpointedFlights.explain() 43 | 44 | checkpointedFlights.show() 45 | } 46 | 47 | def cachingJobRDD() = { 48 | val numbers = sc.parallelize(1 to 10000000) 49 | val descNumbers = numbers.sortBy(-_).persist(StorageLevel.DISK_ONLY) 50 | descNumbers.sum() 51 | descNumbers.sum() // shorter time here 52 | } 53 | 54 | def checkpointingJobRDD() = { 55 | sc.setCheckpointDir("spark-warehouse") 56 | val numbers = sc.parallelize(1 to 10000000) 57 | val descNumbers = numbers.sortBy(-_) 58 | descNumbers.checkpoint() // returns Unit 59 | descNumbers.sum() 60 | descNumbers.sum() 61 | } 62 | 63 | def cachingJobDF() = { 64 | val flightsDF = spark.read 65 | .option("inferSchema", "true") 66 | .json("src/main/resources/data/flights") 67 | 68 | val orderedFlights = flightsDF.orderBy("dist") 69 | orderedFlights.persist(StorageLevel.DISK_ONLY) 70 | orderedFlights.count() 71 | orderedFlights.count() // shorter job 72 | } 73 | 74 | def checkpointingJobDF() = { 75 | sc.setCheckpointDir("spark-warehouse") 76 | val flightsDF = spark.read 77 | .option("inferSchema", "true") 78 | .json("src/main/resources/data/flights") 79 | 80 | val orderedFlights = flightsDF.orderBy("dist") 81 | val checkpointedFlights = orderedFlights.checkpoint() 82 | checkpointedFlights.count() 83 | checkpointedFlights.count() 84 | 
} 85 | 86 | def main(args: Array[String]): Unit = { 87 | cachingJobDF() 88 | checkpointingJobDF() 89 | Thread.sleep(1000000) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/part4partitioning/Partitioners.scala: -------------------------------------------------------------------------------- 1 | package part4partitioning 2 | 3 | import org.apache.spark.{HashPartitioner, Partitioner, RangePartitioner} 4 | import org.apache.spark.sql.SparkSession 5 | 6 | import scala.util.Random 7 | 8 | object Partitioners { 9 | 10 | val spark = SparkSession.builder() 11 | .appName("Partitioners") 12 | .master("local") 13 | .getOrCreate() 14 | 15 | val sc = spark.sparkContext 16 | 17 | val numbers = sc.parallelize(1 to 10000) 18 | println(numbers.partitioner) // None 19 | 20 | val numbers3 = numbers.repartition(3) // random data redistribution 21 | println(numbers3.partitioner) // None 22 | 23 | // keep track of the partitioner 24 | // KV RDDs can control the partitioning scheme 25 | val keyedNumbers = numbers.map(n => (n % 10, n)) // RDD[(Int, Int)] 26 | val hashedNumbers = keyedNumbers.partitionBy(new HashPartitioner(4)) 27 | /* 28 | keys with the same hash stay on the same partition 29 | Prerequisite for 30 | - combineByKey 31 | - groupByKey 32 | - aggregateByKey 33 | - foldByKey 34 | - reduceByKey 35 | 36 | Prereq for joins, when neither RDD has a known partitioner. 37 | */ 38 | val rangedNumbers = keyedNumbers.partitionBy(new RangePartitioner(5, keyedNumbers)) 39 | /* 40 | Keys within the same range will be on the same partition. 41 | For a spectrum 0-1000 42 | keys between Int.MinValue-200 => partition 0 43 | keys between 200-400 => partition 1 44 | keys between 400-600 => partition 2 45 | keys between 600-800 => partition 3 46 | keys between 800-Int.MaxValue => partition 4 47 | 48 | RangePartitioner is a prerequisite for a SORT. 
*/
  rangedNumbers.sortByKey() // NOT incur a shuffle

  // define your own partitioner

  def generateRandomWords(nWords: Int, maxLength: Int) = {
    val r = new Random()
    (1 to nWords).map(_ => r.alphanumeric.take(r.nextInt(maxLength)).mkString(""))
  }

  val randomWordsRDD = sc.parallelize(generateRandomWords(1000, 100))
  // repartition this RDD by the words length == two words of the same length will be on the same partition
  // custom computation = counting the occurrences of 'z' in every word
  val zWordsRDD = randomWordsRDD.map(word => (word, word.count(_ == 'z'))) // RDD[(String, Int)]

  class ByLengthPartitioner(override val numPartitions: Int) extends Partitioner {
    override def getPartition(key: Any): Int = {
      key.toString.length % numPartitions
    }
  }

  val byLengthZWords = zWordsRDD.partitionBy(new ByLengthPartitioner(100))

  def main(args: Array[String]): Unit = {
    byLengthZWords.foreachPartition(_.foreach(println))
  }
}

--------------------------------------------------------------------------------
/src/main/scala/part4partitioning/PartitioningProblems.scala:
--------------------------------------------------------------------------------
package part4partitioning

import org.apache.spark.sql.SparkSession
import org.apache.spark.util.SizeEstimator

/**
  * Experiments around partition sizing: the same job run with different partition counts,
  * plus three ways of getting a feel for how large a dataset is.
  */
object PartitioningProblems {

  val spark = SparkSession.builder()
    .appName("Partitioning Problems")
    .master("local[*]") // for parallelism
    .getOrCreate()

  /**
    * Spreads a 100M-row range over `nPartitions` partitions, caches and materializes it,
    * then runs the aggregation under study and prints its result.
    *
    * @param nPartitions number of partitions to repartition the numbers into
    */
  def processNumbers(nPartitions: Int): Unit = {
    val numbers = spark.range(100000000) // ~800MB
    val repartitionedNumbers = numbers.repartition(nPartitions)
    repartitionedNumbers.cache()
    repartitionedNumbers.count() // action, so the cached data is actually materialized

    // the computation I care about
    repartitionedNumbers.selectExpr("sum(id)").show()
  }

  /**
    * 1 - use size estimator.
    *
    * Prints `SizeEstimator.estimate` for the Dataset object, then caches and counts the data.
    * The estimate usually works but is not super accurate: within an order of magnitude,
    * and on the larger side.
    *
    * @return the number of rows in the dataset
    */
  def dfSizeEstimator(): Long = {
    val numbers = spark.range(100000)
    // measures the memory footprint of the actual JVM object backing the dataset
    println(SizeEstimator.estimate(numbers))
    numbers.cache()
    numbers.count()
  }

  /**
    * 2 - use query plan.
    *
    * Prints the size statistic attached to the optimized logical plan.
    */
  def estimateWithQueryPlan(): Unit = {
    val numbers = spark.range(100000)
    println(numbers.queryExecution.optimizedPlan.stats.sizeInBytes) // accurate size in bytes for the DATA
  }

  /**
    * Caches and counts an RDD of 100000 integers.
    * NOTE(review): prints nothing itself - presumably the cached size is read off the Spark UI; confirm.
    *
    * @return the number of elements in the RDD
    */
  def estimateRDD(): Long = {
    val numbers = spark.sparkContext.parallelize(1 to 100000)
    numbers.cache().count()
  }

  def main(args: Array[String]): Unit = {
    // processNumbers(2) // 400MB / partition
    // processNumbers(20) // 40MB / partition
    // processNumbers(200) // 4MB / partition
    // processNumbers(2000) // 400KB / partition
    // processNumbers(20000) // 40KB / partition

    // dfSizeEstimator()
    // estimateWithQueryPlan()
    // estimateRDD()

    // 10-100MB rule for partition size for UNCOMPRESSED DATA

    // keeps the JVM (and with it the Spark application) alive after the experiments above
    Thread.sleep(10000000)
  }

}

--------------------------------------------------------------------------------
/src/main/scala/part4partitioning/RepartitionCoalesce.scala:
--------------------------------------------------------------------------------
package part4partitioning

import org.apache.spark.sql.SparkSession

/**
  * Side-by-side comparison of `repartition` and `coalesce` on the same RDD.
  * All the work runs in the object initializer; `main` only keeps the application alive.
  */
object RepartitionCoalesce {

  val spark = SparkSession.builder()
    .appName("Repartition and Coalesce")
    .master("local[*]")
    .getOrCreate()

  val sc = spark.sparkContext

  val numbers = sc.parallelize(1 to 10000000)
  println(numbers.partitions.length) // number of virtual cores

  // repartition
  val repartitionedNumbers = numbers.repartition(2)
  repartitionedNumbers.count()

  // coalesce - fundamentally different
  val coalescedNumbers = numbers.coalesce(2) // for a smaller number of partitions
  coalescedNumbers.count()

  // force coalesce to be a shuffle (named argument instead of a bare boolean literal)
  val forcedShuffledNumbers = numbers.coalesce(2, shuffle = true)

  def main(args: Array[String]): Unit = {
    // keeps the JVM (and with it the Spark application) alive after the jobs above have run
    Thread.sleep(10000000)
  }
}

--------------------------------------------------------------------------------
/src/main/scala/part5boost/FixingDataSkews.scala:
--------------------------------------------------------------------------------
package part5boost

import common._
import org.apache.spark.sql.{Dataset, SparkSession}
import org.apache.spark.sql.functions._

/**
  * Two ways of computing, for every guitar, the average price of all similar guitar sales:
  * a plain join on (make, model) and a salted join on (make, model, salt).
  */
object FixingDataSkews {

  val spark = SparkSession.builder()
    .appName("Fixing Data Skews")
    .master("local[*]")
    .getOrCreate()

  // deactivate broadcast joins
  spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
  import spark.implicits._

  val guitars: Dataset[Guitar] = Seq.fill(40000)(DataGenerator.randomGuitar()).toDS
  val guitarSales: Dataset[GuitarSale] = Seq.fill(200000)(DataGenerator.randomGuitarSale()).toDS

  /*
    A Guitar is similar to a GuitarSale if
    - same make and model
    - abs(guitar.soundScore - guitarSale.soundScore) <= 0.1

    Problem:
    - for every Guitar, avg(sale prices of ALL SIMILAR GuitarSales)
    - Gibson L-00, config "sadfhja", sound 4.3,
      compute avg(sale prices of ALL GuitarSales of Gibson L-00 with sound quality between 4.2 and 4.4)
   */

  /**
    * Joins guitars with sales on (make, model), keeps the pairs whose sound scores differ by at
    * most 0.1 and averages the sale price per `configurationId`. Prints the physical plan.
    *
    * @return the number of rows of the aggregated result
    */
  def naiveSolution(): Long = {
    val joined = guitars.join(guitarSales, Seq("make", "model"))
      .where(abs(guitarSales("soundScore") - guitars("soundScore")) <= 0.1)
      .groupBy("configurationId")
      .agg(avg("salePrice").as("averagePrice"))

    joined.explain()
    joined.count()
  }

  /**
    * Same computation as [[naiveSolution]], but with a "salt" column added to the join key.
    *
    * Every guitar is replicated once per salt value and every sale is tagged with exactly one
    * salt value, so each (guitar, sale) pair with the same make and model still meets exactly
    * once, while the sales of one (make, model) are spread over `saltBuckets` distinct join keys.
    *
    * @param saltBuckets size of the salting interval `[0, saltBuckets)`; must be positive.
    *                    Defaults to 100, i.e. the interval 0-99.
    * @return the number of rows of the aggregated result
    * @throws IllegalArgumentException if `saltBuckets` is not positive
    */
  def noSkewSolution(saltBuckets: Int = 100): Long = {
    require(saltBuckets > 0, s"saltBuckets must be positive, got $saltBuckets")

    // multiplying the guitars DS x saltBuckets: one copy of each guitar per salt value
    val explodedGuitars = guitars.withColumn("salt", explode(lit((0 until saltBuckets).toArray)))
    // each sale lands in a single bucket; the same bucket count as above keeps both sides in sync
    val saltedGuitarSales = guitarSales.withColumn("salt", monotonically_increasing_id() % saltBuckets)

    val nonSkewedJoin = explodedGuitars.join(saltedGuitarSales, Seq("make", "model", "salt"))
      .where(abs(saltedGuitarSales("soundScore") - explodedGuitars("soundScore")) <= 0.1)
      .groupBy("configurationId")
      .agg(avg("salePrice").as("averagePrice"))

    nonSkewedJoin.explain()
    nonSkewedJoin.count()
  }

  def main(args: Array[String]): Unit = {
    noSkewSolution()
    // keeps the JVM (and with it the Spark application) alive after the job has run
    Thread.sleep(1000000)
  }
}

--------------------------------------------------------------------------------
/src/main/scala/part5boost/KryoSerializer.scala:
--------------------------------------------------------------------------------
package part5boost

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.storage.StorageLevel

/**
  * Compares Java and Kryo serialization on a serialized cache and on a shuffle.
  * The figures recorded in the comments below are the author's measurements.
  */
object KryoSerializer {

  // 1 - define a SparkConf object with the Kryo serializer
  val sparkConf = new SparkConf()
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryo.registrationRequired", "true")
    .registerKryoClasses(Array( // 2 - register the classes we want to serialize
      classOf[Person],
      classOf[Array[Person]]
    ))

  val spark = SparkSession.builder()
    .appName("Kryo Serialization")
    .config(sparkConf) // 3 - pass the SparkConf object to the SparkSession
    .master("local[*]")
    .getOrCreate()

  val sc = spark.sparkContext

  case class Person(name: String, age: Int)

  /** Builds `nPersons` people named `Person1`, `Person2`, ... with ages cycling through 0-99. */
  def generatePeople(nPersons: Int) = (1 to nPersons).map(i => Person(s"Person$i", i % 100))

  val people = sc.parallelize(generatePeople(10000000))

  /**
    * Persists `people` in serialized form in memory and materializes it.
    *
    * @return the number of people
    */
  def testCaching(): Long = {
    people.persist(StorageLevel.MEMORY_ONLY_SER).count()
    /*
      Java serialization
      - memory usage 254MB
      - time 20s

      Kryo serialization
      - memory usage 164.5 MB
      - time 17s
     */
  }

  /**
    * Groups the people by age, which moves whole `Person` values through a shuffle.
    *
    * @return the number of distinct ages
    */
  def testShuffling(): Long = {
    people.map(p => (p.age, p)).groupByKey().mapValues(_.size).count()
    /*
      Java serialization
      - shuffle 72.5 MB
      - time 20s

      Kryo serialization
      - shuffle 42.8 MB
      - time 18s
     */
  }

  def main(args: Array[String]): Unit = {
    testShuffling()
    // keeps the JVM (and with it the Spark application) alive after the job has run
    Thread.sleep(10000000)
  }


}

--------------------------------------------------------------------------------
/src/main/scala/part5boost/SerializationProblems.scala:
--------------------------------------------------------------------------------
package part5boost

import org.apache.spark.sql.SparkSession

/**
  * A series of small classes that run an RDD transformation from inside a method, each
  * illustrating what the lambda handed to Spark drags along with it and how to avoid
  * capturing the enclosing instance.
  *
  * Everything runs in the object initializer; `main` is intentionally empty.
  */
object SerializationProblems {

  val spark = SparkSession.builder()
    .appName("Serialization Problems")
    .master("local[*]")
    .getOrCreate()

  val sc = spark.sparkContext

  val rdd = sc.parallelize(1 to 100)

  // the lambda uses nothing but its own argument and a literal
  class RDDMultiplier {
    def multiplyRDD(): List[Int] = rdd.map(_ * 2).collect().toList
  }
  val rddMultiplier = new RDDMultiplier
  // rddMultiplier.multiplyRDD()
  // works

  // make class serializable
  // (the lambda reads the member `factor`, i.e. it goes through `this`)
  class MoreGeneralRDDMultiplier extends Serializable {
    val factor = 2
    def multiplyRDD(): List[Int] = rdd.map(_ * factor).collect().toList
  }
  val moreGeneralRDDMultiplier = new MoreGeneralRDDMultiplier
  // moreGeneralRDDMultiplier.multiplyRDD()

  // technique: enclose member in local value
  class MoreGeneralRDDMultiplierEnclosure {
    val factor = 2
    def multiplyRDD(): List[Int] = {
      val enclosedFactor = factor // the lambda below refers to this local, not to the member
      rdd.map(_ * enclosedFactor).collect().toList
    }
  }
  val moreGeneralRDDMultiplier2 = new MoreGeneralRDDMultiplierEnclosure
  // moreGeneralRDDMultiplier2.multiplyRDD()

  /**
    * Exercise
    */
  class MoreGeneralRDDMultiplierNestedClass {
    val factor = 2

    object NestedMultiplier extends Serializable {
      val extraTerm = 10
      val localFactor = factor // copy of the outer member, held by the nested object itself

      def multiplyRDD(): List[Int] = rdd.map(_ * localFactor + extraTerm).collect().toList
    }
  }
  val nestedMultiplier = new MoreGeneralRDDMultiplierNestedClass
  // nestedMultiplier.NestedMultiplier.multiplyRDD()

  /**
    * Exercise 2
    */
  case class Person(name: String, age: Int)
  val people = sc.parallelize(List(
    Person("Alice", 43),
    Person("Bob", 12),
    Person("Charlie", 23),
    Person("Diana", 67)
  ))

  class LegalDrinkingAgeChecker(legalAge: Int) {
    /** Flags, for every person in `people`, whether their age is at least `legalAge`. */
    def processPeople(): List[Boolean] = {
      val ageThreshold = legalAge // capture the constructor argument in a local value
      people.map(_.age >= ageThreshold).collect().toList
    }
  }
  val peopleChecker = new LegalDrinkingAgeChecker(21)
  // peopleChecker.processPeople()

  /**
    * Take things up a notch
    * Exercise 3: make this work
    *   - maybe change the structure of the inner classes
    *   - don't use Serializable
    *
    * Hint: FunctionX types are serializable
    */

  class PersonProcessor { // <- can't put Serializable here
    class DrinkingAgeChecker(legalAge: Int) {
      val check: Int => Boolean = { // <- 1: make this a val instead of a method
        val capturedLegalAge = legalAge // <- 3: capture the constructor argument
        (age: Int) => age >= capturedLegalAge
      }
    }

    class DrinkingAgeFlagger(checker: Int => Boolean) {
      def flag(): List[Boolean] = {
        val capturedChecker = checker // <- 2: capture the lambda
        people.map(p => capturedChecker(p.age)).collect().toList
      }
    }

    /** Flags every person in `people` against a legal drinking age of 21. */
    def processPeople(): List[Boolean] = {
      val usChecker = new DrinkingAgeChecker(21)
      val flagger = new DrinkingAgeFlagger(usChecker.check)
      flagger.flag()
    }
  }

  val personProcessor = new PersonProcessor
  personProcessor.processPeople()

  def main(args: Array[String]): Unit = {
    // intentionally empty: referencing this object runs the initializer above
  }
}

--------------------------------------------------------------------------------
/src/main/scala/playground/Playground.scala:
--------------------------------------------------------------------------------
package playground

import org.apache.spark.sql.SparkSession

/**
  * Scratch application for experimenting with the code written throughout the course
  * (nothing stops you from creating your own, of course).
  *
  * Being able to compile and run it means the libraries were downloaded correctly,
  * in which case you are set for the rest of the course.
  *
  * Enjoy!
  *
  * Daniel @ Rock the JVM
  */
object Playground {

  val spark = {
    val namedBuilder = SparkSession.builder().appName("Spark Optimization Playground")
    val localBuilder = namedBuilder.master("local")
    localBuilder.getOrCreate()
  }

  val sc = spark.sparkContext

  /** Builds a 1000-element RDD and prints how many rows it holds. */
  def main(args: Array[String]): Unit = {
    val firstRdd = sc.parallelize(1 to 1000)
    val rowCount = firstRdd.count()
    println(s"I have my first RDD, it has $rowCount rows. Now let me go optimize massive jobs.")
  }
}

--------------------------------------------------------------------------------