├── LICENSE ├── README.md ├── Rstart.R ├── nyc-taxi-1.png ├── nyc-taxi-2.png ├── nyc-taxi-3.png ├── nyc-taxi-4.png ├── nyc-taxi-5.png ├── nyc-taxi-6.png └── nyc_taxi_map.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Max Woolf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NYC Yellow Taxi Visualization Notebook 2 | R Code + Jupyter notebook for analyzing and visualizing NYC Yellow Taxi data. This notebook is the complement to my blog post [How to Visualize New York City Using Taxi Location Data and ggplot2](http://minimaxir.com/2015/11/nyc-ggplot2-howto/). 3 | 4 | *This notebook is licensed under the MIT License. 
If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)*
--------------------------------------------------------------------------------
/Rstart.R:
--------------------------------------------------------------------------------
library(readr)
library(dplyr)
library(ggplot2)
library(extrafont)
library(scales)
library(grid)
library(RColorBrewer)
library(digest)
library(readr)
library(stringr)

# Font families referenced by theme_custom() below.
fontFamily <- "Source Sans Pro"
fontTitle <- "Source Sans Pro Semibold"

# Twelve hex colours available to charts that need a categorical palette.
color_palette <- c(
  "#16a085", "#27ae60", "#2980b9", "#8e44ad", "#f39c12", "#c0392b",
  "#1abc9c", "#2ecc71", "#3498db", "#9b59b6", "#f1c40f", "#e74c3c"
)

# Return one colour from the 11-class "RdYlBu" Brewer palette with entries
# 5 to 7 removed. `number` wraps around the 8 remaining colours, so any
# integer (vectors included) maps to a valid entry.
neutral_colors <- function(number) {
  shades <- brewer.pal(11, "RdYlBu")[-(5:7)]
  shades[(number %% length(shades)) + 1]
}

# Return one colour from the 9-class "Set1" Brewer palette with entries 6 and
# 8 removed. `number` wraps around the 7 remaining colours.
set1_colors <- function(number) {
  shades <- brewer.pal(9, "Set1")[-c(6, 8)]
  shades[(number %% length(shades)) + 1]
}

# Base chart theme: theme_bw() at 8pt with a grey panel, no minor grid, no
# axis ticks, and the fontFamily / fontTitle families applied to text.
theme_custom <- function() {
  theme_bw(base_size = 8) +
    theme(
      panel.background = element_rect(fill = "#eaeaea"),
      plot.background = element_rect(fill = "white"),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(color = "#dddddd"),
      axis.ticks.x = element_blank(),
      axis.ticks.y = element_blank(),
      axis.title.x = element_text(family = fontTitle, size = 8, vjust = -.3),
      axis.title.y = element_text(family = fontTitle, size = 8, vjust = 1.5),
      panel.border = element_rect(color = "#cccccc"),
      text = element_text(color = "#1a1a1a", family = fontFamily),
      plot.margin = unit(c(0.25, 0.1, 0.30, 0.35), "cm"),
      plot.title = element_text(family = fontTitle, size = 9, vjust = 1)
    )
}

# Build the thin attribution strip that is drawn underneath a chart.
#
# Args:
#   source:   Optional data-source name. When it is a single non-missing,
#             non-empty value, "Data via <source>" is added right-aligned.
#             NULL, NA and "" all mean "no source label".
#   filename: Accepted for backward compatibility with existing callers; it
#             is not used when building the strip.
#   dark:     TRUE for the black background variant, FALSE for light grey.
#
# Returns:
#   A ggplot object containing only the attribution text and a top rule.
create_watermark <- function(source = "", filename = "", dark = FALSE) {
  strip_fill <- if (dark) "#000000" else "#F0F0F0"
  strip_text <- if (dark) "#666666" else "#969696"
  strip_font <- "Source Sans Pro"

  watermark <- ggplot(data.frame(x = 0.5, y = 0.5), aes(x, y)) +
    # Invisible point: gives the plot a layer tied to the data frame.
    geom_point(color = "transparent") +
    geom_text(
      x = 0, y = 0.9, label = "By Max Woolf — minimaxir.com",
      family = strip_font, color = strip_text, size = 1.75, hjust = 0
    ) +
    geom_text(
      x = 5, y = 0.9, label = "Made using R and ggplot2",
      family = strip_font, color = strip_text, size = 1.75
    ) +
    scale_x_continuous(limits = c(0, 10)) +
    scale_y_continuous(limits = c(0.5, 1.5)) +
    # Hairline along the top edge separating the strip from the chart.
    annotate(
      "segment", x = 0, xend = 10, y = 1.5, yend = 1.5,
      color = strip_text, size = 0.1
    ) +
    theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks = element_blank(),
      plot.margin = unit(c(0.1, 0, -0.4, 0), "cm")
    ) +
    theme(
      plot.background = element_rect(fill = strip_fill, color = strip_fill),
      panel.background = element_rect(fill = strip_fill, color = strip_fill)
    ) +
    scale_color_manual(values = strip_text)

  # Guard against NULL / NA / zero-length input before testing for emptiness;
  # a bare nchar(source) > 0 fails on NULL and treats NA as a real label.
  has_source <- length(source) == 1 && !is.na(source) && nzchar(source)
  if (has_source) {
    watermark <- watermark +
      geom_text(
        x = 10, y = 0.9, label = paste("Data via", source),
        family = strip_font, color = strip_text, size = 1.75, hjust = 1
      )
  }

  watermark
}

# Grid layouts used by the *_plot() helpers. The two "web" layouts stack a
# chart row above a thin watermark row; the video layout places two panels
# side by side at a 2:1 width ratio.
web_Layout <- grid.layout(
  nrow = 2, ncol = 1,
  heights = unit(c(2, 0.125), c("null", "null"))
)
tallweb_Layout <- grid.layout(
  nrow = 2, ncol = 1,
  heights = unit(c(3.5, 0.125), c("null", "null"))
)
video_Layout <- grid.layout(
  nrow = 1, ncol = 2,
  widths = unit(c(2, 1), c("null", "null"))
)

#grid.show.layout(Layout)

# Start a new grid page and push a viewport laid out with web_Layout.
# Arguments passed through `...` are ignored.
vplayout <- function(...) {
  grid.newpage()
  pushViewport(viewport(layout = web_Layout))
}

# Start a new grid page and push a viewport laid out with tallweb_Layout.
# Arguments passed through `...` are ignored.
talllayout <- function(...) {
  grid.newpage()
  pushViewport(viewport(layout = tallweb_Layout))
}

# Start a new grid page and push a viewport laid out with video_Layout.
# Arguments passed through `...` are ignored.
vidlayout <- function(...) {
  grid.newpage()
  pushViewport(viewport(layout = video_Layout))
}

# Viewport addressing cell (row x, column y) of the current layout.
subplot <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}

# Draw plot `a` in the top cell and plot `b` in the bottom cell of web_Layout.
web_plot <- function(a, b) {
  vplayout()
  print(a, vp = subplot(1, 1))
  print(b, vp = subplot(2, 1))
}

# Same as web_plot(), but on the taller tallweb_Layout.
tallweb_plot <- function(a, b) {
  talllayout()
  print(a, vp = subplot(1, 1))
  print(b, vp = subplot(2, 1))
}

# Draw plot `a` in the left cell and plot `b` in the right cell of
# video_Layout.
video_plot <- function(a, b) {
  vidlayout()
  print(a, vp = subplot(1, 1))
  print(b, vp = subplot(1, 2))
}

# Render a chart with an attribution strip to "<filename>.png" and,
# optionally, to "<filename>.pdf".
#
# Args:
#   plot1:    The chart to save (anything print() can draw into a viewport).
#   filename: Output path without extension.
#   source:   Data-source label forwarded to create_watermark().
#   pdf:      When TRUE, also write a PDF through the quartz device, which
#             exists only in macOS builds of R.
#   w, h:     Output width and height in inches.
#   tall:     When TRUE use the tall layout, otherwise the regular one. This
#             applies to the PNG and to the PDF.
#   dark:     Forwarded to create_watermark().
#
# Returns:
#   NULL, invisibly. Called for its side effect of writing files.
max_save <- function(plot1, filename, source = "", pdf = FALSE, w = 4, h = 3,
                     tall = FALSE, dark = FALSE) {
  # Draws the chart and its watermark on whichever device is current.
  # A plain if/else is used because `tall` is a single flag and the branches
  # are called for their side effects, not for a value.
  draw_page <- function() {
    strip <- create_watermark(source, filename, dark)
    if (tall) {
      tallweb_plot(plot1, strip)
    } else {
      web_plot(plot1, strip)
    }
  }

  png(paste0(filename, ".png"), res = 300, units = "in", width = w, height = h)
  # `finally` closes the PNG device even when drawing fails, so a broken plot
  # does not leave a device open.
  tryCatch(draw_page(), finally = dev.off())

  if (pdf) {
    quartz(width = w, height = h, dpi = 144)
    # The quartz device opened above is closed once the PDF has been written
    # (or if drawing fails) so repeated calls do not accumulate devices.
    tryCatch(
      {
        draw_page()
        quartz.save(paste0(filename, ".pdf"), type = "pdf", device = dev.cur())
      },
      finally = dev.off()
    )
  }

  invisible(NULL)
}

# Render two plots side by side to "<filename>.png" at 1920x1080 pixels
# (6.4in x 3.6in at 300 dpi).
#
# Returns:
#   NULL, invisibly. Called for its side effect of writing a file.
video_save <- function(plot1, plot2, filename) {
  png(
    paste0(filename, ".png"),
    res = 300, units = "in", width = 1920 / 300, height = 1080 / 300
  )
  # Close the device even if drawing fails.
  tryCatch(video_plot(plot1, plot2), finally = dev.off())

  invisible(NULL)
}

# Chart theme built on theme_bw() and coloured from a 9-class Brewer palette.
#
# Args:
#   palate_color: Name of the RColorBrewer palette to draw colours from.
#                 Entry 2 is the background, 3 the major grid lines, 6 the
#                 axis text, 7 the axis titles / legend / strip text and 9
#                 the plot title.
#
# Returns:
#   A ggplot2 theme object. The legend is hidden (legend.position = "none").
fte_theme <- function(palate_color = "Greys") {
  #display.brewer.all(n=9,type="seq",exact.n=TRUE)
  palate <- brewer.pal(n = 9, name = palate_color)
  background_color <- palate[2]
  grid_color <- palate[3]
  axis_text_color <- palate[6]
  axis_title_color <- palate[7]
  title_color <- palate[9]

  title_font <- "Source Sans Pro"
  axis_font <- "Open Sans Condensed Bold"

  theme_bw(base_size = 9) +
    theme(
      # Paint the whole chart region, border included, in one colour.
      panel.background = element_rect(
        fill = background_color, color = background_color
      ),
      plot.background = element_rect(
        fill = background_color, color = background_color
      ),
      panel.border = element_rect(color = background_color),
      # Grid: thin major lines only, no ticks.
      panel.grid.major = element_line(color = grid_color, size = .25),
      panel.grid.minor = element_blank(),
      axis.ticks = element_blank(),
      # Legend is hidden; its styling is still set for callers that
      # re-enable it with a later theme() call.
      legend.position = "none",
      legend.background = element_rect(fill = background_color),
      legend.text = element_text(
        size = 7, colour = axis_title_color, family = axis_font
      ),
      # Title, axis labels and tick labels.
      plot.title = element_text(
        colour = title_color, family = title_font, size = 9,
        vjust = 1.25, lineheight = 0.1
      ),
      axis.text.x = element_text(
        size = 7, colour = axis_text_color, family = axis_font
      ),
      axis.text.y = element_text(
        size = 7, colour = axis_text_color, family = axis_font
      ),
      axis.title.y = element_text(
        size = 7, colour = axis_title_color, family = title_font, vjust = 1.25
      ),
      axis.title.x = element_text(
        size = 7, colour = axis_title_color, family = title_font, vjust = 0
      ),
      plot.margin = unit(c(0.35, 0.2, 0.15, 0.4), "cm"),
      # Facet strips blend into the background.
      strip.background = element_rect(
        fill = background_color, color = background_color
      ),
      strip.text = element_text(
        size = 7, colour = axis_title_color, family = title_font
      )
    )
}

--------------------------------------------------------------------------------
/nyc-taxi-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-1.png
--------------------------------------------------------------------------------
/nyc-taxi-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-2.png
--------------------------------------------------------------------------------
/nyc-taxi-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-3.png
--------------------------------------------------------------------------------
/nyc-taxi-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-4.png
--------------------------------------------------------------------------------
/nyc-taxi-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-5.png
--------------------------------------------------------------------------------
/nyc-taxi-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minimaxir/nyc-taxi-notebook/4a088876e9d90b51749d8707a91150a5f675c74c/nyc-taxi-6.png
-------------------------------------------------------------------------------- /nyc_taxi_map.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Visualize New York City Using Taxi Location Data and ggplot2\n", 8 | "\n", 9 | "by [Max Woolf](http://minimaxir.com)\n", 10 | "\n", 11 | "This notebook is the complement to my blog post [How to Visualize New York City Using Taxi Location Data and ggplot2](http://minimaxir.com/2015/11/nyc-ggplot2-howto/).\n", 12 | "\n", 13 | "*This notebook is licensed under the MIT License. If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)*" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "\n", 28 | "Attaching package: ‘dplyr’\n", 29 | "\n", 30 | "The following objects are masked from ‘package:stats’:\n", 31 | "\n", 32 | " filter, lag\n", 33 | "\n", 34 | "The following objects are masked from ‘package:base’:\n", 35 | "\n", 36 | " intersect, setdiff, setequal, union\n", 37 | "\n", 38 | "Registering fonts with R\n", 39 | "\n", 40 | "Attaching package: ‘scales’\n", 41 | "\n", 42 | "The following objects are masked from ‘package:readr’:\n", 43 | "\n", 44 | " col_factor, col_numeric\n", 45 | "\n" 46 | ] 47 | }, 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "R version 3.2.2 (2015-08-14)\n", 52 | "Platform: x86_64-apple-darwin13.4.0 (64-bit)\n", 53 | "Running under: OS X 10.11.1 (El Capitan)\n", 54 | "\n", 55 | "locale:\n", 56 | "[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8\n", 57 | "\n", 58 | "attached base packages:\n", 59 | "[1] grid stats graphics grDevices utils 
datasets methods \n", 60 | "[8] base \n", 61 | "\n", 62 | "other attached packages:\n", 63 | "[1] bigrquery_0.1.0 stringr_1.0.0 digest_0.6.8 RColorBrewer_1.1-2\n", 64 | "[5] scales_0.3.0 extrafont_0.17 ggplot2_1.0.1 dplyr_0.4.3 \n", 65 | "[9] readr_0.1.1 \n", 66 | "\n", 67 | "loaded via a namespace (and not attached):\n", 68 | " [1] Rcpp_0.12.1 Rttf2pt1_1.3.3 magrittr_1.5 MASS_7.3-43 \n", 69 | " [5] munsell_0.4.2 uuid_0.1-2 colorspace_1.2-6 R6_2.1.1 \n", 70 | " [9] httr_1.0.0 plyr_1.8.3 tools_3.2.2 parallel_3.2.2 \n", 71 | "[13] gtable_0.1.2 DBI_0.3.1 extrafontdb_1.0 assertthat_0.1 \n", 72 | "[17] IRdisplay_0.3 reshape2_1.4.1 repr_0.4 base64enc_0.1-3 \n", 73 | "[21] IRkernel_0.5 evaluate_0.8 rzmq_0.7.7 stringi_0.5-5 \n", 74 | "[25] jsonlite_0.9.17 proto_0.3-10 " 75 | ] 76 | }, 77 | "execution_count": 1, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "options(warn=-1)\n", 84 | "\n", 85 | "# IMPORTANT: This assumes that all packages in \"Rstart.R\" are installed,\n", 86 | "# and the fonts \"Source Sans Pro\" and \"Open Sans Condensed Bold\" are installed\n", 87 | "# via extrafont. If ggplot2 charts fail to render, you may need to change/remove the theme call.\n", 88 | "\n", 89 | "source(\"Rstart.R\")\n", 90 | "library(bigrquery)\n", 91 | "library(methods) # needed for query_exec in Jupyter: https://github.com/hadley/bigrquery/issues/32\n", 92 | "\n", 93 | "options(repr.plot.mimetypes = 'image/png', repr.plot.width=4, repr.plot.height=3, repr.plot.res=300)\n", 94 | "\n", 95 | "sessionInfo()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "collapsed": true 102 | }, 103 | "source": [ 104 | "## bigrquery\n", 105 | "\n", 106 | "This uses the `bigrquery` R package to query the data. Ensure that it is set up correctly, with your own project name from BigQuery."
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 2, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "project_id <- # DO NOT SHARE!" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "# Getting the Data via BigQuery\n", 125 | "\n", 126 | "Gather the query and execute. May take a couple minutes to run data + 15 minutes to retrieve the data! Also uses a [query optimization](https://www.reddit.com/r/bigquery/comments/3fo9ao/nyc_taxi_trips_now_officially_shared_by_the_nyc/ctqfr8h) recommended by Felipe Hoffa." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 3, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "\r", 141 | "Retrieving data: 5.3s\r", 142 | "Retrieving data: 7.0s\r", 143 | "Retrieving data: 8.1s\r", 144 | "Retrieving data: 9.2s\r", 145 | "Retrieving data: 10.4s\r", 146 | "Retrieving data: 12.6s\r", 147 | "Retrieving data: 13.7s\r", 148 | "Retrieving data: 14.7s\r", 149 | "Retrieving data: 15.8s\r", 150 | "Retrieving data: 17.0s\r", 151 | "Retrieving data: 18.3s\r", 152 | "Retrieving data: 21.3s\r", 153 | "Retrieving data: 22.3s\r", 154 | "Retrieving data: 23.4s\r", 155 | "Retrieving data: 24.8s\r", 156 | "Retrieving data: 25.8s\r", 157 | "Retrieving data: 26.9s\r", 158 | "Retrieving data: 28.2s\r", 159 | "Retrieving data: 29.2s\r", 160 | "Retrieving data: 30.6s\r", 161 | "Retrieving data: 31.8s\r", 162 | "Retrieving data: 32.9s\r", 163 | "Retrieving data: 34.1s\r", 164 | "Retrieving data: 35.7s\r", 165 | "Retrieving data: 37.1s\r", 166 | "Retrieving data: 38.2s\r", 167 | "Retrieving data: 39.5s\r", 168 | "Retrieving data: 41.1s\r", 169 | "Retrieving data: 42.4s\r", 170 | "Retrieving data: 43.5s\r", 171 | "Retrieving data: 44.6s\r", 172 | "Retrieving data: 46.4s\r", 173 | "Retrieving data: 
47.6s\r", 174 | "Retrieving data: 48.8s\r", 175 | "Retrieving data: 49.9s\r", 176 | "Retrieving data: 51.2s\r", 177 | "Retrieving data: 56.1s\r", 178 | "Retrieving data: 58.0s\r", 179 | "Retrieving data: 60.2s\r", 180 | "Retrieving data: 61.8s\r", 181 | "Retrieving data: 63.3s\r", 182 | "Retrieving data: 64.6s\r", 183 | "Retrieving data: 65.7s\r", 184 | "Retrieving data: 67.0s\r", 185 | "Retrieving data: 68.2s\r", 186 | "Retrieving data: 69.4s\r", 187 | "Retrieving data: 70.7s\r", 188 | "Retrieving data: 72.0s\r", 189 | "Retrieving data: 73.5s\r", 190 | "Retrieving data: 75.0s\r", 191 | "Retrieving data: 80.1s\r", 192 | "Retrieving data: 81.4s\r", 193 | "Retrieving data: 82.6s\r", 194 | "Retrieving data: 83.7s\r", 195 | "Retrieving data: 85.3s\r", 196 | "Retrieving data: 86.4s\r", 197 | "Retrieving data: 87.6s\r", 198 | "Retrieving data: 89.6s\r", 199 | "Retrieving data: 90.8s\r", 200 | "Retrieving data: 92.0s\r", 201 | "Retrieving data: 93.6s\r", 202 | "Retrieving data: 94.7s\r", 203 | "Retrieving data: 95.8s\r", 204 | "Retrieving data: 97.0s\r", 205 | "Retrieving data: 98.3s\r", 206 | "Retrieving data: 99.4s\r", 207 | "Retrieving data: 100.8s\r", 208 | "Retrieving data: 101.9s\r", 209 | "Retrieving data: 102.9s\r", 210 | "Retrieving data: 104.2s\r", 211 | "Retrieving data: 105.5s\r", 212 | "Retrieving data: 106.7s\r", 213 | "Retrieving data: 107.9s\r", 214 | "Retrieving data: 109.1s\r", 215 | "Retrieving data: 110.2s\r", 216 | "Retrieving data: 111.6s\r", 217 | "Retrieving data: 112.8s\r", 218 | "Retrieving data: 114.0s\r", 219 | "Retrieving data: 115.2s\r", 220 | "Retrieving data: 116.2s\r", 221 | "Retrieving data: 117.4s\r", 222 | "Retrieving data: 118.7s\r", 223 | "Retrieving data: 120.0s\r", 224 | "Retrieving data: 121.4s\r", 225 | "Retrieving data: 122.7s\r", 226 | "Retrieving data: 123.8s\r", 227 | "Retrieving data: 125.3s\r", 228 | "Retrieving data: 126.5s\r", 229 | "Retrieving data: 127.7s\r", 230 | "Retrieving data: 128.8s\r", 231 | "Retrieving data: 
130.0s\r", 232 | "Retrieving data: 131.3s\r", 233 | "Retrieving data: 137.0s\r", 234 | "Retrieving data: 138.4s\r", 235 | "Retrieving data: 143.3s\r", 236 | "Retrieving data: 144.5s\r", 237 | "Retrieving data: 146.1s\r", 238 | "Retrieving data: 147.2s\r", 239 | "Retrieving data: 150.6s\r", 240 | "Retrieving data: 151.9s\r", 241 | "Retrieving data: 153.8s\r", 242 | "Retrieving data: 154.8s\r", 243 | "Retrieving data: 156.2s\r", 244 | "Retrieving data: 157.4s\r", 245 | "Retrieving data: 158.9s\r", 246 | "Retrieving data: 160.2s\r", 247 | "Retrieving data: 161.4s\r", 248 | "Retrieving data: 162.7s\r", 249 | "Retrieving data: 163.8s\r", 250 | "Retrieving data: 165.2s\r", 251 | "Retrieving data: 166.3s\r", 252 | "Retrieving data: 167.6s\r", 253 | "Retrieving data: 168.7s\r", 254 | "Retrieving data: 170.0s\r", 255 | "Retrieving data: 171.3s\r", 256 | "Retrieving data: 172.5s\r", 257 | "Retrieving data: 173.6s\r", 258 | "Retrieving data: 174.9s\r", 259 | "Retrieving data: 176.2s\r", 260 | "Retrieving data: 177.6s\r", 261 | "Retrieving data: 178.9s\r", 262 | "Retrieving data: 180.3s\r", 263 | "Retrieving data: 181.3s\r", 264 | "Retrieving data: 182.6s\r", 265 | "Retrieving data: 183.8s\r", 266 | "Retrieving data: 185.0s\r", 267 | "Retrieving data: 186.3s\r", 268 | "Retrieving data: 187.6s\r", 269 | "Retrieving data: 189.6s\r", 270 | "Retrieving data: 190.8s\r", 271 | "Retrieving data: 192.1s\r", 272 | "Retrieving data: 195.8s\r", 273 | "Retrieving data: 196.8s\r", 274 | "Retrieving data: 197.9s\r", 275 | "Retrieving data: 199.0s\r", 276 | "Retrieving data: 200.3s\r", 277 | "Retrieving data: 201.3s\r", 278 | "Retrieving data: 202.5s\r", 279 | "Retrieving data: 203.8s\r", 280 | "Retrieving data: 205.0s\r", 281 | "Retrieving data: 206.2s\r", 282 | "Retrieving data: 207.5s\r", 283 | "Retrieving data: 208.7s\r", 284 | "Retrieving data: 209.9s\r", 285 | "Retrieving data: 211.1s\r", 286 | "Retrieving data: 212.8s\r", 287 | "Retrieving data: 213.9s\r", 288 | "Retrieving data: 
221.0s\r", 289 | "Retrieving data: 222.2s\r", 290 | "Retrieving data: 223.2s\r", 291 | "Retrieving data: 224.3s\r", 292 | "Retrieving data: 225.4s\r", 293 | "Retrieving data: 226.8s\r", 294 | "Retrieving data: 227.8s\r", 295 | "Retrieving data: 228.9s\r", 296 | "Retrieving data: 230.4s\r", 297 | "Retrieving data: 231.7s\r", 298 | "Retrieving data: 232.9s\r", 299 | "Retrieving data: 234.3s\r", 300 | "Retrieving data: 235.5s\r", 301 | "Retrieving data: 236.6s\r", 302 | "Retrieving data: 237.9s\r", 303 | "Retrieving data: 239.0s\r", 304 | "Retrieving data: 240.5s\r", 305 | "Retrieving data: 241.7s\r", 306 | "Retrieving data: 243.0s\r", 307 | "Retrieving data: 244.3s\r", 308 | "Retrieving data: 245.5s\r", 309 | "Retrieving data: 246.6s\r", 310 | "Retrieving data: 247.8s\r", 311 | "Retrieving data: 249.2s\r", 312 | "Retrieving data: 250.9s\r", 313 | "Retrieving data: 252.5s\r", 314 | "Retrieving data: 254.3s\r", 315 | "Retrieving data: 255.6s\r", 316 | "Retrieving data: 256.8s\r", 317 | "Retrieving data: 259.0s\r", 318 | "Retrieving data: 260.5s\r", 319 | "Retrieving data: 261.8s\r", 320 | "Retrieving data: 263.0s\r", 321 | "Retrieving data: 264.4s\r", 322 | "Retrieving data: 265.7s\r", 323 | "Retrieving data: 267.1s\r", 324 | "Retrieving data: 268.8s\r", 325 | "Retrieving data: 270.6s\r", 326 | "Retrieving data: 271.9s\r", 327 | "Retrieving data: 273.2s\r", 328 | "Retrieving data: 274.4s\r", 329 | "Retrieving data: 275.7s\r", 330 | "Retrieving data: 277.0s\r", 331 | "Retrieving data: 278.2s\r", 332 | "Retrieving data: 280.9s\r", 333 | "Retrieving data: 282.0s\r", 334 | "Retrieving data: 283.3s\r", 335 | "Retrieving data: 284.4s\r", 336 | "Retrieving data: 285.9s\r", 337 | "Retrieving data: 287.4s\r", 338 | "Retrieving data: 288.5s\r", 339 | "Retrieving data: 289.9s\r", 340 | "Retrieving data: 291.0s\r", 341 | "Retrieving data: 292.6s\r", 342 | "Retrieving data: 293.9s\r", 343 | "Retrieving data: 295.1s\r", 344 | "Retrieving data: 296.3s\r", 345 | "Retrieving data: 
297.7s\r", 346 | "Retrieving data: 298.8s\r", 347 | "Retrieving data: 300.1s\r", 348 | "Retrieving data: 301.2s\r", 349 | "Retrieving data: 302.2s\r", 350 | "Retrieving data: 303.4s\r", 351 | "Retrieving data: 304.5s\r", 352 | "Retrieving data: 305.5s\r", 353 | "Retrieving data: 306.9s\r", 354 | "Retrieving data: 308.0s\r", 355 | "Retrieving data: 309.3s\r", 356 | "Retrieving data: 310.6s\r", 357 | "Retrieving data: 311.6s\r", 358 | "Retrieving data: 312.9s\r", 359 | "Retrieving data: 314.2s\r", 360 | "Retrieving data: 315.3s\r", 361 | "Retrieving data: 316.6s\r", 362 | "Retrieving data: 317.8s\r", 363 | "Retrieving data: 319.6s\r", 364 | "Retrieving data: 321.1s\r", 365 | "Retrieving data: 323.6s\r", 366 | "Retrieving data: 324.7s\r", 367 | "Retrieving data: 325.9s\r", 368 | "Retrieving data: 327.1s\r", 369 | "Retrieving data: 328.3s\r", 370 | "Retrieving data: 329.5s\r", 371 | "Retrieving data: 330.7s\r", 372 | "Retrieving data: 332.0s\r", 373 | "Retrieving data: 333.2s\r", 374 | "Retrieving data: 334.3s\r", 375 | "Retrieving data: 335.8s\r", 376 | "Retrieving data: 336.9s\r", 377 | "Retrieving data: 338.1s\r", 378 | "Retrieving data: 339.2s\r", 379 | "Retrieving data: 340.3s\r", 380 | "Retrieving data: 341.5s\r", 381 | "Retrieving data: 342.8s\r", 382 | "Retrieving data: 346.8s\r", 383 | "Retrieving data: 348.4s\r", 384 | "Retrieving data: 349.4s\r", 385 | "Retrieving data: 350.7s\r", 386 | "Retrieving data: 352.2s\r", 387 | "Retrieving data: 353.4s\r", 388 | "Retrieving data: 365.6s\r", 389 | "Retrieving data: 366.9s\r", 390 | "Retrieving data: 368.1s\r", 391 | "Retrieving data: 369.7s\r", 392 | "Retrieving data: 371.0s\r", 393 | "Retrieving data: 372.2s\r", 394 | "Retrieving data: 373.4s\r", 395 | "Retrieving data: 374.8s\r", 396 | "Retrieving data: 376.0s\r", 397 | "Retrieving data: 377.3s\r", 398 | "Retrieving data: 378.4s\r", 399 | "Retrieving data: 379.7s\r", 400 | "Retrieving data: 380.9s\r", 401 | "Retrieving data: 382.1s\r", 402 | "Retrieving data: 
383.5s\r", 403 | "Retrieving data: 384.8s\r", 404 | "Retrieving data: 386.0s\r", 405 | "Retrieving data: 387.3s\r", 406 | "Retrieving data: 388.5s\r", 407 | "Retrieving data: 389.8s\r", 408 | "Retrieving data: 391.1s\r", 409 | "Retrieving data: 392.3s\r", 410 | "Retrieving data: 393.5s\r", 411 | "Retrieving data: 395.0s\r", 412 | "Retrieving data: 396.2s\r", 413 | "Retrieving data: 397.4s\r", 414 | "Retrieving data: 398.6s\r", 415 | "Retrieving data: 400.1s\r", 416 | "Retrieving data: 401.2s\r", 417 | "Retrieving data: 402.3s\r", 418 | "Retrieving data: 403.6s\r", 419 | "Retrieving data: 404.8s\r", 420 | "Retrieving data: 406.1s\r", 421 | "Retrieving data: 407.3s\r", 422 | "Retrieving data: 408.3s\r", 423 | "Retrieving data: 409.5s\r", 424 | "Retrieving data: 410.8s\r", 425 | "Retrieving data: 412.1s\r", 426 | "Retrieving data: 413.5s\r", 427 | "Retrieving data: 414.7s\r", 428 | "Retrieving data: 415.8s\r", 429 | "Retrieving data: 417.2s\r", 430 | "Retrieving data: 418.4s\r", 431 | "Retrieving data: 419.8s\r", 432 | "Retrieving data: 421.1s\r", 433 | "Retrieving data: 422.3s\r", 434 | "Retrieving data: 423.5s\r", 435 | "Retrieving data: 424.7s\r", 436 | "Retrieving data: 426.2s\r", 437 | "Retrieving data: 427.7s\r", 438 | "Retrieving data: 429.1s\r", 439 | "Retrieving data: 430.7s\r", 440 | "Retrieving data: 433.0s\r", 441 | "Retrieving data: 438.1s\r", 442 | "Retrieving data: 439.3s\r", 443 | "Retrieving data: 440.4s\r", 444 | "Retrieving data: 441.7s\r", 445 | "Retrieving data: 443.0s\r", 446 | "Retrieving data: 444.2s\r", 447 | "Retrieving data: 445.5s\r", 448 | "Retrieving data: 446.7s\r", 449 | "Retrieving data: 447.8s\r", 450 | "Retrieving data: 449.1s\r", 451 | "Retrieving data: 450.4s\r", 452 | "Retrieving data: 451.4s\r", 453 | "Retrieving data: 452.9s\r", 454 | "Retrieving data: 454.3s\r", 455 | "Retrieving data: 455.6s\r", 456 | "Retrieving data: 456.8s\r", 457 | "Retrieving data: 458.0s\r", 458 | "Retrieving data: 478.3s\r", 459 | "Retrieving data: 
479.7s\r", 460 | "Retrieving data: 481.2s\r", 461 | "Retrieving data: 506.0s\r", 462 | "Retrieving data: 507.5s\r", 463 | "Retrieving data: 508.9s\r", 464 | "Retrieving data: 510.2s\r", 465 | "Retrieving data: 511.8s\r", 466 | "Retrieving data: 513.3s\r", 467 | "Retrieving data: 514.7s\r", 468 | "Retrieving data: 516.4s\r", 469 | "Retrieving data: 537.0s\r", 470 | "Retrieving data: 538.6s\r", 471 | "Retrieving data: 540.0s\r", 472 | "Retrieving data: 541.3s\r", 473 | "Retrieving data: 543.1s\r", 474 | "Retrieving data: 545.0s\r", 475 | "Retrieving data: 546.2s\r", 476 | "Retrieving data: 547.8s\r", 477 | "Retrieving data: 548.9s\r", 478 | "Retrieving data: 550.3s\r", 479 | "Retrieving data: 551.6s\r", 480 | "Retrieving data: 552.8s\r", 481 | "Retrieving data: 554.1s\r", 482 | "Retrieving data: 561.8s\r", 483 | "Retrieving data: 563.1s\r", 484 | "Retrieving data: 564.3s\r", 485 | "Retrieving data: 565.4s\r", 486 | "Retrieving data: 566.7s\r", 487 | "Retrieving data: 567.8s\r", 488 | "Retrieving data: 568.9s\r", 489 | "Retrieving data: 570.1s\r", 490 | "Retrieving data: 571.4s\r", 491 | "Retrieving data: 572.6s\r", 492 | "Retrieving data: 574.2s\r", 493 | "Retrieving data: 575.3s\r", 494 | "Retrieving data: 576.5s\r", 495 | "Retrieving data: 601.6s\r", 496 | "Retrieving data: 603.5s\r", 497 | "Retrieving data: 605.4s\r", 498 | "Retrieving data: 606.8s\r", 499 | "Retrieving data: 608.8s\r", 500 | "Retrieving data: 610.7s\r", 501 | "Retrieving data: 612.9s\r", 502 | "Retrieving data: 614.6s\r", 503 | "Retrieving data: 617.4s\r", 504 | "Retrieving data: 619.0s\r", 505 | "Retrieving data: 622.7s\r", 506 | "Retrieving data: 625.0s\r", 507 | "Retrieving data: 627.4s\r", 508 | "Retrieving data: 629.9s\r", 509 | "Retrieving data: 631.3s\r", 510 | "Retrieving data: 633.1s\r", 511 | "Retrieving data: 635.5s\r", 512 | "Retrieving data: 645.7s\r", 513 | "Retrieving data: 647.4s\r", 514 | "Retrieving data: 648.8s\r", 515 | "Retrieving data: 653.0s\r", 516 | "Retrieving data: 
654.4s\r", 517 | "Retrieving data: 655.8s\r", 518 | "Retrieving data: 657.2s\r", 519 | "Retrieving data: 658.7s\r", 520 | "Retrieving data: 660.3s\r", 521 | "Retrieving data: 661.8s\r", 522 | "Retrieving data: 663.3s\r", 523 | "Retrieving data: 664.6s\r", 524 | "Retrieving data: 666.1s\r", 525 | "Retrieving data: 667.8s\r", 526 | "Retrieving data: 691.9s\r", 527 | "Retrieving data: 693.7s\r", 528 | "Retrieving data: 695.3s\r", 529 | "Retrieving data: 696.8s\r", 530 | "Retrieving data: 698.6s\r", 531 | "Retrieving data: 700.1s\r", 532 | "Retrieving data: 701.4s\r", 533 | "Retrieving data: 702.9s\r", 534 | "Retrieving data: 704.4s\r", 535 | "Retrieving data: 705.8s\r", 536 | "Retrieving data: 707.2s\r", 537 | "Retrieving data: 708.5s\r", 538 | "Retrieving data: 724.5s\r", 539 | "Retrieving data: 747.5s\r", 540 | "Retrieving data: 764.9s\r", 541 | "Retrieving data: 766.4s\r", 542 | "Retrieving data: 781.9s\r", 543 | "Retrieving data: 783.1s\r", 544 | "Retrieving data: 784.6s\r", 545 | "Retrieving data: 785.8s\r", 546 | "Retrieving data: 787.1s\r", 547 | "Retrieving data: 788.4s\r", 548 | "Retrieving data: 789.9s\r", 549 | "Retrieving data: 791.0s\r", 550 | "Retrieving data: 792.4s\r", 551 | "Retrieving data: 793.5s\r", 552 | "Retrieving data: 795.0s\r", 553 | "Retrieving data: 796.4s\r", 554 | "Retrieving data: 797.7s\r", 555 | "Retrieving data: 799.0s\r", 556 | "Retrieving data: 800.3s\r", 557 | "Retrieving data: 801.9s\r", 558 | "Retrieving data: 803.2s\r", 559 | "Retrieving data: 804.4s\r", 560 | "Retrieving data: 805.9s\r", 561 | "Retrieving data: 807.6s\r", 562 | "Retrieving data: 809.0s\r", 563 | "Retrieving data: 810.6s\r", 564 | "Retrieving data: 812.3s\r", 565 | "Retrieving data: 813.6s\r", 566 | "Retrieving data: 814.9s\r", 567 | "Retrieving data: 816.3s\r", 568 | "Retrieving data: 817.5s\r", 569 | "Retrieving data: 818.9s\r", 570 | "Retrieving data: 820.1s\r", 571 | "Retrieving data: 821.2s\r", 572 | "Retrieving data: 822.5s\r", 573 | "Retrieving data: 
824.6s\r", 574 | "Retrieving data: 825.9s\r", 575 | "Retrieving data: 827.2s\r", 576 | "Retrieving data: 828.3s\r", 577 | "Retrieving data: 829.6s\r", 578 | "Retrieving data: 831.1s\r", 579 | "Retrieving data: 832.3s\r", 580 | "Retrieving data: 833.6s\r", 581 | "Retrieving data: 835.1s\r", 582 | "Retrieving data: 836.4s\r", 583 | "Retrieving data: 851.3s\r", 584 | "Retrieving data: 852.5s\r", 585 | "Retrieving data: 853.8s\n" 586 | ] 587 | } 588 | ], 589 | "source": [ 590 | "query <- \"SELECT ROUND(pickup_latitude, 4) AS lat,\n", 591 | "ROUND(pickup_longitude, 4) AS long,\n", 592 | "COUNT(*) AS num_pickups,\n", 593 | "SUM(fare_amount) AS total_revenue\n", 594 | "FROM [nyc-tlc:yellow.trips]\n", 595 | "WHERE fare_amount/trip_distance BETWEEN 2 AND 10\n", 596 | "GROUP BY lat, long\"\n", 597 | "\n", 598 | "df <- tbl_df(query_exec(query, project=project_id, max_pages=Inf))" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "Examine data, and cache locally. (Comment/Uncomment the read/write as appropriate)" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 2, 611 | "metadata": { 612 | "collapsed": false 613 | }, 614 | "outputs": [ 615 | { 616 | "data": { 617 | "text/html": [ 618 | "\n", 619 | "\n", 620 | "\n", 621 | "\t\n", 622 | "\t\n", 623 | "\t\n", 624 | "\t\n", 625 | "\t\n", 626 | "\t\n", 627 | "\t\n", 628 | "\t\n", 629 | "\t\n", 630 | "\t\n", 631 | "\n", 632 | "
latlongnum_pickupstotal_revenue
140.7772-73.955212181115615.1
2-73.996840.737210101.5
340.7508-73.9916714070124.4
440.7574-73.9724402542911.6
540.7904-73.9769874888959.9
640.7182-73.9893382944125.6
740.7121-73.9591527166410.8
840.7717-73.95916957137926.3
940.6884-73.9805487067445.5
1040.7551-73.971511107110735.7
\n" 633 | ], 634 | "text/latex": [ 635 | "\\begin{tabular}{r|llll}\n", 636 | " & lat & long & num_pickups & total_revenue\\\\\n", 637 | "\\hline\n", 638 | "\t1 & 40.7772 & -73.9552 & 12181 & 115615.1\\\\\n", 639 | "\t2 & -73.9968 & 40.7372 & 10 & 101.5\\\\\n", 640 | "\t3 & 40.7508 & -73.9916 & 7140 & 70124.4\\\\\n", 641 | "\t4 & 40.7574 & -73.9724 & 4025 & 42911.6\\\\\n", 642 | "\t5 & 40.7904 & -73.9769 & 8748 & 88959.9\\\\\n", 643 | "\t6 & 40.7182 & -73.9893 & 3829 & 44125.6\\\\\n", 644 | "\t7 & 40.7121 & -73.9591 & 5271 & 66410.8\\\\\n", 645 | "\t8 & 40.7717 & -73.959 & 16957 & 137926.3\\\\\n", 646 | "\t9 & 40.6884 & -73.9805 & 4870 & 67445.5\\\\\n", 647 | "\t10 & 40.7551 & -73.9715 & 11107 & 110735.7\\\\\n", 648 | "\\end{tabular}\n" 649 | ], 650 | "text/plain": [ 651 | " lat long num_pickups total_revenue\n", 652 | "1 40.7772 -73.9552 12181 115615.1\n", 653 | "2 -73.9968 40.7372 10 101.5\n", 654 | "3 40.7508 -73.9916 7140 70124.4\n", 655 | "4 40.7574 -73.9724 4025 42911.6\n", 656 | "5 40.7904 -73.9769 8748 88959.9\n", 657 | "6 40.7182 -73.9893 3829 44125.6\n", 658 | "7 40.7121 -73.9591 5271 66410.8\n", 659 | "8 40.7717 -73.9590 16957 137926.3\n", 660 | "9 40.6884 -73.9805 4870 67445.5\n", 661 | "10 40.7551 -73.9715 11107 110735.7" 662 | ] 663 | }, 664 | "execution_count": 2, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | }, 668 | { 669 | "data": { 670 | "text/html": [ 671 | "'# of Rows in Dataframe: 4457474'" 672 | ], 673 | "text/latex": [ 674 | "'# of Rows in Dataframe: 4457474'" 675 | ], 676 | "text/markdown": [ 677 | "'# of Rows in Dataframe: 4457474'" 678 | ], 679 | "text/plain": [ 680 | "[1] \"# of Rows in Dataframe: 4457474\"" 681 | ] 682 | }, 683 | "execution_count": 2, 684 | "metadata": {}, 685 | "output_type": "execute_result" 686 | }, 687 | { 688 | "data": { 689 | "text/html": [ 690 | "'Dataframe Size: 119 Mb'" 691 | ], 692 | "text/latex": [ 693 | "'Dataframe Size: 119 Mb'" 694 | ], 695 | "text/markdown": [ 696 | "'Dataframe Size: 
119 Mb'" 697 | ], 698 | "text/plain": [ 699 | "[1] \"Dataframe Size: 119 Mb\"" 700 | ] 701 | }, 702 | "execution_count": 2, 703 | "metadata": {}, 704 | "output_type": "execute_result" 705 | } 706 | ], 707 | "source": [ 708 | "df <- read.csv(\"nyc-taxi-data.csv\", header=T)\n", 709 | "\n", 710 | "df %>% head(10)\n", 711 | "\n", 712 | "sprintf(\"# of Rows in Dataframe: %s\", nrow(df))\n", 713 | "sprintf(\"Dataframe Size: %s\", format(object.size(df), units = \"MB\"))\n", 714 | "\n", 715 | "# write.csv(df, \"nyc-taxi-data.csv\", row.names=F)" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": 3, 721 | "metadata": { 722 | "collapsed": true 723 | }, 724 | "outputs": [], 725 | "source": [ 726 | "min_lat <- 40.5774\n", 727 | "max_lat <- 40.9176\n", 728 | "min_long <- -74.15\n", 729 | "max_long <- -73.7004" 730 | ] 731 | }, 732 | { 733 | "cell_type": "markdown", 734 | "metadata": {}, 735 | "source": [ 736 | "Build theme for the map, stripping out most of the grid and replacing it with black." 
737 | ] 738 | }, 739 | { 740 | "cell_type": "code", 741 | "execution_count": 4, 742 | "metadata": { 743 | "collapsed": true 744 | }, 745 | "outputs": [], 746 | "source": [ 747 | "theme_map_dark <- function(palate_color = \"Greys\") {\n", 748 | "\n", 749 | "palate <- brewer.pal(palate_color, n=9)\n", 750 | " color.background = \"black\"\n", 751 | " color.grid.minor = \"black\"\n", 752 | " color.grid.major = \"black\"\n", 753 | " color.axis.text = palate[1]\n", 754 | " color.axis.title = palate[1]\n", 755 | " color.title = palate[1]\n", 756 | "\n", 757 | " font.title <- \"Source Sans Pro\"\n", 758 | " font.axis <- \"Open Sans Condensed Bold\"\n", 759 | "\n", 760 | "theme_bw(base_size=5) +\n", 761 | " theme(panel.background=element_rect(fill=color.background, color=color.background)) +\n", 762 | " theme(plot.background=element_rect(fill=color.background, color=color.background)) +\n", 763 | " theme(panel.border=element_rect(color=color.background)) +\n", 764 | " theme(panel.grid.major=element_blank()) +\n", 765 | " theme(panel.grid.minor=element_blank()) +\n", 766 | " theme(axis.ticks=element_blank()) +\n", 767 | " theme(legend.background = element_rect(fill=color.background)) +\n", 768 | " theme(legend.text = element_text(size=3,colour=color.axis.title,family=font.axis)) +\n", 769 | " theme(legend.title = element_blank(), legend.position=\"top\", legend.direction=\"horizontal\") +\n", 770 | " theme(legend.key.width=unit(1, \"cm\"), legend.key.height=unit(0.25, \"cm\"), legend.margin=unit(-0.5,\"cm\")) +\n", 771 | " theme(plot.title=element_text(colour=color.title,family=font.title, size=5)) +\n", 772 | " theme(axis.text.x=element_blank()) +\n", 773 | " theme(axis.text.y=element_blank()) +\n", 774 | " theme(axis.title.y=element_blank()) +\n", 775 | " theme(axis.title.x=element_blank()) +\n", 776 | " theme(plot.margin = unit(c(0.0, -0.5, -1, -0.75), \"cm\")) +\n", 777 | " theme(strip.background = element_rect(fill=color.background, 
color=color.background),strip.text=element_text(size=7,colour=color.axis.title,family=font.title))\n", 778 | "\n", 779 | "}" 780 | ] 781 | }, 782 | { 783 | "cell_type": "markdown", 784 | "metadata": {}, 785 | "source": [ 786 | "# Plotting NYC\n", 787 | "\n", 788 | "Now we can plot NYC! Let's test using the most basic ggplot2 plot possible." 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": 32, 794 | "metadata": { 795 | "collapsed": false 796 | }, 797 | "outputs": [ 798 | { 799 | "data": { 800 | "text/html": [ 801 | "pdf: 2" 802 | ], 803 | "text/latex": [ 804 | "\\textbf{pdf:} 2" 805 | ], 806 | "text/markdown": [ 807 | "**pdf:** 2" 808 | ], 809 | "text/plain": [ 810 | "pdf \n", 811 | " 2 " 812 | ] 813 | }, 814 | "execution_count": 32, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "plot <- ggplot(df, aes(x=long, y=lat)) +\n", 821 | " geom_point(size=0.06) +\n", 822 | "\n", 823 | "png(\"nyc-taxi-1.png\", w=600, h=600)\n", 824 | "plot\n", 825 | "dev.off()" 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "metadata": {}, 831 | "source": [ 832 | "![](nyc-taxi-1.png)\n", 833 | "\n", 834 | "Latitude and Longitude in the thousands? That's definitely not right.\n", 835 | "\n", 836 | "Let's force the bounding box." 
837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 33, 842 | "metadata": { 843 | "collapsed": false 844 | }, 845 | "outputs": [ 846 | { 847 | "data": { 848 | "text/html": [ 849 | "pdf: 2" 850 | ], 851 | "text/latex": [ 852 | "\\textbf{pdf:} 2" 853 | ], 854 | "text/markdown": [ 855 | "**pdf:** 2" 856 | ], 857 | "text/plain": [ 858 | "pdf \n", 859 | " 2 " 860 | ] 861 | }, 862 | "execution_count": 33, 863 | "metadata": {}, 864 | "output_type": "execute_result" 865 | } 866 | ], 867 | "source": [ 868 | "plot <- ggplot(df, aes(x=long, y=lat)) +\n", 869 | " geom_point(size=0.06) +\n", 870 | " scale_x_continuous(limits=c(min_long, max_long)) +\n", 871 | " scale_y_continuous(limits=c(min_lat, max_lat))\n", 872 | "\n", 873 | "png(\"nyc-taxi-2.png\", w=600, h=600)\n", 874 | "plot\n", 875 | "dev.off()" 876 | ] 877 | }, 878 | { 879 | "cell_type": "markdown", 880 | "metadata": {}, 881 | "source": [ 882 | "![](nyc-taxi-2.png)" 883 | ] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": {}, 888 | "source": [ 889 | "Better. Let's apply the theme and force a 300 dpi to reduce aliasing." 
890 | ] 891 | }, 892 | { 893 | "cell_type": "code", 894 | "execution_count": 34, 895 | "metadata": { 896 | "collapsed": false 897 | }, 898 | "outputs": [ 899 | { 900 | "data": { 901 | "text/html": [ 902 | "pdf: 2" 903 | ], 904 | "text/latex": [ 905 | "\\textbf{pdf:} 2" 906 | ], 907 | "text/markdown": [ 908 | "**pdf:** 2" 909 | ], 910 | "text/plain": [ 911 | "pdf \n", 912 | " 2 " 913 | ] 914 | }, 915 | "execution_count": 34, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "plot <- ggplot(df %>% filter(num_pickups > 10), aes(x=long, y=lat)) +\n", 922 | " geom_point(color=\"white\", size=0.06) +\n", 923 | " scale_x_continuous(limits=c(min_long, max_long)) +\n", 924 | " scale_y_continuous(limits=c(min_lat, max_lat)) +\n", 925 | " theme_map_dark()\n", 926 | "\n", 927 | "png(\"nyc-taxi-3.png\", w=600, h=600, res=300)\n", 928 | "plot\n", 929 | "dev.off()" 930 | ] 931 | }, 932 | { 933 | "cell_type": "markdown", 934 | "metadata": {}, 935 | "source": [ 936 | "![](nyc-taxi-3.png)" 937 | ] 938 | }, 939 | { 940 | "cell_type": "markdown", 941 | "metadata": {}, 942 | "source": [ 943 | "Even better. Make the final improvements and annotations to the chart." 
944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 49, 949 | "metadata": { 950 | "collapsed": false 951 | }, 952 | "outputs": [ 953 | { 954 | "data": { 955 | "text/html": [ 956 | "pdf: 2" 957 | ], 958 | "text/latex": [ 959 | "\\textbf{pdf:} 2" 960 | ], 961 | "text/markdown": [ 962 | "**pdf:** 2" 963 | ], 964 | "text/plain": [ 965 | "pdf \n", 966 | " 2 " 967 | ] 968 | }, 969 | "execution_count": 49, 970 | "metadata": {}, 971 | "output_type": "execute_result" 972 | } 973 | ], 974 | "source": [ 975 | "plot <- ggplot(df %>% filter(num_pickups > 10), aes(x=long, y=lat, color=num_pickups)) +\n", 976 | " geom_point(size=0.06) +\n", 977 | " scale_x_continuous(limits=c(min_long, max_long)) +\n", 978 | " scale_y_continuous(limits=c(min_lat, max_lat)) +\n", 979 | " theme_map_dark() +\n", 980 | " scale_color_gradient(low=\"#CCCCCC\", high=\"#8E44AD\", trans=\"log\") +\n", 981 | " labs(title = \"Map of NYC, Plotted Using Locations Of All Yellow Taxi Pickups\") +\n", 982 | " theme(legend.position=\"none\") +\n", 983 | " coord_equal()\n", 984 | "\n", 985 | "png(\"nyc-taxi-4.png\", w=600, h=600, res=300)\n", 986 | "plot\n", 987 | "dev.off()" 988 | ] 989 | }, 990 | { 991 | "cell_type": "markdown", 992 | "metadata": {}, 993 | "source": [ 994 | "![](nyc-taxi-4.png)" 995 | ] 996 | }, 997 | { 998 | "cell_type": "markdown", 999 | "metadata": {}, 1000 | "source": [ 1001 | "`coord_equal` results in white space above the chart. Here's a (failed) attempt to fix it using tools from the `grid` package." 
1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": 88, 1007 | "metadata": { 1008 | "collapsed": true 1009 | }, 1010 | "outputs": [], 1011 | "source": [ 1012 | "#map_save <- function(filename, plot) {\n", 1013 | "# vp = viewport(grid.layout(1, 1))\n", 1014 | "# \n", 1015 | "# png(filename, w=600, h=600, res=300)\n", 1016 | "# grid.newpage()\n", 1017 | "# print(plot, vp=vp)\n", 1018 | "# grid.rect(gp=gpar(fill=\"black\", col=\"black\"))\n", 1019 | "# pushViewport(viewport(grid.layout(1, 1)))\n", 1020 | "# print(plot, vp=vp)\n", 1021 | "# dev.off()\n", 1022 | "#}" 1023 | ] 1024 | }, 1025 | { 1026 | "cell_type": "markdown", 1027 | "metadata": {}, 1028 | "source": [ 1029 | "Add Hex bins above the map we've generated." 1030 | ] 1031 | }, 1032 | { 1033 | "cell_type": "code", 1034 | "execution_count": 23, 1035 | "metadata": { 1036 | "collapsed": false 1037 | }, 1038 | "outputs": [ 1039 | { 1040 | "data": { 1041 | "text/html": [ 1042 | "pdf: 2" 1043 | ], 1044 | "text/latex": [ 1045 | "\\textbf{pdf:} 2" 1046 | ], 1047 | "text/markdown": [ 1048 | "**pdf:** 2" 1049 | ], 1050 | "text/plain": [ 1051 | "pdf \n", 1052 | " 2 " 1053 | ] 1054 | }, 1055 | "execution_count": 23, 1056 | "metadata": {}, 1057 | "output_type": "execute_result" 1058 | } 1059 | ], 1060 | "source": [ 1061 | "plot <- ggplot(df %>% filter(num_pickups > 20), aes(x=long, y=lat, z=total_revenue)) +\n", 1062 | " geom_point(size=0.06, color=\"#999999\") +\n", 1063 | " stat_summary_hex(fun = sum, bins=100, alpha=0.7) +\n", 1064 | " scale_x_continuous(limits=c(min_long, max_long)) +\n", 1065 | " scale_y_continuous(limits=c(min_lat, max_lat)) +\n", 1066 | " theme_map_dark() +\n", 1067 | " scale_fill_gradient(low=\"#CCCCCC\", high=\"#27AE60\", labels=dollar) +\n", 1068 | " labs(title = \"Total Revenue for NYC Yellow Taxis by Pickup Location, from Jan 2009 ― June 2015\") +\n", 1069 | " coord_equal()\n", 1070 | "\n", 1071 | "png(\"nyc-taxi-5.png\", w=950, h=860, res=300)\n", 1072 | "plot\n", 
1073 | "dev.off()" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "markdown", 1078 | "metadata": { 1079 | "collapsed": true 1080 | }, 1081 | "source": [ 1082 | "![](nyc-taxi-5.png)" 1083 | ] 1084 | }, 1085 | { 1086 | "cell_type": "markdown", 1087 | "metadata": {}, 1088 | "source": [ 1089 | "Conveys information accurately, but aesthetics could be improved. One last try:" 1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "code", 1094 | "execution_count": 42, 1095 | "metadata": { 1096 | "collapsed": false 1097 | }, 1098 | "outputs": [ 1099 | { 1100 | "data": { 1101 | "text/html": [ 1102 | "pdf: 2" 1103 | ], 1104 | "text/latex": [ 1105 | "\\textbf{pdf:} 2" 1106 | ], 1107 | "text/markdown": [ 1108 | "**pdf:** 2" 1109 | ], 1110 | "text/plain": [ 1111 | "pdf \n", 1112 | " 2 " 1113 | ] 1114 | }, 1115 | "execution_count": 42, 1116 | "metadata": {}, 1117 | "output_type": "execute_result" 1118 | } 1119 | ], 1120 | "source": [ 1121 | "# Helper function to make bins not show if total revenue is below threshold\n", 1122 | "total_rev <- function(x, threshold = 10^5) {\n", 1123 | " if (sum(x) < threshold) {return (NA)}\n", 1124 | " else {return (sum(x))}\n", 1125 | "}\n", 1126 | "\n", 1127 | "plot <- ggplot(df %>% filter(num_pickups > 10), aes(x=long, y=lat, z=total_revenue)) +\n", 1128 | " geom_point(size=0.06, color=\"#999999\") +\n", 1129 | " stat_summary_hex(fun = total_rev, bins=100, alpha=0.5) +\n", 1130 | " scale_x_continuous(limits=c(-74.0224, -73.8521)) +\n", 1131 | " scale_y_continuous(limits=c(40.6959, 40.8348)) +\n", 1132 | " theme_map_dark() +\n", 1133 | " scale_fill_gradient(low=\"#FFFFFF\", high=\"#E74C3C\", labels=dollar, trans=\"log\", breaks=c(10^(6:8))) +\n", 1134 | " labs(title = \"Total Revenue for NYC Yellow Taxis by Pickup Location, from Jan 2009 ― June 2015\") +\n", 1135 | " coord_equal()\n", 1136 | "\n", 1137 | "png(\"nyc-taxi-6.png\", w=900, h=900, res=300)\n", 1138 | "plot\n", 1139 | "dev.off()" 1140 | ] 1141 | }, 1142 | { 1143 | "cell_type": "markdown", 
1144 | "metadata": {}, 1145 | "source": [ 1146 | "![](nyc-taxi-6.png)" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "markdown", 1151 | "metadata": { 1152 | "collapsed": true 1153 | }, 1154 | "source": [ 1155 | "# The MIT License (MIT)\n", 1156 | "\n", 1157 | "Copyright (c) 2015 Max Woolf\n", 1158 | "\n", 1159 | "Permission is hereby granted, free of charge, to any person obtaining a copy\n", 1160 | "of this software and associated documentation files (the \"Software\"), to deal\n", 1161 | "in the Software without restriction, including without limitation the rights\n", 1162 | "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n", 1163 | "copies of the Software, and to permit persons to whom the Software is\n", 1164 | "furnished to do so, subject to the following conditions:\n", 1165 | "\n", 1166 | "The above copyright notice and this permission notice shall be included in all\n", 1167 | "copies or substantial portions of the Software.\n", 1168 | "\n", 1169 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", 1170 | "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", 1171 | "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n", 1172 | "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", 1173 | "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n", 1174 | "OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n", 1175 | "SOFTWARE." 
1176 | ] 1177 | } 1178 | ], 1179 | "metadata": { 1180 | "kernelspec": { 1181 | "display_name": "R", 1182 | "language": "R", 1183 | "name": "ir" 1184 | }, 1185 | "language_info": { 1186 | "codemirror_mode": "r", 1187 | "file_extension": ".r", 1188 | "mimetype": "text/x-r-source", 1189 | "name": "R", 1190 | "pygments_lexer": "r", 1191 | "version": "3.2.2" 1192 | } 1193 | }, 1194 | "nbformat": 4, 1195 | "nbformat_minor": 0 1196 | } 1197 | --------------------------------------------------------------------------------