├── .gitignore
├── LICENSE
├── README.md
├── analysis
    ├── analysis.R
    ├── collisions_map.html
    ├── export_data.sql
    ├── helpers.R
    └── regression_model.R
├── augment_data
    ├── Gemfile
    ├── Gemfile.lock
    ├── README.md
    ├── augment_collisions_with_geocoding.sql
    ├── augment_collisions_with_most_common_coordinates.sql
    ├── geocode.rb
    └── populate_geocodings.sql
├── download_raw_data.sh
├── import_data.sh
├── initialize_database.sh
├── setup_files
    ├── create_indexes.sql
    ├── create_schema.sql
    ├── import_data.sql
    └── populate_vehicles_and_factors.sql
└── shapefiles
    ├── nyct2010wi_18d
        ├── nyct2010wi.dbf
        ├── nyct2010wi.prj
        ├── nyct2010wi.shp
        ├── nyct2010wi.shp.xml
        └── nyct2010wi.shx
    └── taxi_zones
        ├── taxi_zones.dbf
        ├── taxi_zones.prj
        ├── taxi_zones.sbn
        ├── taxi_zones.sbx
        ├── taxi_zones.shp
        ├── taxi_zones.shp.xml
        └── taxi_zones.shx


/.gitignore:
--------------------------------------------------------------------------------
1 | raw_data/*
2 | analysis/*.csv
3 | analysis/*.csv.gz
4 | analysis/graphs/*.png
5 | analysis/graphs/boroughs/*.png
6 | analysis/graphs/zones/*.png
7 | .Rapp.history
8 | .DS_Store
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Todd Schneider
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # NYC Motor Vehicle Collisions
 2 | 
 3 | Code in support of this post: [Mapping Motor Vehicle Collisions in New York City](https://toddwschneider.com/posts/nyc-motor-vehicle-collisions-map/)
 4 | 
 5 | Raw data comes from the NYPD: https://data.cityofnewyork.us/Public-Safety/NYPD-Motor-Vehicle-Collisions/h9gi-nx95
 6 | 
 7 | ## Instructions
 8 | 
 9 | 1. Download and install PostgreSQL and PostGIS (both are available via Homebrew)
10 | 2. `./download_raw_data.sh`
11 | 3. `./initialize_database.sh`
12 | 4. `./import_data.sh`
13 | 
14 | Additional code to fill in missing coordinates for collisions that have cross streets or addresses but no lat/lon lives in the `augment_data/` subfolder
15 | 
16 | Assorted SQL and R scripts to analyze data and draw maps are in the `analysis/` subfolder
17 | 


--------------------------------------------------------------------------------
/analysis/analysis.R:
--------------------------------------------------------------------------------
  1 | source("helpers.R") # NOTE(review): query(), capitalize_first_letter(), theme_tws(), nypd_blue, etc. are not defined in this file; presumably helpers.R provides them
  2 | # One row per collision: date parts, coordinates, taxi zone and borough, and injured/killed counts (people_* = motorists + cyclists + pedestrians).
  3 | collisions = query("
  4 |   SELECT
  5 |     unique_key,
  6 |     collision_time::date AS date,
  7 |     date(date_trunc('month', collision_time)) AS month,
  8 |     extract(hour FROM collision_time) AS hour_of_day,
  9 |     extract(dow FROM collision_time) AS day_of_week,
 10 |     latitude,
 11 |     longitude,
 12 |     z.locationid,
 13 |     z.zone,
 14 |     c.borough,
 15 |     nyct2010_gid,
 16 |     number_of_motorists_injured + number_of_cyclists_injured + number_of_pedestrians_injured AS people_injured,
 17 |     number_of_motorists_injured AS motorists_injured,
 18 |     number_of_cyclists_injured AS cyclists_injured,
 19 |     number_of_pedestrians_injured AS pedestrians_injured,
 20 |     number_of_motorists_killed + number_of_cyclists_killed + number_of_pedestrians_killed AS people_killed,
 21 |     number_of_motorists_killed AS motorists_killed,
 22 |     number_of_cyclists_killed AS cyclists_killed,
 23 |     number_of_pedestrians_killed AS pedestrians_killed
 24 |   FROM collisions c
 25 |     LEFT JOIN taxi_zones z ON c.taxi_zone_gid = z.gid
 26 |   ORDER BY unique_key
 27 | ") # LEFT JOIN: collisions without a matching taxi zone are kept, with NULL locationid/zone
 28 | 
 29 | zb = query("SELECT DISTINCT zone, borough FROM taxi_zones ORDER BY zone") # zone name -> borough lookup
 30 | # Every calendar day from the first to the last collision date; used to fill in days that have no rows.
 31 | date_seq = seq(
 32 |   min(collisions$date),
 33 |   max(collisions$date),
 34 |   by = "1 day"
 35 | )
 36 | # Metric column names, in the order used for the levels of the long-format `variable` factor.
 37 | variable_factor_levels = c(
 38 |   "collisions",
 39 |   "people_injured",
 40 |   "people_killed",
 41 |   "motorists_injured",
 42 |   "motorists_killed",
 43 |   "cyclists_injured",
 44 |   "cyclists_killed",
 45 |   "pedestrians_injured",
 46 |   "pedestrians_killed"
 47 | )
 48 | # Display labels for those levels: underscores become spaces, then capitalize_first_letter() is applied (later filters compare against "Collisions").
 49 | variable_factor_labels = variable_factor_levels %>%
 50 |   gsub("_", " ", .) %>%
 51 |   capitalize_first_letter()
 52 | # Daily value of every metric per `dimensions` group, zero-filled over date_seq, in long format with trailing 28- and 365-day rolling sums.
 53 | aggregate_collisions = function(dimensions = quos()) { # dimensions: quosures of grouping columns, e.g. quos(borough); empty = citywide
 54 |   dimensions_excluded = purrr::map(dimensions, function(d) expr(-!!d)) # `-col` expressions so gather() leaves the grouping columns alone
 55 | 
 56 |   collisions %>%
 57 |     mutate(collisions = 1) %>% # each row counts as one collision once summed
 58 |     select(!!!dimensions, date, collisions, matches("_(injured|killed)$")) %>%
 59 |     group_by(!!!dimensions, date) %>%
 60 |     summarize_all(sum) %>%
 61 |     ungroup() %>%
 62 |     group_by(!!!dimensions) %>%
 63 |     complete( # add an all-zero row for every day in date_seq that a group is missing
 64 |       date = date_seq,
 65 |       fill = list(
 66 |         collisions = 0,
 67 |         people_injured = 0,
 68 |         motorists_injured = 0,
 69 |         cyclists_injured = 0,
 70 |         pedestrians_injured = 0,
 71 |         people_killed = 0,
 72 |         motorists_killed = 0,
 73 |         cyclists_killed = 0,
 74 |         pedestrians_killed = 0
 75 |       )
 76 |     ) %>%
 77 |     ungroup() %>%
 78 |     gather(variable, daily, -date, !!!dimensions_excluded) %>% # wide metrics -> (variable, daily) pairs
 79 |     group_by(!!!dimensions, variable) %>%
 80 |     arrange(!!!dimensions, variable, date) %>% # rolling sums below require date order within each group
 81 |     mutate(
 82 |       rolling28 = rollsumr(daily, k = 28, fill = NA), # fill = NA replaces the deprecated na.pad = TRUE: NA until a full window exists
 83 |       rolling365 = rollsumr(daily, k = 365, fill = NA)
 84 |     ) %>%
 85 |     ungroup() %>%
 86 |     mutate(
 87 |       year = year(date),
 88 |       variable = factor(variable, levels = variable_factor_levels, labels = variable_factor_labels)
 89 |     )
 90 | } # returns an ungrouped data frame with the dimension columns plus date, variable, daily, rolling28, rolling365, year
 91 | # Yearly total of each metric per `dimensions` group, summing the daily values from aggregate_collisions().
 92 | aggregate_collisions_by_year = function(dimensions = quos()) { # dimensions: quosures of grouping columns, forwarded unchanged
 93 |   aggregate_collisions(dimensions = dimensions) %>%
 94 |     group_by(!!!dimensions, variable, year) %>%
 95 |     summarize(total = sum(daily)) %>%
 96 |     ungroup()
 97 | } # returns one row per (dimensions, variable, year) with column `total`
 98 | # Annual totals citywide, by borough, and by taxi zone, limited to 2013-2018 (captions cite Jul 2012-Jan 2019, so presumably this drops the partial years; confirm).
 99 | aggregated_data = aggregate_collisions_by_year() %>%
100 |   filter(year %in% 2013:2018)
101 | 
102 | aggregated_data_by_borough = aggregate_collisions_by_year(dimensions = quos(borough)) %>%
103 |   filter(year %in% 2013:2018)
104 | 
105 | aggregated_data_by_zone = aggregate_collisions_by_year(dimensions = quos(zone)) %>%
106 |   filter(year %in% 2013:2018) %>%
107 |   inner_join(zb, by = "zone") # attaches borough; rows whose zone has no match in zb are dropped
108 | 
109 | zones = aggregated_data_by_zone %>% # character vector of the distinct non-NA zone names
110 |   distinct(zone) %>%
111 |   filter(!is.na(zone)) %>%
112 |   pull(zone)
113 | 
114 | plot_bs = 32 # passed as base_size to theme_tws() and used to scale plot margins
115 | plot_width = 800 # PNG width; heights below are multiples of it
116 | # Citywide charts: p1 plots annual collisions, p2 plots every other metric (injured/killed) in free-y facets.
117 | p1 = aggregated_data %>%
118 |   filter(variable == "Collisions") %>%
119 |   ggplot(aes(x = year, y = total)) +
120 |   geom_line(size = 1, color = nypd_blue) +
121 |   geom_point(size = 3, color = nypd_blue) +
122 |   geom_blank(aes(y = 0)) + # invisible layer that forces the y axis to include 0
123 |   geom_blank(aes(y = 1.3 * total)) + # invisible layer that adds headroom above the highest point
124 |   scale_y_continuous(labels = scales::comma) +
125 |   ggtitle("New York City Motor Vehicle Collisions", "Annual total") +
126 |   labs(caption = "Data via NYPD\ntoddwschneider.com") +
127 |   theme_tws(base_size = plot_bs) +
128 |   theme(
129 |     axis.title = element_blank(),
130 |     panel.grid.minor.x = element_blank()
131 |   )
132 | 
133 | p2 = aggregated_data %>%
134 |   filter(variable != "Collisions") %>%
135 |   ggplot(aes(x = year, y = total)) +
136 |   geom_line(size = 1, color = nypd_blue) +
137 |   geom_point(size = 3, color = nypd_blue) +
138 |   geom_blank(aes(y = 0)) +
139 |   geom_blank(aes(y = 1.4 * total)) +
140 |   scale_x_continuous(breaks = c(2014, 2016, 2018)) +
141 |   scale_y_continuous(labels = scales::comma) +
142 |   facet_wrap(~variable, scales = "free_y", ncol = 2) + # one panel per metric, each with its own y scale
143 |   ggtitle("New York City", "Annual injuries from motor vehicle collisions") +
144 |   labs(caption = "Data via NYPD\ntoddwschneider.com") +
145 |   theme_tws(base_size = plot_bs) +
146 |   theme(
147 |     axis.title = element_blank(),
148 |     panel.grid.minor.y = element_blank(),
149 |     plot.subtitle = element_text(margin = unit(c(0, 0, 1.1, 0), "lines")),
150 |     plot.margin = margin(plot_bs / 2, plot_bs * 0.75, plot_bs / 2, plot_bs / 2),
151 |     axis.text = element_text(size = rel(0.7)),
152 |     strip.text = element_text(size = rel(0.7))
153 |   )
154 | # Write both charts as PNGs; paths are relative, so this assumes a graphs/ directory exists under the working directory.
155 | png("graphs/nyc_collisions.png", height = plot_width * 0.75, width = plot_width)
156 | print(p1)
157 | dev.off()
158 | 
159 | png("graphs/nyc_injuries.png", height = plot_width * 1.5, width = plot_width)
160 | print(p2)
161 | dev.off()
162 | # The same two charts for each borough, written to graphs/boroughs/<borough>_collisions.png and <borough>_injuries.png.
163 | for (b in c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island")) {
164 |   p1 = aggregated_data_by_borough %>%
165 |     filter(borough == b, variable == "Collisions") %>%
166 |     ggplot(aes(x = year, y = total)) +
167 |     geom_line(size = 1, color = nypd_blue) +
168 |     geom_point(size = 3, color = nypd_blue) +
169 |     geom_blank(aes(y = 0)) + # invisible layers: include 0 and leave headroom on the y axis
170 |     geom_blank(aes(y = 1.4 * total)) +
171 |     scale_x_continuous(breaks = c(2014, 2016, 2018)) +
172 |     scale_y_continuous(labels = scales::comma) +
173 |     ggtitle(paste(b, "Motor Vehicle Collisions"), "Annual total") +
174 |     labs(caption = "Data via NYPD\ntoddwschneider.com") +
175 |     theme_tws(base_size = plot_bs) +
176 |     theme(
177 |       axis.title = element_blank(),
178 |       panel.grid.minor.y = element_blank()
179 |     )
180 | 
181 |   p2 = aggregated_data_by_borough %>%
182 |     filter(variable != "Collisions", borough == b) %>%
183 |     ggplot(aes(x = year, y = total)) +
184 |     geom_line(size = 1, color = nypd_blue) +
185 |     geom_point(size = 3, color = nypd_blue) +
186 |     geom_blank(aes(y = 0)) +
187 |     geom_blank(aes(y = 1.4 * total)) +
188 |     scale_x_continuous(breaks = c(2014, 2016, 2018)) +
189 |     scale_y_continuous(labels = scales::comma, breaks = integer_breaks(n = 3)) + # NOTE(review): integer_breaks() is not defined here; presumably a helper returning whole-number breaks
190 |     facet_wrap(~variable, scales = "free_y", ncol = 2) +
191 |     ggtitle(b, "Annual injuries from motor vehicle collisions") +
192 |     labs(caption = "Data via NYPD\ntoddwschneider.com") +
193 |     theme_tws(base_size = plot_bs) +
194 |     theme(
195 |       axis.title = element_blank(),
196 |       panel.grid.minor.y = element_blank(),
197 |       plot.subtitle = element_text(margin = unit(c(0, 0, 1.1, 0), "lines")),
198 |       plot.margin = margin(plot_bs / 2, plot_bs * 0.75, plot_bs / 2, plot_bs / 2),
199 |       axis.text = element_text(size = rel(0.7)),
200 |       strip.text = element_text(size = rel(0.7))
201 |     )
202 | 
203 |   png(paste0("graphs/boroughs/", tolower(gsub(" ", "_", b)), "_collisions.png"), height = plot_width * 0.75, width = plot_width) # file name: lowercase borough, spaces -> underscores
204 |   print(p1)
205 |   dev.off()
206 | 
207 |   png(paste0("graphs/boroughs/", tolower(gsub(" ", "_", b)), "_injuries.png"), height = plot_width * 1.5, width = plot_width)
208 |   print(p2)
209 |   dev.off()
210 | }
211 | # The same two charts for every taxi zone, written to graphs/zones/<zfile>_collisions.png and <zfile>_injuries.png.
212 | for (z in zones) {
213 |   zfile = z %>% # file-safe zone name: drop apostrophes/parentheses, whitespace and slashes -> underscores, lowercase
214 |     str_replace_all("['()]", "") %>%
215 |     str_replace_all("[\\s/]", "_") %>%
216 |     tolower()
217 | 
218 |   borough = filter(zb, zone == z)$borough # borough shown after the zone name in the chart titles
219 | 
220 |   p1 = aggregated_data_by_zone %>%
221 |     filter(zone == z, variable == "Collisions") %>%
222 |     ggplot(aes(x = year, y = total)) +
223 |     geom_line(size = 1, color = nypd_blue) +
224 |     geom_point(size = 3, color = nypd_blue) +
225 |     geom_blank(aes(y = 0)) + # invisible layers: include 0 and leave headroom on the y axis
226 |     geom_blank(aes(y = 1.4 * total)) +
227 |     scale_x_continuous(breaks = c(2014, 2016, 2018)) +
228 |     scale_y_continuous(labels = scales::comma) +
229 |     ggtitle(
230 |       paste(z, borough, sep = ", "),
231 |       "Annual motor vehicle collisions"
232 |     ) +
233 |     labs(caption = "Data via NYPD\ntoddwschneider.com") +
234 |     theme_tws(base_size = plot_bs) +
235 |     theme(
236 |       axis.title = element_blank(),
237 |       panel.grid.minor.y = element_blank()
238 |     )
239 | 
240 |   p2 = aggregated_data_by_zone %>%
241 |     filter(zone == z, variable != "Collisions") %>%
242 |     ggplot(aes(x = year, y = total)) +
243 |     geom_line(size = 1, color = nypd_blue) +
244 |     geom_point(size = 3, color = nypd_blue) +
245 |     geom_blank(aes(y = 0)) +
246 |     geom_blank(aes(y = 1.4 * total)) +
247 |     scale_x_continuous(breaks = c(2014, 2016, 2018)) +
248 |     scale_y_continuous(labels = scales::comma, breaks = integer_breaks(n = 3)) +
249 |     facet_wrap(~variable, scales = "free_y", ncol = 2) +
250 |     ggtitle(
251 |       paste(z, borough, sep = ", "),
252 |       "Annual injuries from motor vehicle collisions"
253 |     ) +
254 |     labs(caption = "Data via NYPD\ntoddwschneider.com") +
255 |     theme_tws(base_size = plot_bs) +
256 |     theme(
257 |       axis.title = element_blank(),
258 |       panel.grid.minor.y = element_blank(),
259 |       plot.subtitle = element_text(margin = unit(c(0, 0, 1.1, 0), "lines")),
260 |       plot.margin = margin(plot_bs / 2, plot_bs * 0.75, plot_bs / 2, plot_bs / 2),
261 |       axis.text = element_text(size = rel(0.7)),
262 |       strip.text = element_text(size = rel(0.7))
263 |     )
264 | 
265 |   png(paste0("graphs/zones/", zfile, "_collisions.png"), height = plot_width * 0.75, width = plot_width)
266 |   print(p1)
267 |   dev.off()
268 | 
269 |   png(paste0("graphs/zones/", zfile, "_injuries.png"), height = plot_width * 1.5, width = plot_width)
270 |   print(p2)
271 |   dev.off()
272 | }
273 | 
274 | 
275 | 
276 | # find zones with steepest (positive or negative) trends
277 | regressions_data = aggregated_data_by_zone %>% # reshape to one row per zone/year with one column per metric
278 |   select(zone, borough, year, variable, total) %>%
279 |   mutate(variable = gsub(" ", "_", tolower(variable))) %>% # display labels back to snake_case column names
280 |   spread(variable, total)
281 | 
282 | slopes = purrr::map(zones, function(z) { # per zone: linear-trend slope of each injury metric against year
283 |   df = filter(regressions_data, zone == z)
284 | 
285 |   tibble(
286 |     zone = z,
287 |     people_injured = lm(people_injured ~ year, data = df)$coefficients["year"], # full element name; `$coef` only worked via partial matching
288 |     motorists_injured = lm(motorists_injured ~ year, data = df)$coefficients["year"],
289 |     cyclists_injured = lm(cyclists_injured ~ year, data = df)$coefficients["year"],
290 |     pedestrians_injured = lm(pedestrians_injured ~ year, data = df)$coefficients["year"]
291 |   )
292 | }) %>% bind_rows()
293 | 
294 | arrange(slopes, people_injured) # steepest declines first; result is not assigned
295 | arrange(slopes, desc(people_injured)) # steepest increases first; result is not assigned
296 | 
297 | 
298 | 
299 | # injury rates by time of day and alcohol involvement
300 | alcohol_involved_unique_keys = query("
301 |   SELECT DISTINCT collision_unique_key
302 |   FROM collisions_contributing_factors
303 |   WHERE contributing_factor LIKE '%alcohol%'
304 | ")$collision_unique_key # NOTE(review): LIKE is case-sensitive in PostgreSQL; presumably contributing_factor is stored lowercase, confirm
305 | # Per hour of day: collision count and the share of collisions with any injury, any fatality, or alcohol cited.
306 | injury_rates_hourly = collisions %>%
307 |   mutate(alcohol_involved = unique_key %in% alcohol_involved_unique_keys) %>%
308 |   group_by(hour_of_day) %>%
309 |   summarize(
310 |     collisions = n(),
311 |     frac_with_injury = mean(people_injured > 0),
312 |     frac_with_fatality = mean(people_killed > 0),
313 |     frac_with_alcohol_involvement = mean(alcohol_involved)
314 |   ) %>%
315 |   ungroup()
316 | # Repeat the hour-0 row as hour 24 so plotted lines extend to the right-hand "12 AM" axis break.
317 | injury_rates_hourly = bind_rows(
318 |   injury_rates_hourly,
319 |   injury_rates_hourly %>%
320 |     filter(hour_of_day == 0) %>%
321 |     mutate(hour_of_day = 24)
322 | )
323 | # Hour-of-day line charts from injury_rates_hourly: p1 collision counts, p2 injury rate, p3 fatality rate, p4 alcohol involvement.
324 | p1 = ggplot(injury_rates_hourly, aes(x = hour_of_day, y = collisions)) +
325 |   geom_line(size = 1, color = nypd_blue) +
326 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
327 |   scale_y_continuous(labels = scales::comma) +
328 |   expand_limits(y = c(0, 125e3)) + # y axis always spans at least 0 to 125,000
329 |   ggtitle(
330 |     "NYC collisions by time of day",
331 |     "Jul 2012–Jan 2019"
332 |   ) +
333 |   labs(caption = "Data via NYPD\ntoddwschneider.com") +
334 |   theme_tws(base_size = plot_bs) +
335 |   no_axis_titles()
336 | 
337 | p2 = ggplot(injury_rates_hourly, aes(x = hour_of_day, y = frac_with_injury)) +
338 |   geom_line(size = 1, color = nypd_blue) +
339 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
340 |   scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
341 |   expand_limits(y = 0) +
342 |   ggtitle(
343 |     "Injury rate by time of day",
344 |     "% of NYC collisions that result in injury"
345 |   ) +
346 |   labs(caption = "Data via NYPD, Jul 2012–Jan 2019\ntoddwschneider.com") +
347 |   theme_tws(base_size = plot_bs) +
348 |   no_axis_titles()
349 | 
350 | p3 = ggplot(injury_rates_hourly, aes(x = hour_of_day, y = frac_with_fatality)) +
351 |   geom_line(size = 1, color = nypd_blue) +
352 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
353 |   scale_y_continuous(labels = scales::percent_format(accuracy = 0.1)) + # tenths of a percent on the fatality-rate axis
354 |   expand_limits(y = 0) +
355 |   ggtitle(
356 |     "Fatality rate by time of day",
357 |     "% of NYC collisions that result in fatality"
358 |   ) +
359 |   labs(caption = "Data via NYPD, Jul 2012–Jan 2019\ntoddwschneider.com") +
360 |   theme_tws(base_size = plot_bs) +
361 |   no_axis_titles()
362 | 
363 | p4 = ggplot(injury_rates_hourly, aes(x = hour_of_day, y = frac_with_alcohol_involvement)) +
364 |   geom_line(size = 1, color = nypd_blue) +
365 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
366 |   scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
367 |   expand_limits(y = 0) +
368 |   ggtitle(
369 |     "Alcohol involvement by time of day",
370 |     "% of NYC collisions with alcohol cited as contributing factor"
371 |   ) +
372 |   labs(caption = "Data via NYPD, Jul 2012–Jan 2019\ntoddwschneider.com") +
373 |   theme_tws(base_size = plot_bs) +
374 |   theme(plot.subtitle = element_text(size = rel(0.8))) + # smaller subtitle text for this chart only
375 |   no_axis_titles()
376 | # Write the four charts as square PNGs (plot_width x plot_width) under graphs/.
377 | png("graphs/collisions_by_hour.png", height = plot_width, width = plot_width)
378 | print(p1)
379 | dev.off()
380 | 
381 | png("graphs/injury_rate_by_hour.png", height = plot_width, width = plot_width)
382 | print(p2)
383 | dev.off()
384 | 
385 | png("graphs/fatality_rate_by_hour.png", height = plot_width, width = plot_width)
386 | print(p3)
387 | dev.off()
388 | 
389 | png("graphs/alcohol_involvement_by_hour.png", height = plot_width, width = plot_width)
390 | print(p4)
391 | dev.off()
392 | # Overall injury and fatality rates split by whether alcohol was cited as a contributing factor.
393 | alcohol_stats = collisions %>%
394 |   mutate(alcohol_involved = unique_key %in% alcohol_involved_unique_keys) %>%
395 |   group_by(alcohol_involved) %>%
396 |   summarize(
397 |     collisions = n(),
398 |     frac_with_injury = mean(people_injured > 0),
399 |     frac_with_fatality = mean(people_killed > 0)
400 |   ) %>%
401 |   ungroup()
402 | # The same rates broken out by hour of day as well as alcohol involvement.
403 | injury_rates_alcohol = collisions %>%
404 |   mutate(alcohol_involved = unique_key %in% alcohol_involved_unique_keys) %>%
405 |   group_by(hour_of_day, alcohol_involved) %>%
406 |   summarize(
407 |     collisions = n(),
408 |     frac_with_injury = mean(people_injured > 0),
409 |     frac_with_fatality = mean(people_killed > 0)
410 |   ) %>%
411 |   ungroup()
412 | # Repeat the hour-0 rows as hour 24 so plotted lines extend to the right-hand "12 AM" axis break.
413 | injury_rates_alcohol = bind_rows(
414 |   injury_rates_alcohol,
415 |   injury_rates_alcohol %>%
416 |     filter(hour_of_day == 0) %>%
417 |     mutate(hour_of_day = 24)
418 | )
419 | # Injury rate by hour of day, one line per alcohol_involved value; text annotations label the lines in place of a legend.
420 | p5 = ggplot(injury_rates_alcohol, aes(x = hour_of_day, y = frac_with_injury, color = alcohol_involved)) +
421 |   geom_line(size = 1) +
422 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
423 |   scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
424 |   scale_color_manual(values = c(nypd_blue, "#49acd5"), guide = "none") + # "none" hides the legend; guide = FALSE is deprecated in ggplot2
425 |   expand_limits(y = c(0, 0.4)) +
426 |   annotate(
427 |     "text", x = 17, y = 0.15, label = "Alcohol not cited",
428 |     size = 8, color = nypd_blue, family = "Open Sans"
429 |   ) +
430 |   annotate(
431 |     "text", x = 17, y = 0.34, label = "Alcohol cited",
432 |     size = 8, color = "#49acd5", family = "Open Sans"
433 |   ) +
434 |   ggtitle(
435 |     "Injury rate by alcohol involvement",
436 |     "% of NYC collisions that result in injury"
437 |   ) +
438 |   labs(caption = "Data via NYPD, Jul 2012–Jan 2019\ntoddwschneider.com") +
439 |   theme_tws(base_size = plot_bs) +
440 |   no_axis_titles()
441 | # Fatality rate by hour of day, one line per alcohol_involved value; text annotations label the lines in place of a legend.
442 | p6 = ggplot(injury_rates_alcohol, aes(x = hour_of_day, y = frac_with_fatality, color = alcohol_involved)) +
443 |   geom_line(size = 1) +
444 |   scale_x_continuous(breaks = c(0, 6, 12, 18, 24), labels = c("12 AM", "6 AM", "12 PM", "6 PM", "12 AM")) +
445 |   scale_y_continuous(labels = scales::percent_format(accuracy = 0.1)) +
446 |   scale_color_manual(values = c(nypd_blue, "#49acd5"), guide = "none") + # "none" hides the legend; guide = FALSE is deprecated in ggplot2
447 |   expand_limits(y = c(0, 0.01)) +
448 |   annotate(
449 |     "text", x = 5, y = 0, label = "Alcohol not cited",
450 |     size = 8, color = nypd_blue, family = "Open Sans"
451 |   ) +
452 |   annotate(
453 |     "text", x = 5, y = 0.009, label = "Alcohol cited",
454 |     size = 8, color = "#49acd5", family = "Open Sans"
455 |   ) +
456 |   ggtitle(
457 |     "Fatality rate by alcohol involvement",
458 |     "% of NYC collisions that result in fatality"
459 |   ) +
460 |   labs(caption = "Data via NYPD, Jul 2012–Jan 2019\ntoddwschneider.com") +
461 |   theme_tws(base_size = plot_bs) +
462 |   no_axis_titles()
463 | # Write the two alcohol-involvement charts as square PNGs (plot_width x plot_width) under graphs/.
464 | png("graphs/injury_rate_by_alcohol_involvement.png", height = plot_width, width = plot_width)
465 | print(p5)
466 | dev.off()
467 | 
468 | png("graphs/fatality_rate_by_alcohol_involvement.png", height = plot_width, width = plot_width)
469 | print(p6)
470 | dev.off()
471 | 


--------------------------------------------------------------------------------
/analysis/collisions_map.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  5 |     <meta charset="utf-8">
  6 | 
  7 |     <title>NYC Motor Vehicle Collisions</title>
  8 | 
  9 |     <script src="https://unpkg.com/deck.gl@latest/deckgl.min.js"></script>
 10 |     <script src="https://api.tiles.mapbox.com/mapbox-gl-js/v0.52.0/mapbox-gl.js"></script>
 11 |     <script src="https://d3js.org/d3-dsv.v1.min.js"></script>
 12 |     <script src="https://d3js.org/d3-fetch.v1.min.js"></script>
 13 | 
 14 |     <link rel="stylesheet" type="text/css" href="https://api.tiles.mapbox.com/mapbox-gl-js/v0.52.0/mapbox-gl.css">
 15 |     <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono" rel="stylesheet">
 16 | 
 17 |     <style type="text/css">
 18 |       body {
 19 |         font-family: 'IBM Plex Mono', monospace;
 20 |         width: 100vw;
 21 |         height: 100vh;
 22 |         margin: 0;
 23 |       }
 24 |       #control-panel { width: 210px; }
 25 |       .mobile #control-panel { width: 270px; }
 26 |       #control-panel, #legend, #tooltip, #show-control-panel, .hide-controls {
 27 |         background: #17171d;
 28 |         opacity: 0.9;
 29 |         color: #ddd;
 30 |         border: 1px solid #555;
 31 |         border-radius: 8px;
 32 |       }
 33 |       #control-panel a, #legend a, #tooltip a, #show-control-panel a, .hide-controls {
 34 |         text-decoration: none;
 35 |         color: #5ba7c2;
 36 |       }
 37 |       #control-panel hr {
 38 |         border: 0.5px solid #555;
 39 |         margin: 12px -8px;
 40 |       }
 41 |       #radius {
 42 |         margin-top: 4px;
 43 |         width: 90%;
 44 |       }
 45 |       #radius::-moz-range-track { background: rgb(144, 144, 144); }
 46 |       #control-panel select {
 47 |         width: 100%;
 48 |         padding: 5px;
 49 |         font-family: 'IBM Plex Mono';
 50 |         font-size: 12px;
 51 |         border: 0px;
 52 |         background: #292929;
 53 |         color: #ddd;
 54 |         height: 34px;
 55 |         -webkit-appearance: none;
 56 |         -moz-appearance: none;
 57 |         appearance: none;
 58 |         background-image: url("data:image/svg+xml;utf8,<svg fill='#dddddd' height='24' viewBox='0 0 24 24' width='24' xmlns='http://www.w3.org/2000/svg'><path d='M7 10l5 5 5-5z'/><path d='M0 0h24v24H0z' fill='none'/></svg>");
 59 |         background-repeat: no-repeat;
 60 |         background-position-x: 100%;
 61 |         background-position-y: 50%;
 62 |       }
 63 |       #control-panel select[disabled] {
 64 |         cursor: not-allowed;
 65 |         background-image: none;
 66 |         background-color: #666;
 67 |         color: #ccc;
 68 |       }
 69 |       #control-panel select:hover { cursor: pointer; }
 70 |       #legend {
 71 |         position: absolute;
 72 |         bottom: 26px;
 73 |         right: 8px;
 74 |         z-index: 9;
 75 |         padding: 4px 6px;
 76 |         font-size: 12px;
 77 |       }
 78 |       .legend-bar {
 79 |         width: 12px;
 80 |         height: 11px;
 81 |       }
 82 |       .legend-label { margin-bottom: 3px; }
 83 |       .legend-bar { display: inline-block; }
 84 |       .legend-max { float: right; }
 85 |       #control-panel, #show-control-panel {
 86 |         position: fixed;
 87 |         max-height: 90vh;
 88 |         top: 8px;
 89 |         left: 8px;
 90 |         padding: 8px;
 91 |         font-size: 12px;
 92 |         z-index: 10;
 93 |       }
 94 |       .control-panel-row { margin-bottom: 12px; }
 95 |       .control-panel-github { margin-bottom: 4px; }
 96 |       .mobile #control-panel { overflow-y: scroll; }
 97 |       .mobile #control-panel,
 98 |       .mobile #show-control-panel,
 99 |       .mobile #control-panel select,
100 |       .mobile #control-panel select:focus {
101 |         font-size: 16px;
102 |       }
103 |       .mobile #legend { bottom: 40px; }
104 |       .mobile .injury-checkboxes { line-height: 36px; }
105 |       .radio-button-label { margin-right: 6px; }
106 |       .mobile .hide-controls {
107 |         border: none;
108 |         padding-bottom: 12px;
109 |       }
110 |       body:not(.mobile) .hide-controls {
111 |         position: absolute;
112 |         top: -1px;
113 |         right: -21px;
114 |         height: 16px;
115 |         width: 16px;
116 |         border-radius: 16px;
117 |         text-align: center;
118 |       }
119 |       #control-panel .injury-checkboxes input[disabled]:hover { cursor: not-allowed; }
120 |       .collisions-total img { height: 12px; }
121 |       #tooltip:empty { display: none; }
122 |       #tooltip {
123 |         font-size: 12px;
124 |         position: absolute;
125 |         padding: 6px;
126 |         margin: 8px;
127 |         max-width: 320px;
128 |         z-index: 100;
129 |         pointer-events: none;
130 |       }
131 |       #tooltip table { border-collapse: collapse; }
132 |       #tooltip th {
133 |         text-align: left;
134 |         font-weight: normal;
135 |         padding: 2px 6px 12px 6px;
136 |       }
137 |       #tooltip td { padding: 2px 6px; }
138 |       #tooltip .numeric { text-align: right; }
139 |       #tooltip .total td { border-top: 1px solid #aaa; }
140 |       #tooltip .year-histogram td { padding-top: 12px; }
141 |       .hidden { display: none; }
142 |     </style>
143 |   </head>
144 | 
145 |   <body>
146 |     <div id="show-control-panel" class="hidden">
147 |       <a href="#" onclick="showControlPanel()">
148 |         <div>
149 |           Show controls
150 |         </div>
151 |       </a>
152 |     </div>
153 | 
154 |     <div id="control-panel">
155 |       <a href="#" onclick="hideControlPanel()">
156 |         <div class="hide-controls">
157 |           &#215;
158 |         </div>
159 |       </a>
160 | 
161 |       <div class="control-panel-row color-variable">
162 |         <select id="colorVariable" onchange="renderLayer()">
163 |           <option value="collisions">Total collisions</option>
164 |           <option value="total_injured">Total people injured</option>
165 |           <option value="total_killed">Total people killed</option>
166 |           <option value="motorists_injured">Motorists injured</option>
167 |           <option value="motorists_killed">Motorists killed</option>
168 |           <option value="cyclists_injured">Cyclists injured</option>
169 |           <option value="cyclists_killed">Cyclists killed</option>
170 |           <option value="pedestrians_injured">Pedestrians injured</option>
171 |           <option value="pedestrians_killed">Pedestrians killed</option>
172 |         </select>
173 |       </div>
174 | 
175 |       <div class="control-panel-row">
176 |         <select id="borough" onchange="filterData()">
177 |           <option value="">All boroughs</option>
178 |           <option value="x">Bronx</option>
179 |           <option value="k">Brooklyn</option>
180 |           <option value="m">Manhattan</option>
181 |           <option value="q">Queens</option>
182 |           <option value="s">Staten Island</option>
183 |         </select>
184 |       </div>
185 | 
186 |       <div class="control-panel-row"> <!-- year filter: the empty value means no year filter -->
187 |         <select id="year" onchange="filterData()">
188 |           <option value="">2012–2018</option>
189 |           <option value="2018">2018</option>
190 |           <option value="2017">2017</option>
191 |           <option value="2016">2016</option>
192 |           <option value="2015">2015</option>
193 |           <option value="2014">2014</option>
194 |           <option value="2013">2013</option>
195 |           <option value="2012">2012 (Jul–Dec)</option>
196 |         </select>
197 |       </div>
198 |       <!-- time-of-day filter: option values are the one-letter time_of_day codes assigned in export_data.sql -->
199 |       <div class="control-panel-row">
200 |         <select id="timeOfDay" onchange="filterData()">
201 |           <option value="">All times of day</option>
202 |           <option value="m">8:00 AM–11:00 AM</option>
203 |           <option value="i">11:00 AM–4:00 PM</option>
204 |           <option value="a">4:00 PM–7:00 PM</option>
205 |           <option value="e">7:00 PM–10:00 PM</option>
206 |           <option value="o">10:00 PM–8:00 AM</option>
207 |         </select>
208 |       </div>
209 |       <!-- vehicle-count filter: filterData() treats the value 3 as "three or more vehicles" -->
210 |       <div class="control-panel-row">
211 |         <select id="numVehicles" onchange="filterData()">
212 |           <option value="">1+ vehicles</option>
213 |           <option value="1">1 vehicle</option>
214 |           <option value="2">2 vehicles</option>
215 |           <option value="3">3+ vehicles</option>
216 |         </select>
217 |       </div>
218 |       <!-- injury filter: filterData() enables the "Type injured" checkboxes for the values both and killed -->
219 |       <div class="control-panel-row injury-status">
220 |         <select id="injuryStatus" onchange="filterData()">
221 |           <option value="">All injury statuses</option>
222 |           <option value="both">At least 1 injury or fatality</option>
223 |           <option value="killed">At least 1 fatality</option>
224 |           <option value="none">No injuries</option>
225 |         </select>
226 |       </div>
227 | 
228 |       <div class="control-panel-row">
229 |         <label>Type injured</label>
230 |         <!-- Boxes start disabled; filterData() enables them for the "both" and "killed" injury statuses. -->
231 |         <div class="injury-checkboxes disabled" onmouseover="injuryTooltip(this)" onmouseout="emptyTooltip()">
232 |           <div>
233 |             <input id="motorists-injured" type="checkbox" disabled onchange="filterData()">
234 |             <label for="motorists-injured">Motorists</label>
235 |           </div>
236 | 
237 |           <div>
238 |             <input id="cyclists-injured" type="checkbox" disabled onchange="filterData()">
239 |             <label for="cyclists-injured">Cyclists</label>
240 |           </div>
241 | 
242 |           <div>
243 |             <input id="pedestrians-injured" type="checkbox" disabled onchange="filterData()">
244 |             <label for="pedestrians-injured">Pedestrians</label>
245 |           </div>
246 |         </div>
247 |       </div>
248 | 
249 |       <hr/>
250 | 
251 |       <div class="control-panel-row collisions-total"> <!-- placeholder; renderLayer() overwrites this content with the collision count -->
252 |         Fetching data
253 |         <img src="https://cdn.toddwschneider.com/collisions/ajax-loader.gif" alt="throbber">
254 |       </div>
255 | 
256 |       <hr/>
257 | 
258 |       <div class="control-panel-row"> <!-- the default of 300 is also hard-coded in updateUrl(), which leaves radius=300 out of the URL -->
259 |         <label>Hexagon radius: <span id="radius-value">300</span> meters</label>
260 |         <input id="radius" type="range" min="50" max="1000" step="50" value="300" onchange="renderLayer()">
261 |       </div>
262 | 
263 |       <div class="control-panel-row radio-buttons"> <!-- 2D/3D switch handled by setMode(); initializeLayerInputs() looks the inputs up by their mode-<value> ids -->
264 |         <input id="mode-2d" type="radio" name="mode" value="2d" checked onchange="setMode()">
265 |         <label for="mode-2d" class="radio-button-label">2D</label>
266 | 
267 |         <input id="mode-3d" type="radio" name="mode" value="3d" onchange="setMode()">
268 |         <label for="mode-3d" class="radio-button-label">3D</label>
269 |       </div>
270 | 
271 |       <hr/>
272 | 
273 |       <div class="control-panel-github"> <!-- rel=noopener keeps the new tab from reaching this page through window.opener -->
274 |         <a href="https://github.com/toddwschneider/nyc-motor-vehicle-collisions" target="_blank" rel="noopener noreferrer">
275 |           GitHub
276 |         </a>
277 |       </div>
278 |     </div>
279 | 
280 |     <div id="tooltip"></div> <!-- shared tooltip: written by updateTooltip() and injuryTooltip(), cleared by emptyTooltip() -->
281 |     <!-- legend: buildLegend() fills .legend-colors; updateLegend() sets the label and min/max and removes "hidden" -->
282 |     <div id="legend" class="hidden">
283 |       <div class="legend-label">Collisions</div>
284 |       <div class="legend-colors"></div>
285 |       <div>
286 |         <span class="legend-min"></span>
287 |         <span class="legend-max"></span>
288 |       </div>
289 |     </div>
290 |   </body>
291 | 
292 |   <script type="text/javascript">
293 |     'use strict';
294 |     // Stop early, with a message for the user, when the browser has no window.fetch.
295 |     if (!('fetch' in window)) {
296 |       alert('Sorry, your browser is not supported. Recommended browsers are Chrome, Firefox, and Safari.');
297 |       throw new Error('unsupported browser');
298 |     }
299 | 
300 |     let mobile = mobileDevice();
301 |     let urlParams = new URLSearchParams(window.location.search);
302 |     // Color ramp shared by the hexagon layer and the legend; the first two entries carry a fourth component.
303 |     const COLOR_RANGE = [
304 |       [0, 0, 4, 223],
305 |       [20, 11, 53, 239],
306 |       [58, 9, 99],
307 |       [96, 19, 110],
308 |       [133, 33, 107],
309 |       [169, 46, 94],
310 |       [203, 65, 73],
311 |       [230, 93, 47],
312 |       [247, 131, 17],
313 |       [252, 173, 18],
314 |       [245, 219, 75],
315 |       [252, 255, 164]
316 |     ];
317 |     // buildLegend() reads COLOR_RANGE, so it can only run once the constant above is initialized.
318 |     buildLegend();
319 |     // Passed to the hexagon layer as lightSettings in renderLayer().
320 |     const LIGHT_SETTINGS = {
321 |       lightsPosition: [-74.08, 40.8, 8000, -73.5, 41, 8000],
322 |       ambientRatio: 0.4,
323 |       diffuseRatio: 0.6,
324 |       specularRatio: 0.8,
325 |       lightsStrength: [1, 0, 1, 0],
326 |       numberOfLights: 2
327 |     };
328 |     // Restore radius / colorVariable / mode from the URL before any code reads those inputs.
329 |     initializeLayerInputs();
330 | 
331 |     // data is generated by export_data.sql script
332 |     let baseUrl = 'https://toddwschneiderdotcom.twscontent.com/collisions/20190131';
333 |     let dataUrl = `${baseUrl}/nyc_motor_vehicle_collisions.csv`;
334 |     // Mobile layout: size the page to the window's inner height and start with the control panel hidden.
335 |     if (mobile) {
336 |       document.body.classList.add('mobile');
337 |       window.onresize = () => {
338 |         document.body.style.height = `${window.innerHeight}px`;
339 |         document.getElementById('control-panel').style.maxHeight = `${window.innerHeight - 36}px`;
340 |       }
341 |       window.onresize();
342 |       document.querySelector('.hide-controls').innerHTML = 'Hide controls';
343 |       hideControlPanel();
344 |     }
345 | 
346 |     let currentMode = document.querySelector('input[name="mode"]:checked').value;
347 |     let current3dPitch = 54;
348 | 
349 |     let viewStateDefaults = {
350 |       longitude: -73.977,
351 |       latitude: 40.728,
352 |       zoom: 10.9,
353 |       bearing: 0,
354 |       pitch: (currentMode === '3d' ? current3dPitch : 0),
355 |       minZoom: 8
356 |     };
357 | 
358 |     const LNG_LAT_BOUNDS = {
359 |       longitude: [-75, -73],
360 |       latitude: [40, 41.5]
361 |     };
362 | 
363 |     let currentViewState = Object.assign({}, viewStateDefaults);
364 | 
365 |     Object.keys(currentViewState).forEach(k => {
366 |       if (urlParams.has(k)) {
367 |         currentViewState[k] = Number(urlParams.get(k));
368 |       }
369 |     });
370 | 
371 |     let viewStateChangeTimeout;
372 |     // Map plus WebGL overlay; the initial camera comes from currentViewState, merged into the props below.
373 |     const deckgl = new deck.DeckGL(Object.assign({
374 |       mapboxApiAccessToken: 'YOUR_TOKEN_HERE', // register at https://www.mapbox.com to get a token
375 |       mapStyle: 'mapbox://styles/mapbox/dark-v9?optimize=true',
376 |       controller: {touchRotate: (currentMode === '3d')},
377 |       debug: false,
378 |       onWebGLInitialized: gl => {
379 |         gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA, gl.ONE, gl.ONE_MINUS_SRC_ALPHA); // NOTE(review): blendFunc takes two factors; four arguments look like blendFuncSeparate was meant - confirm
380 |         gl.blendEquation(gl.FUNC_ADD, gl.FUNC_ADD); // NOTE(review): blendEquation takes one mode; two arguments look like blendEquationSeparate - confirm
381 |       },
382 |       onViewStateChange: ({viewState}) => {
383 |         if (currentMode === '3d') {
384 |           current3dPitch = viewState.pitch;
385 |         }
386 |         currentViewState = Object.assign({}, viewState);
387 |         // Debounce: the bounds check and the URL update run 100 ms after the last view state change.
388 |         clearTimeout(viewStateChangeTimeout);
389 | 
390 |         viewStateChangeTimeout = setTimeout(() => {
391 |           enforeLngLatBounds();
392 |           updateUrl();
393 |         }, 100);
394 |       }
395 |     }, currentViewState));
396 | 
397 |     const FILTER_IDS = ['borough', 'year', 'numVehicles', 'timeOfDay', 'injuryStatus', 'injuryCheckboxes'];
398 |     let currentFilters = {};
399 |     initializeFilterInputs();
400 | 
401 |     let data, filteredData;
402 | 
403 |     d3.csv(dataUrl, row => {
404 |       return {
405 |         pos: [+row.lng, +row.lat],
406 |         year: +row.year,
407 |         borough: row.borough,
408 |         num_vehicles: +row.num_vehicles,
409 |         time_of_day: row.time_of_day,
410 |         motorists_injured: +row.motorists_injured,
411 |         motorists_killed: +row.motorists_killed,
412 |         cyclists_injured: +row.cyclists_injured,
413 |         cyclists_killed: +row.cyclists_killed,
414 |         pedestrians_injured: +row.pedestrians_injured,
415 |         pedestrians_killed: +row.pedestrians_killed,
416 |         total_injured: (+row.motorists_injured) + (+row.cyclists_injured) + (+row.pedestrians_injured),
417 |         total_killed: (+row.motorists_killed) + (+row.cyclists_killed) + (+row.pedestrians_killed)
418 |       };
419 |     }).then(rows => {
420 |       data = rows;
421 | 
422 |       if (Object.keys(currentFilters).length > 0) filterData();
423 | 
424 |       renderLayer();
425 |     }).catch(e => {
426 |       console.error(e);
427 |       alert('Something went wront, maybe try reloading the page');
428 |     });
429 | 
430 |     function hexIntensityCalculator() {
431 |       let colorVariable = document.getElementById('colorVariable').selectedOptions[0].value;
432 | 
433 |       if (colorVariable === 'collisions') {
434 |         return points => points.length;
435 |       } else {
436 |         return points => points.map(p => p[colorVariable]).reduce((a, b) => a + b, 0);
437 |       }
438 |     }
439 | 
440 |     function renderLayer() {
441 |       let options = {
442 |         extruded: currentMode === '3d',
443 |         opacity: (currentMode === '3d' ? 0.4 : 0.3),
444 |         coverage: (currentMode === '3d' ? 0.9 : 1),
445 |         radius: Number(document.getElementById('radius').value)
446 |       };
447 | 
448 |       document.getElementById('radius-value').innerHTML = options.radius;
449 | 
450 |       if (mobile) {
451 |         options.onClick = updateTooltip;
452 |       } else {
453 |         options.onHover = updateTooltip;
454 |       }
455 | 
456 |       let calculator = hexIntensityCalculator();
457 | 
458 |       const hexagonLayer = new deck.HexagonLayer(Object.assign({
459 |         id: 'heatmap',
460 |         data: filteredData || data,
461 |         colorRange: COLOR_RANGE,
462 |         elevationRange: [0, 5000],
463 |         getPosition: d => d.pos,
464 |         getColorValue: calculator,
465 |         getElevationValue: calculator,
466 |         lightSettings: LIGHT_SETTINGS,
467 |         pickable: true,
468 |         autoHighlight: true
469 |       }, options));
470 | 
471 |       document.querySelector('.collisions-total').innerHTML = `
472 |         ${numberWithCommas((filteredData || data).length)} total collisions
473 |       `;
474 | 
475 |       deckgl.setProps({
476 |         layers: [hexagonLayer],
477 |         viewState: Object.assign({}, currentViewState)
478 |       });
479 | 
480 |       updateUrl();
481 |       updateLegend(hexagonLayer.state.colorValueDomain);
482 |     }
483 | 
484 |     function updateCurrentFilters() {
485 |       let filters = {injuryCheckboxes: []};
486 | 
487 |       ['borough', 'year', 'numVehicles', 'timeOfDay', 'injuryStatus'].forEach(k => {
488 |         let val = document.getElementById(k).selectedOptions[0].value;
489 |         if (val) filters[k] = val;
490 |       });
491 | 
492 |       if (['both', 'killed'].indexOf(filters.injuryStatus) > -1) {
493 |         document.querySelectorAll('.injury-checkboxes input:checked').forEach(e => {
494 |           filters.injuryCheckboxes.push(e.id);
495 |         });
496 |       }
497 | 
498 |       if (filters.year) filters.year = Number(filters.year);
499 |       if (filters.numVehicles) filters.numVehicles = Number(filters.numVehicles);
500 | 
501 |       currentFilters = filters;
502 |     }
503 | 
504 |     function filterData() {
505 |       updateCurrentFilters();
506 |       updateUrl();
507 | 
508 |       let borough = currentFilters.borough;
509 |       let year = currentFilters.year;
510 |       let numVehicles = currentFilters.numVehicles;
511 |       let timeOfDay = currentFilters.timeOfDay;
512 |       let injuryStatus = currentFilters.injuryStatus;
513 | 
514 |       let injuryTypes = [];
515 | 
516 |       if (['both', 'killed'].indexOf(injuryStatus) > -1) {
517 |         enableInjuryCheckboxes();
518 | 
519 |         currentFilters.injuryCheckboxes.forEach(id => {
520 |           let category = id.split('-')[0];
521 |           injuryTypes.push(`${category}_killed`);
522 |           if (injuryStatus === 'both') injuryTypes.push(`${category}_injured`);
523 |         });
524 | 
525 |         if (injuryTypes.length === 0) {
526 |           injuryTypes.push('total_killed');
527 |           if (injuryStatus === 'both') injuryTypes.push('total_injured');
528 |         }
529 |       } else {
530 |         disableInjuryCheckboxes();
531 |       }
532 | 
533 |       if (injuryStatus === '' && borough === '' && year === 0 && numVehicles === 0 && timeOfDay === '') {
534 |         filteredData = undefined;
535 |       } else {
536 |         filteredData = data.filter(row => {
537 |           if (borough && row.borough !== borough) return false;
538 | 
539 |           if (year && row.year !== year) return false;
540 | 
541 |           if (numVehicles === 3 && row.num_vehicles < 3) {
542 |             return false;
543 |           } else if (numVehicles > 0 && numVehicles < 3 && row.num_vehicles != numVehicles) {
544 |             return false;
545 |           }
546 | 
547 |           if (timeOfDay && row.time_of_day !== timeOfDay) return false;
548 | 
549 |           if (injuryStatus === 'none' && row.total_injured + row.total_killed > 0) return false;
550 | 
551 |           if (injuryTypes.length > 0 && !injuryTypes.some(key => row[key] > 0)) return false;
552 | 
553 |           return true;
554 |         });
555 |       }
556 | 
557 |       renderLayer();
558 |     }
559 | 
560 |     function setMode() {
561 |       currentMode = document.querySelector('input[name="mode"]:checked').value;
562 | 
563 |       if (currentMode === '3d') {
564 |         if (currentViewState.pitch === 0) {
565 |           currentViewState.pitch = current3dPitch;
566 |         }
567 | 
568 |         deckgl.setProps({controller: {touchRotate: true}});
569 |       } else if (currentMode === '2d') {
570 |         currentViewState.pitch = 0;
571 |         currentViewState.bearing = 0;
572 | 
573 |         deckgl.setProps({controller: {touchRotate: false}});
574 |       }
575 | 
576 |       renderLayer();
577 |     }
578 | 
579 |     let mobileTooltipTimeout; // pending auto-hide timer, armed at the end of updateTooltip() on mobile
580 |     // Tooltip handler handed to the hexagon layer in renderLayer(): summarizes the points of the picked hexagon.
581 |     function updateTooltip({x, y, object}) {
582 |       let tooltip = document.getElementById('tooltip');
583 | 
584 |       if (object) {
585 |         let yearsHistogram = {},
586 |             motoristsInjured = 0,
587 |             motoristsKilled = 0,
588 |             cyclistsInjured = 0,
589 |             cyclistsKilled = 0,
590 |             pedestriansInjured = 0,
591 |             pedestriansKilled = 0;
592 |         // Tally collisions per year and casualties per category over the hexagon's points.
593 |         object.points.forEach(point => {
594 |           yearsHistogram[point.year] = yearsHistogram[point.year] || 0;
595 |           yearsHistogram[point.year] += 1;
596 | 
597 |           motoristsInjured += point.motorists_injured;
598 |           motoristsKilled += point.motorists_killed;
599 |           cyclistsInjured += point.cyclists_injured;
600 |           cyclistsKilled += point.cyclists_killed;
601 |           pedestriansInjured += point.pedestrians_injured;
602 |           pedestriansKilled += point.pedestrians_killed;
603 |         });
604 | 
605 |         let totalInjured = motoristsInjured + cyclistsInjured + pedestriansInjured;
606 |         let totalKilled = motoristsKilled + cyclistsKilled + pedestriansKilled;
607 | 
608 |         let histogramHtml = '';
609 |         // The per-year breakdown is only shown while no single year is selected.
610 |         if (!currentFilters.year) {
611 |           histogramHtml = `
612 |             <tr class="year-histogram">
613 |               <td>Year</td>
614 |               <td class="numeric">Collisions</td>
615 |             </tr>
616 |           `;
617 | 
618 |           [2012, 2013, 2014, 2015, 2016, 2017, 2018].forEach(year => {
619 |             histogramHtml += `
620 |               <tr>
621 |                 <td>${year}</td>
622 |                 <td class="numeric">
623 |                   ${numberWithCommas(yearsHistogram[year] || 0)}
624 |                 </td>
625 |               </tr>
626 |             `;
627 |           });
628 |         }
629 | 
630 |         tooltip.innerHTML = `
631 |           <table>
632 |             <thead>
633 |               <tr>
634 |                 <th>Collisions</th>
635 |                 <th class="numeric">${numberWithCommas(object.points.length)}</th>
636 |                 <th class="numeric"></th>
637 |               </tr>
638 |             </thead>
639 |             <tbody>
640 |               <tr>
641 |                 <td>Type</td>
642 |                 <td class="numeric">Injured</td>
643 |                 <td class="numeric">Killed</td>
644 |               </tr>
645 |               <tr>
646 |                 <td>Motorists</td>
647 |                 <td class="numeric">${numberWithCommas(motoristsInjured)}</td>
648 |                 <td class="numeric">${numberWithCommas(motoristsKilled)}</td>
649 |               </tr>
650 |               <tr>
651 |                 <td>Cyclists</td>
652 |                 <td class="numeric">${numberWithCommas(cyclistsInjured)}</td>
653 |                 <td class="numeric">${numberWithCommas(cyclistsKilled)}</td>
654 |               </tr>
655 |               <tr>
656 |                 <td>Pedestrians</td>
657 |                 <td class="numeric">${numberWithCommas(pedestriansInjured)}</td>
658 |                 <td class="numeric">${numberWithCommas(pedestriansKilled)}</td>
659 |               </tr>
660 |               <tr class="total">
661 |                 <td>Total</td>
662 |                 <td class="numeric">${numberWithCommas(totalInjured)}</td>
663 |                 <td class="numeric">${numberWithCommas(totalKilled)}</td>
664 |               </tr>
665 | 
666 |               ${histogramHtml}
667 |             </tbody>
668 |           </table>
669 |         `;
670 |         // Measure the rendered tooltip so it can be kept inside the window.
671 |         let ttWidth = tooltip.clientWidth;
672 |         let ttHeight = tooltip.clientHeight;
673 |         // Cap the left edge near the right border and flip the tooltip above y near the bottom border.
674 |         let offsetX = Math.min(x, window.innerWidth - ttWidth - 20);
675 |         let offsetY = y + ttHeight > window.innerHeight - 50 ? y - ttHeight - 40 : y + 10;
676 | 
677 |         tooltip.style.top = `${offsetY}px`;
678 |         tooltip.style.left = `${offsetX}px`;
679 |       } else {
680 |         tooltip.innerHTML = '';
681 |       }
682 |       // On mobile the handler runs on tap (see renderLayer), so the tooltip is cleared again after 3 seconds.
683 |       if (mobile) {
684 |         clearTimeout(mobileTooltipTimeout);
685 |         mobileTooltipTimeout = setTimeout(emptyTooltip, 3000);
686 |       }
687 |     }
688 | 
689 |     function numberWithCommas(num) { // formats e.g. 1234567 as '1,234,567'
690 |       return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ','); // comma before every run of 3n digits that ends the digit sequence
691 |     }
692 | 
693 |     function hideControlPanel() {
694 |       document.getElementById('show-control-panel').classList.remove('hidden');
695 |       document.getElementById('control-panel').classList.add('hidden');
696 |     }
697 | 
698 |     function showControlPanel() {
699 |       document.getElementById('show-control-panel').classList.add('hidden');
700 |       document.getElementById('control-panel').classList.remove('hidden');
701 |     }
702 | 
703 |     function enableInjuryCheckboxes() {
704 |       document.querySelector('.injury-checkboxes').classList.remove('disabled');
705 |       document.querySelectorAll('.injury-checkboxes input').forEach(e => {
706 |         e.disabled = false;
707 |       });
708 |     }
709 | 
710 |     function disableInjuryCheckboxes() {
711 |       document.querySelector('.injury-checkboxes').classList.add('disabled');
712 |       document.querySelectorAll('.injury-checkboxes input').forEach(e => {
713 |         e.disabled = true;
714 |         e.checked = false;
715 |       });
716 |     }
717 | 
718 |     function injuryTooltip(div) {
719 |       if (!div.classList.contains('disabled')) return;
720 | 
721 |       const tooltip = document.getElementById('tooltip');
722 | 
723 |       tooltip.style.top = `${event.clientY}px`;
724 |       tooltip.style.left = `${event.clientX}px`;
725 |       tooltip.innerHTML = 'Filter by injury status to enable';
726 |     }
727 | 
728 |     function emptyTooltip() {
729 |       document.getElementById('tooltip').innerHTML = '';
730 |     }
731 | 
732 |     function buildLegend() {
733 |       let container = document.querySelector('.legend-colors');
734 | 
735 |       COLOR_RANGE.forEach(rgb => {
736 |         let div = document.createElement('div');
737 |         div.setAttribute('class', 'legend-bar');
738 |         div.setAttribute('style', `background-color: rgb(${rgb.join(',')})`);
739 |         container.appendChild(div);
740 |       });
741 |     }
742 | 
743 |     function updateLegend(domain) {
744 |       let label = {
745 |         collisions: 'Collisions',
746 |         total_injured: 'People injured',
747 |         total_killed: 'People killed',
748 |         motorists_injured: 'Motorists injured',
749 |         motorists_killed: 'Motorists killed',
750 |         cyclists_injured: 'Cyclists injured',
751 |         cyclists_killed: 'Cyclists killed',
752 |         pedestrians_injured: 'Pedestrians injured',
753 |         pedestrians_killed: 'Pedestrians killed'
754 |       }[document.getElementById('colorVariable').selectedOptions[0].value];
755 | 
756 |       document.getElementById('legend').classList.remove('hidden');
757 |       document.querySelector('.legend-min').innerHTML = numberWithCommas(domain[0]);
758 |       document.querySelector('.legend-max').innerHTML = numberWithCommas(domain[1]);
759 |       document.querySelector('.legend-label').innerHTML = label;
760 |     }
761 | 
762 |     function mobileDevice() {
763 |       return /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);
764 |     }
765 | 
766 |     function initializeFilterInputs() {
767 |       FILTER_IDS.forEach(k => {
768 |         if (!urlParams.get(k)) return;
769 | 
770 |         if (k === 'injuryCheckboxes') {
771 |           let ids = urlParams.get(k).split(',');
772 |           ids.forEach(i => document.getElementById(i).checked = true);
773 |         } else {
774 |           document.querySelector(`#${k} option[value="${urlParams.get(k)}"]`).selected = true;
775 |         }
776 | 
777 |         updateCurrentFilters();
778 |       });
779 |     }
780 | 
781 |     function initializeLayerInputs() {
782 |       if (urlParams.has('radius')) {
783 |         document.getElementById('radius').value = urlParams.get('radius');
784 |       }
785 | 
786 |       if (urlParams.has('colorVariable')) {
787 |         document.querySelector(`#colorVariable option[value="${urlParams.get('colorVariable')}"]`).selected = true;
788 |       }
789 | 
790 |       if (urlParams.has('mode')) {
791 |         document.getElementById(`mode-${urlParams.get('mode')}`).checked = true;
792 |       }
793 |     };
794 | 
795 |     function updateUrl() { // mirrors the camera, filters and layer controls into the query string via replaceState
796 |       urlParams.set('latitude', currentViewState.latitude.toFixed(3));
797 |       urlParams.set('longitude', currentViewState.longitude.toFixed(3));
798 |       urlParams.set('zoom', currentViewState.zoom.toFixed(1));
799 |       urlParams.set('pitch', currentViewState.pitch.toFixed(0));
800 |       urlParams.set('bearing', currentViewState.bearing.toFixed(0));
801 |       // Drop view state parameters that equal their defaults.
802 |       Object.keys(viewStateDefaults).forEach(k => {
803 |         if (viewStateDefaults[k] === Number(urlParams.get(k))) {
804 |           urlParams.delete(k);
805 |         }
806 |       });
807 |       // Filters: write the active ones and remove the rest; an empty checkbox list counts as inactive.
808 |       FILTER_IDS.forEach(k => {
809 |         if (currentFilters[k] && k !== 'injuryCheckboxes') {
810 |           urlParams.set(k, currentFilters[k]);
811 |         } else if (currentFilters[k] && k === 'injuryCheckboxes' && currentFilters[k].length > 0) {
812 |           urlParams.set(k, currentFilters[k]);
813 |         } else {
814 |           urlParams.delete(k);
815 |         }
816 |       });
817 |       // Layer controls are written only when they differ from 300 / 'collisions' / '2d'.
818 |       let radiusInput = document.getElementById('radius');
819 |       if (Number(radiusInput.value) === 300) {
820 |         urlParams.delete('radius');
821 |       } else {
822 |         urlParams.set('radius', radiusInput.value);
823 |       }
824 | 
825 |       let colorVar = document.getElementById('colorVariable').selectedOptions[0].value;
826 |       if (colorVar === 'collisions') {
827 |         urlParams.delete('colorVariable');
828 |       } else {
829 |         urlParams.set('colorVariable', colorVar);
830 |       }
831 | 
832 |       if (currentMode === '2d') {
833 |         urlParams.delete('mode');
834 |       } else {
835 |         urlParams.set('mode', currentMode);
836 |       }
837 |       // The address bar is rewritten in place (no history entry); the URL is percent-decoded first.
838 |       let query = urlParams.toString() ? `?${urlParams}` : '';
839 |       window.history.replaceState({}, '', decodeURIComponent(`${location.pathname}${query}`));
840 |     }
841 | 
842 |     function enforeLngLatBounds() {
843 |       let lngLatValid = (
844 |         currentViewState.longitude >= LNG_LAT_BOUNDS.longitude[0] &&
845 |         currentViewState.longitude <= LNG_LAT_BOUNDS.longitude[1] &&
846 |         currentViewState.latitude >= LNG_LAT_BOUNDS.latitude[0] &&
847 |         currentViewState.latitude <= LNG_LAT_BOUNDS.latitude[1]
848 |       );
849 | 
850 |       if (lngLatValid) return;
851 | 
852 |       currentViewState.longitude = Math.min(
853 |         Math.max(currentViewState.longitude, LNG_LAT_BOUNDS.longitude[0]),
854 |         LNG_LAT_BOUNDS.longitude[1]
855 |       );
856 | 
857 |       currentViewState.latitude = Math.min(
858 |         Math.max(currentViewState.latitude, LNG_LAT_BOUNDS.latitude[0]),
859 |         LNG_LAT_BOUNDS.latitude[1]
860 |       );
861 | 
862 |       renderLayer();
863 |     }
864 |   </script>
865 | </html>
866 | 


--------------------------------------------------------------------------------
/analysis/export_data.sql:
--------------------------------------------------------------------------------
 1 | DROP TABLE IF EXISTS export_data;
 2 | -- One compact row per collision whose coordinates fall inside the bounding box below.
 3 | CREATE TEMP TABLE export_data AS
 4 | WITH nv AS ( -- number of collisions_vehicles rows per collision
 5 |   SELECT
 6 |     collision_unique_key,
 7 |     count(*) AS num_vehicles
 8 |   FROM collisions_vehicles
 9 |   GROUP BY collision_unique_key
10 | )
11 | SELECT
12 |   extract(year FROM collision_time) AS year,
13 |   CASE borough -- one-letter borough codes; any other value (or NULL) exports as NULL
14 |     WHEN 'Bronx' THEN 'x'
15 |     WHEN 'Brooklyn' THEN 'k'
16 |     WHEN 'Manhattan' THEN 'm'
17 |     WHEN 'Queens' THEN 'q'
18 |     WHEN 'Staten Island' THEN 's'
19 |   END AS borough,
20 |   round(latitude, 5) AS lat,
21 |   round(longitude, 5) AS lng,
22 |   nv.num_vehicles, -- NULL when the collision has no rows in collisions_vehicles (LEFT JOIN)
23 |   number_of_motorists_injured AS motorists_injured,
24 |   number_of_motorists_killed AS motorists_killed,
25 |   number_of_cyclists_injured AS cyclists_injured,
26 |   number_of_cyclists_killed AS cyclists_killed,
27 |   number_of_pedestrians_injured AS pedestrians_injured,
28 |   number_of_pedestrians_killed AS pedestrians_killed,
29 |   CASE -- one-letter time-of-day bucket from the hour of collision_time
30 |     WHEN extract(hour FROM collision_time) IN (8, 9, 10) THEN 'm'
31 |     WHEN extract(hour FROM collision_time) IN (11, 12, 13, 14, 15) THEN 'i'
32 |     WHEN extract(hour FROM collision_time) IN (16, 17, 18) THEN 'a'
33 |     WHEN extract(hour FROM collision_time) IN (19, 20, 21) THEN 'e'
34 |     ELSE 'o' -- every remaining hour: 22, 23 and 0 through 7
35 |   END AS time_of_day
36 | FROM collisions c
37 |   LEFT JOIN nv ON c.unique_key = nv.collision_unique_key
38 | WHERE latitude IS NOT NULL
39 |   AND longitude IS NOT NULL
40 |   AND latitude BETWEEN 40.4 AND 41 -- bounding box: rows with coordinates outside it are dropped
41 |   AND longitude BETWEEN -74.4 AND -73.5
42 | ORDER BY unique_key;
43 | -- \copy is a psql meta-command: the CSV file is written by the psql client, not by the server.
44 | \copy (SELECT * FROM export_data) TO 'nyc_motor_vehicle_collisions.csv' CSV HEADER;
45 | 


--------------------------------------------------------------------------------
/analysis/helpers.R:
--------------------------------------------------------------------------------
 1 | required_packages = c("tidyverse", "scales", "lubridate", "RPostgres", "zoo")
 2 | installed_packages = rownames(installed.packages())
 3 | packages_to_install = required_packages[!(required_packages %in% installed_packages)]
 4 | # install whatever is missing before the library() calls below
 5 | if (length(packages_to_install) > 0) {
 6 |   install.packages(
 7 |     packages_to_install,
 8 |     dependencies = TRUE,
 9 |     repos = "https://cloud.r-project.org"
10 |   )
11 | }
12 | 
13 | library(tidyverse)
14 | library(scales)
15 | library(lubridate)
16 | library(zoo)
17 | library(RPostgres)
18 | 
19 | con = dbConnect(  # shared connection used by query(); opened as soon as this file is sourced
20 |   dbDriver("Postgres"),
21 |   dbname = "nyc-motor-vehicle-collisions",
22 |   host = "localhost"
23 | )
24 | 
25 | query = function(sql) {  # runs `sql` on the shared connection `con`; returns the rows as a tibble
26 |   res = dbSendQuery(con, sql)
27 |   # release the result set even when fetching fails part-way
28 |   on.exit(dbClearResult(res), add = TRUE)
29 |   dbFetch(res) %>% as_tibble()
30 | }
31 | 
32 | capitalize_first_letter = function(string) {  # upper-cases the first character, leaves the rest unchanged
33 |   paste0(toupper(substr(string, 1, 1)), substr(string, 2, nchar(string)))
34 | }
35 | 
36 | theme_void_sf = function(base_size = 12) {  # theme_void() with zero-width grid lines and the shared font
37 |   theme_void(base_size = base_size) +
38 |     theme(
39 |       panel.grid = element_line(size = 0),
40 |       text = element_text(family = font_family)  # font_family is defined below; looked up at call time
41 |     )
42 | }
43 | 
44 | font_family = "Open Sans"  # text font used by theme_tws()
45 | title_font_family = "Fjalla One"  # plot title font used by theme_tws()
46 | nypd_blue = "#00003c"  # hex color constant; not referenced in this file
47 | 
48 | theme_tws = function(base_size = 12) {  # theme_bw() variant: light grey background, no border or ticks
49 |   bg_color = "#f4f4f4"
50 |   bg_rect = element_rect(fill = bg_color, color = bg_color)
51 |   # the caption margin is built with margin(), matching axis.title.x below
52 |   theme_bw(base_size) +
53 |     theme(
54 |       text = element_text(family = font_family),
55 |       plot.title = element_text(family = title_font_family),
56 |       plot.subtitle = element_text(size = rel(1)),
57 |       plot.caption = element_text(size = rel(0.5), margin = margin(1, 0, 0, 0, unit = "lines"), lineheight = 1.1, color = "#555555"),
58 |       plot.background = bg_rect,
59 |       axis.ticks = element_blank(),
60 |       axis.text.x = element_text(size = rel(1)),
61 |       axis.title.x = element_text(size = rel(1), margin = margin(1, 0, 0, 0, unit = "lines")),
62 |       axis.text.y = element_text(size = rel(1)),
63 |       axis.title.y = element_text(size = rel(1)),
64 |       panel.background = bg_rect,
65 |       panel.border = element_blank(),
66 |       panel.grid.major = element_line(color = "grey80", size = 0.25),
67 |       panel.grid.minor = element_line(color = "grey80", size = 0.25),
68 |       panel.spacing = unit(1.5, "lines"),
69 |       legend.background = bg_rect,
70 |       legend.key.width = unit(1.5, "line"),
71 |       legend.key = element_blank(),
72 |       strip.background = element_blank()
73 |     )
74 | }
75 | 
76 | no_axis_titles = function() {  # theme fragment that blanks both axis titles; add it to a plot with +
77 |   theme(axis.title = element_blank())
78 | }
79 | 
80 | # break function that keeps only whole-number breaks; via https://stackoverflow.com/a/10559838
81 | integer_breaks = function(n = 3, ...) {
82 |   pretty_fn = pretty_breaks(n, ...)
83 |   function(x) {
84 |     candidates = pretty_fn(x)
85 |     candidates[candidates == floor(candidates)]
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/analysis/regression_model.R:
--------------------------------------------------------------------------------
  1 | source("helpers.R")
  2 | library(glmnet)
  3 | 
  4 | # set up regression data: one row per collision that has a borough
  5 | regression_collisions = query("
  6 |   SELECT
  7 |     unique_key,
  8 |     extract(hour FROM collision_time) AS hour_of_day,
  9 |     extract(dow FROM collision_time) AS day_of_week,
 10 |     extract(year FROM collision_time) AS year,
 11 |     coalesce(on_street_name, off_street_name) AS street_name,
 12 |     borough,
 13 |     number_of_motorists_injured + number_of_cyclists_injured + number_of_pedestrians_injured > 0 AS has_injury,
 14 |     number_of_motorists_killed + number_of_cyclists_killed + number_of_pedestrians_killed > 0 AS has_fatality
 15 |   FROM collisions
 16 |   WHERE borough IS NOT NULL
 17 |   ORDER BY unique_key
 18 | ")
 19 | # rows of collisions_contributing_factors, limited to factors that occur at least 5,000 times
 20 | regression_contributing_factors = query("
 21 |   WITH candidates AS (
 22 |     SELECT contributing_factor
 23 |     FROM collisions_contributing_factors
 24 |     GROUP BY contributing_factor
 25 |     HAVING COUNT(*) >= 5000
 26 |   )
 27 |   SELECT *
 28 |   FROM collisions_contributing_factors
 29 |   WHERE contributing_factor IN (SELECT contributing_factor FROM candidates)
 30 | ")
 31 | # rows of collisions_vehicles, limited to vehicle types that occur at least 5,000 times
 32 | regression_vehicle_types = query("
 33 |   WITH candidates AS (
 34 |     SELECT vehicle_type
 35 |     FROM collisions_vehicles
 36 |     GROUP BY vehicle_type
 37 |     HAVING COUNT(*) >= 5000
 38 |   )
 39 |   SELECT *
 40 |   FROM collisions_vehicles
 41 |   WHERE vehicle_type IN (SELECT vehicle_type FROM candidates)
 42 | ")
 43 | # number of collisions_vehicles rows per collision
 44 | regression_vehicles_involved = query("
 45 |   SELECT collision_unique_key, count(*)::int AS num_vehicles
 46 |   FROM collisions_vehicles
 47 |   GROUP BY collision_unique_key
 48 |   ORDER BY collision_unique_key
 49 | ")
 50 | 
 51 | regression_collisions = regression_collisions %>%  # attach vehicle counts, then recode the predictors as factors
 52 |   inner_join(regression_vehicles_involved, by = c("unique_key" = "collision_unique_key")) %>%  # keeps only collisions that have vehicle rows
 53 |   mutate(
 54 |     hour_of_day = factor(hour_of_day, levels = c(12:23, 0:11)),  # level order starts at hour 12
 55 |     day_of_week = factor(day_of_week),
 56 |     weekday = factor(day_of_week %in% 1:5),  # day_of_week is already a factor here, so %in% compares its labels with "1".."5"
 57 |     year = factor(year),
 58 |     borough = fct_relevel(factor(borough), "Manhattan"),  # Manhattan becomes the first level
 59 |     num_vehicles = factor(num_vehicles, levels = c(2, 1, 3, 4, 5)),  # 2 is the first level; counts outside 1..5 become NA
 60 |     street_type = fct_relevel(factor(case_when(  # the first matching pattern wins, so the highway patterns take precedence
 61 |       grepl("expressway|expy|expwy|parkway|pkwy|highway|bqe|turnpike|fdr|thruway", street_name) ~ "highway",  # NOTE(review): patterns are lower-case and grepl() is case-sensitive; confirm street_name is stored lower-cased
 62 |       grepl("street| st$", street_name) ~ "street",
 63 |       grepl("avenue|broadway|bowery| ave$", street_name) ~ "avenue",
 64 |       grepl(" road| rd$", street_name) ~ "road",
 65 |       grepl(" lane| ln$", street_name) ~ "lane",
 66 |       grepl(" drive| dr$", street_name) ~ "drive",
 67 |       grepl("boulevard|blvd", street_name) ~ "boulevard",
 68 |       grepl(" place| pl$", street_name) ~ "place",
 69 |       grepl("bridge", street_name) ~ "bridge",
 70 |       grepl("tunnel", street_name) ~ "tunnel",
 71 |       !is.na(street_name) ~ "other",
 72 |       TRUE ~ "unknown"  # reached only when street_name is NA
 73 |     )), "unknown")  # "unknown" becomes the first level
 74 |   )
 75 | 
 76 | # Add one 0/1 indicator column per retained contributing factor, named cf_<sanitized factor>
 77 | for (f in sort(unique(regression_contributing_factors$contributing_factor))) {
 78 |   fname = paste0("cf_", gsub(".", "_", make.names(f), fixed = TRUE))
 79 | 
 80 |   factor_unique_keys = unique(pull(
 81 |     filter(regression_contributing_factors, contributing_factor == f),
 82 |     collision_unique_key
 83 |   ))
 84 | 
 85 |   regression_collisions = mutate(regression_collisions, !!fname := as.numeric(unique_key %in% factor_unique_keys))
 86 | }
 87 | 
 88 | # Add one 0/1 indicator column per retained vehicle type, named vt_<sanitized type>
 89 | for (v in sort(unique(regression_vehicle_types$vehicle_type))) {
 90 |   vname = paste0("vt_", gsub(".", "_", make.names(v), fixed = TRUE))
 91 | 
 92 |   vehicle_unique_keys = unique(pull(
 93 |     filter(regression_vehicle_types, vehicle_type == v),
 94 |     collision_unique_key
 95 |   ))
 96 | 
 97 |   regression_collisions = mutate(regression_collisions, !!vname := as.numeric(unique_key %in% vehicle_unique_keys))
 98 | }
 99 | 
100 | # build sparse model matrices, one per outcome; identifiers, raw street text, day_of_week and the other outcome are excluded
101 | injury_model_matrix = sparse.model.matrix(
102 |   has_injury ~ . - 1,  # every remaining column is a predictor; "- 1" removes the intercept column
103 |   select(regression_collisions, -day_of_week, -street_name, -unique_key, -has_fatality)
104 | )
105 | 
106 | fatality_model_matrix = sparse.model.matrix(
107 |   has_fatality ~ . - 1,
108 |   select(regression_collisions, -day_of_week, -street_name, -unique_key, -has_injury)
109 | )
110 | # NOTE(review): y below comes from the full data frame; confirm the model matrices did not drop rows containing NA
111 | # run regularized regressions
112 | injury_cvfit = cv.glmnet(
113 |   x = injury_model_matrix,
114 |   y = regression_collisions$has_injury,
115 |   family = "binomial"  # binary outcome
116 | )
117 | 
118 | fatality_cvfit = cv.glmnet(
119 |   x = fatality_model_matrix,
120 |   y = regression_collisions$has_fatality,
121 |   family = "binomial"
122 | )
123 | 
124 | # check on lambda values and coefficients
125 | plot(injury_cvfit)
126 | coef(injury_cvfit, s = "lambda.1se")  # coefficients at the fit's lambda.1se value
127 | 
128 | plot(fatality_cvfit)
129 | coef(fatality_cvfit, s = "lambda.1se")
130 | 


--------------------------------------------------------------------------------
/augment_data/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | ruby '2.6.6'
4 | 
5 | gem 'activerecord', '~> 5.2', require: 'active_record'
6 | gem 'pg', '~> 1.1'
7 | gem 'rest-client', '~> 2.0'
8 | 


--------------------------------------------------------------------------------
/augment_data/Gemfile.lock:
--------------------------------------------------------------------------------
 1 | GEM
 2 |   remote: https://rubygems.org/
 3 |   specs:
 4 |     activemodel (5.2.8.1)
 5 |       activesupport (= 5.2.8.1)
 6 |     activerecord (5.2.8.1)
 7 |       activemodel (= 5.2.8.1)
 8 |       activesupport (= 5.2.8.1)
 9 |       arel (>= 9.0)
10 |     activesupport (5.2.8.1)
11 |       concurrent-ruby (~> 1.0, >= 1.0.2)
12 |       i18n (>= 0.7, < 2)
13 |       minitest (~> 5.1)
14 |       tzinfo (~> 1.1)
15 |     arel (9.0.0)
16 |     concurrent-ruby (1.1.10)
17 |     domain_name (0.5.20190701)
18 |       unf (>= 0.0.5, < 1.0.0)
19 |     http-accept (1.7.0)
20 |     http-cookie (1.0.3)
21 |       domain_name (~> 0.5)
22 |     i18n (1.11.0)
23 |       concurrent-ruby (~> 1.0)
24 |     mime-types (3.3.1)
25 |       mime-types-data (~> 3.2015)
26 |     mime-types-data (3.2020.0512)
27 |     minitest (5.16.2)
28 |     netrc (0.11.0)
29 |     pg (1.2.3)
30 |     rest-client (2.1.0)
31 |       http-accept (>= 1.7.0, < 2.0)
32 |       http-cookie (>= 1.0.2, < 2.0)
33 |       mime-types (>= 1.16, < 4.0)
34 |       netrc (~> 0.8)
35 |     thread_safe (0.3.6)
36 |     tzinfo (1.2.10)
37 |       thread_safe (~> 0.1)
38 |     unf (0.1.4)
39 |       unf_ext
40 |     unf_ext (0.0.7.7)
41 | 
42 | PLATFORMS
43 |   ruby
44 | 
45 | DEPENDENCIES
46 |   activerecord (~> 5.2)
47 |   pg (~> 1.1)
48 |   rest-client (~> 2.0)
49 | 
50 | RUBY VERSION
51 |    ruby 2.6.6p146
52 | 
53 | BUNDLED WITH
54 |    1.17.2
55 | 


--------------------------------------------------------------------------------
/augment_data/README.md:
--------------------------------------------------------------------------------
 1 | # Fill in collision records that are missing coordinates
 2 | 
 3 | ~13% of the collisions in the raw dataset are missing lat/lon coordinates. Many of those records have cross streets or addresses, which can be used to fill in coordinates with reasonable guesses. There are 2 strategies used to fill in missing coordinates: other collisions at the same cross streets, and geocoding. These processes add coordinates for ~8% of the full dataset, leaving ~5% of collisions without coordinates. The `collisions.coordinates_source` column keeps track of where each collision's coordinates came from. It's possible these augmentation processes might introduce some data errors, but an anecdotal manual review suggested that they are accurate more often than not.
 4 | 
 5 | ## Coordinates from other collisions listed at the same cross streets
 6 | 
 7 | Calculate the most common coordinates for each pair of cross streets, and use them to fill in coordinates for collisions at the same cross streets that are missing coordinates. E.g. there are collisions listed at Bruckner Boulevard & E 138 St that are missing coordinates, but there are also collisions at the same cross streets that have coordinates, so assume that the unknown Bruckner & E 138 collisions happened at the most common coordinates listed for the known Bruckner & E 138 collisions, subject to there being at least 2 known collisions at the same lat/lon rounded to 4 digits.
 8 | 
 9 | `psql nyc-motor-vehicle-collisions -f augment_collisions_with_most_common_coordinates.sql`
10 | 
11 | ## Geocode missing coordinates
12 | 
13 | 1. Get a Google Maps Geocoding API key: https://developers.google.com/maps/documentation/geocoding/get-api-key
14 | 2. `psql nyc-motor-vehicle-collisions -f populate_geocodings.sql`
15 | 3. `bundle install`
16 | 4. `ruby geocode.rb --google-api-key YOUR_API_KEY_HERE`
17 | 5. `psql nyc-motor-vehicle-collisions -f augment_collisions_with_geocoding.sql`
18 | 
19 | Note that as of January 2019, the Google Maps Geocoding API allows up to 40,000 requests per month for free in the United States, then charges $5 per 1,000 requests after that. See https://developers.google.com/maps/documentation/geocoding/usage-and-billing for the latest pricing. When I ran `geocode.rb` there were fewer than 40,000 records to geocode, so I was able to do it for free.
20 | 
21 | ## Augmentation results
22 | 
23 | Coordinates sources with raw data through 12/31/2018:
24 | 
25 | ```sql
26 | SELECT
27 |   extract(year FROM collision_time) AS year,
28 |   COUNT(*) AS n,
29 |   ROUND(SUM(CASE WHEN coordinates_source = 'raw_data' THEN 1 END)::numeric / COUNT(*), 2) AS raw_data,
30 |   ROUND(SUM(CASE WHEN coordinates_source = 'most_common_in_raw_data' THEN 1 END)::numeric / COUNT(*), 2) AS most_common_in_raw_data,
31 |   ROUND(SUM(CASE WHEN coordinates_source = 'geocoding' THEN 1 END)::numeric / COUNT(*), 2) AS geocoding,
32 |   ROUND(SUM(CASE WHEN coordinates_source IS NULL THEN 1 END)::numeric / COUNT(*), 2) AS unknown
33 | FROM collisions
34 | GROUP BY year
35 | ORDER BY year;
36 | ```
37 | 
38 | ```sql
39 |  year |   n    | raw_data | most_common_in_raw_data | geocoding | unknown
40 | ------+--------+----------+-------------------------+-----------+---------
41 |  2012 | 100541 |     0.85 |                    0.06 |      0.03 |    0.06
42 |  2013 | 203727 |     0.84 |                    0.07 |      0.03 |    0.06
43 |  2014 | 206028 |     0.84 |                    0.07 |      0.03 |    0.06
44 |  2015 | 217693 |     0.84 |                    0.07 |      0.03 |    0.06
45 |  2016 | 229780 |     0.84 |                    0.08 |      0.03 |    0.05
46 |  2017 | 230991 |     0.94 |                    0.01 |      0.03 |    0.02
47 |  2018 | 231016 |     0.93 |                    0.01 |      0.03 |    0.03
48 | ```
49 | 


--------------------------------------------------------------------------------
/augment_data/augment_collisions_with_geocoding.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE tmp_points AS -- point geometries for geocodings that have coordinates but lack a zone or tract id
 2 | SELECT
 3 |   id,
 4 |   ST_SetSRID(ST_MakePoint(longitude, latitude), 4326) AS location
 5 | FROM geocodings
 6 | WHERE (taxi_zone_gid IS NULL OR nyct2010_gid IS NULL)
 7 |   AND latitude IS NOT NULL
 8 |   AND longitude IS NOT NULL;
 9 | 
10 | CREATE INDEX ON tmp_points USING gist (location);
11 | 
12 | CREATE TABLE tmp_zones AS -- taxi zone containing each point
13 | SELECT t.id, z.gid, z.borough
14 | FROM tmp_points t, taxi_zones z
15 | WHERE ST_Within(t.location, z.geom);
16 | 
17 | CREATE UNIQUE INDEX ON tmp_zones (id); -- errors if a point matched more than one zone
18 | 
19 | CREATE TABLE tmp_tracts AS -- census tract containing each point
20 | SELECT t.id, n.gid, n.boroname
21 | FROM tmp_points t, nyct2010 n
22 | WHERE ST_Within(t.location, n.geom);
23 | 
24 | CREATE UNIQUE INDEX ON tmp_tracts (id); -- errors if a point matched more than one tract
25 | 
26 | UPDATE geocodings
27 | SET taxi_zone_gid = tmp_zones.gid,
28 |     borough = tmp_zones.borough
29 | FROM tmp_zones
30 | WHERE geocodings.id = tmp_zones.id;
31 | 
32 | UPDATE geocodings -- runs second, so a tract borough overwrites the zone borough set above
33 | SET nyct2010_gid = tmp_tracts.gid,
34 |     borough = tmp_tracts.boroname
35 | FROM tmp_tracts
36 | WHERE geocodings.id = tmp_tracts.id;
37 | 
38 | WITH coords AS ( -- collisions without coordinates paired with geocodings that have them
39 |   SELECT
40 |     c.unique_key,
41 |     g.latitude,
42 |     g.longitude,
43 |     g.taxi_zone_gid,
44 |     g.borough,
45 |     g.nyct2010_gid
46 |   FROM collisions c
47 |     INNER JOIN geocodings g
48 |       ON coalesce(c.on_street_name, '') = coalesce(g.on_street_name, '') -- coalesce to '' so NULLs on both sides match
49 |       AND coalesce(c.cross_street_name, '') = coalesce(g.cross_street_name, '')
50 |       AND coalesce(c.off_street_name, '') = coalesce(g.off_street_name, '')
51 |       AND coalesce(c.reported_borough, '') = coalesce(g.reported_borough, '')
52 |   WHERE c.latitude IS NULL
53 |     AND c.longitude IS NULL
54 |     AND g.latitude IS NOT NULL
55 |     AND g.longitude IS NOT NULL
56 | )
57 | UPDATE collisions
58 | SET latitude = coords.latitude,
59 |     longitude = coords.longitude,
60 |     taxi_zone_gid = coords.taxi_zone_gid,
61 |     borough = coords.borough,
62 |     nyct2010_gid = coords.nyct2010_gid,
63 |     coordinates_source = 'geocoding' -- records where the coordinates came from
64 | FROM coords
65 | WHERE collisions.unique_key = coords.unique_key;
66 | 
67 | DROP TABLE tmp_points;
68 | DROP TABLE tmp_zones;
69 | DROP TABLE tmp_tracts;
70 | 


--------------------------------------------------------------------------------
/augment_data/augment_collisions_with_most_common_coordinates.sql:
--------------------------------------------------------------------------------
 1 | CREATE TEMP TABLE most_common_coords AS -- one row per (on street, cross street): its most frequent raw-data location
 2 | WITH candidates AS (
 3 |   SELECT
 4 |     on_street_name,
 5 |     cross_street_name,
 6 |     borough,
 7 |     taxi_zone_gid,
 8 |     nyct2010_gid,
 9 |     round(latitude, 4) AS lat,
10 |     round(longitude, 4) AS lng,
11 |     count(*) AS n
12 |   FROM collisions
13 |   WHERE coordinates_source = 'raw_data' -- only coordinates that came with the raw data are counted
14 |     AND latitude IS NOT NULL
15 |     AND longitude IS NOT NULL
16 |     AND on_street_name IS NOT NULL
17 |     AND (
18 |       cross_street_name IS NOT NULL
19 |       OR on_street_name LIKE '% bridge' -- on-street names ending in " bridge" are allowed without a cross street
20 |     )
21 |   GROUP BY on_street_name, cross_street_name, borough, taxi_zone_gid, nyct2010_gid, lat, lng
22 | )
23 | SELECT DISTINCT ON (on_street_name, cross_street_name) * -- keeps the first row per street pair under the ORDER BY below
24 | FROM candidates
25 | WHERE n >= 2 -- require at least 2 collisions at the same rounded location
26 | ORDER BY on_street_name, cross_street_name, n DESC; -- highest count first; ties have no further ordering
27 | 
28 | WITH most_common AS ( -- collisions without coordinates matched to a street pair above
29 |   SELECT
30 |     c.unique_key,
31 |     mcc.lat,
32 |     mcc.lng,
33 |     mcc.taxi_zone_gid,
34 |     mcc.borough,
35 |     mcc.nyct2010_gid
36 |   FROM collisions c
37 |     INNER JOIN most_common_coords mcc
38 |       ON c.on_street_name IS NOT NULL
39 |       AND c.on_street_name = mcc.on_street_name
40 |       AND coalesce(c.cross_street_name, '') = coalesce(mcc.cross_street_name, '') -- NULL cross streets match each other
41 |   WHERE c.latitude IS NULL
42 |     AND c.longitude IS NULL
43 | )
44 | UPDATE collisions
45 | SET latitude = most_common.lat, -- the stored values are the 4-digit rounded coordinates
46 |     longitude = most_common.lng,
47 |     taxi_zone_gid = most_common.taxi_zone_gid,
48 |     borough = most_common.borough,
49 |     nyct2010_gid = most_common.nyct2010_gid,
50 |     coordinates_source = 'most_common_in_raw_data'
51 | FROM most_common
52 | WHERE collisions.unique_key = most_common.unique_key;
53 | 
54 | DROP TABLE most_common_coords;
55 | 


--------------------------------------------------------------------------------
/augment_data/geocode.rb:
--------------------------------------------------------------------------------
  1 | require 'bundler/setup'
  2 | Bundler.require
  3 | 
  4 | ActiveRecord::Base.establish_connection(
  5 |   adapter: 'postgresql',
  6 |   host: 'localhost',
  7 |   database: 'nyc-motor-vehicle-collisions'
  8 | )
  9 | 
 10 | class Geocoding < ActiveRecord::Base
 11 |   API_BASE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
 12 |   API_KEY = Hash[*ARGV]['--google-api-key']
 13 |   NYC_BOUNDS = '40.4,-74.3|41.0,-73.65'
 14 | 
 15 |   def geocode!
 16 |     raise 'no api key' if API_KEY.blank?
 17 | 
 18 |     begin
 19 |       request = RestClient.get(API_BASE_URL, params: {
 20 |         address: address_for_geocode,
 21 |         bounds: NYC_BOUNDS,
 22 |         key: API_KEY
 23 |       })
 24 |     rescue RestClient::BadRequest
 25 |       puts "Bad request for #{id}"
 26 |       return
 27 |     end
 28 | 
 29 |     json = JSON.parse(request.body)
 30 |     result = json.dig('results', 0)
 31 | 
 32 |     if result && (result['types'] & acceptable_google_types).present?
 33 |       self.latitude = result.dig('geometry', 'location', 'lat')
 34 |       self.longitude = result.dig('geometry', 'location', 'lng')
 35 |     end
 36 | 
 37 |     self.full_response = json
 38 | 
 39 |     save!
 40 |   end
 41 | end
 42 | 
 43 | class IntersectionGeocoding < Geocoding
 44 |   scope :for_geocoding, -> { where(full_response: nil) }
 45 | 
 46 |   def address_for_geocode
 47 |     raise "#{id} missing on_street_name" if on_street_name.blank?
 48 |     raise "#{id} missing cross_street_name" if cross_street_name.blank?
 49 | 
 50 |     [
 51 |       "#{on_street_name} and #{cross_street_name}",
 52 |       reported_borough,
 53 |       'nyc'
 54 |     ].compact.join(', ').downcase
 55 |   end
 56 | 
 57 |   def acceptable_google_types
 58 |     %w(intersection)
 59 |   end
 60 | end
 61 | 
 62 | class StreetAddressGeocoding < Geocoding
 63 |   scope :for_geocoding, -> {
 64 |     where("
 65 |       full_response IS NULL
 66 |       AND off_street_name NOT LIKE '%parking lot%'
 67 |       AND off_street_name NOT LIKE '%p/l%'
 68 |       AND off_street_name NOT LIKE '%pl of%'
 69 |       AND off_street_name NOT LIKE '%muni lot%'
 70 |       AND off_street_name NOT LIKE '%driveway%'
 71 |       AND NOT (
 72 |         off_street_name ~* '^\\d+ east drive' AND reported_borough IS NULL
 73 |       )
 74 |       AND off_street_name ~* '\\d'
 75 |     ")
 76 |   }
 77 | 
 78 |   def address_for_geocode
 79 |     raise "#{id} missing off_street_name" if off_street_name.blank?
 80 | 
 81 |     [
 82 |       off_street_name.squish,
 83 |       reported_borough,
 84 |       'nyc'
 85 |     ].compact.join(', ').downcase
 86 |   end
 87 | 
 88 |   def acceptable_google_types
 89 |     %w(street_address premise)
 90 |   end
 91 | end
 92 | 
 93 | def run
 94 |   if Geocoding::API_KEY.blank?
 95 |     puts [
 96 |       'You have to specify a Google Maps Geocoding API key',
 97 |       'Usage:',
 98 |       '  ruby geocode.rb --google-api-key YOUR_API_KEY_HERE'
 99 |     ].join("\n\n")
100 | 
101 |     return
102 |   end
103 | 
104 |   [IntersectionGeocoding, StreetAddressGeocoding].each do |klass|
105 |     scope = klass.for_geocoding
106 |     puts "#{Time.now}: going to geocode #{scope.count} #{klass.name} addresses"
107 | 
108 |     scope.find_each.with_index do |g, i|
109 |       puts "#{Time.now}: done #{i} addresses" if i > 0 && i % 50 == 0
110 |       g.geocode!
111 |     end
112 | 
113 |     puts "#{Time.now}: finished #{klass.name}"
114 |   end
115 | end
116 | 
117 | run
118 | 


--------------------------------------------------------------------------------
/augment_data/populate_geocodings.sql:
--------------------------------------------------------------------------------
 1 | INSERT INTO geocodings (type, on_street_name, cross_street_name, reported_borough, n) -- one row per distinct intersection still lacking coordinates
 2 | SELECT
 3 |   'IntersectionGeocoding'::text AS type,
 4 |   on_street_name,
 5 |   cross_street_name,
 6 |   reported_borough,
 7 |   COUNT(*) AS n -- number of collisions sharing this intersection
 8 | FROM collisions
 9 | WHERE latitude IS NULL
10 |   AND longitude IS NULL
11 |   AND on_street_name IS NOT NULL
12 |   AND cross_street_name IS NOT NULL
13 | GROUP BY on_street_name, cross_street_name, reported_borough
14 | ORDER BY COUNT(*) DESC, on_street_name, cross_street_name, reported_borough; -- most frequent first, so they receive the lowest serial ids
15 | 
16 | INSERT INTO geocodings (type, off_street_name, reported_borough, n) -- one row per distinct off-street address still lacking coordinates
17 | SELECT
18 |   'StreetAddressGeocoding'::text AS type,
19 |   off_street_name,
20 |   reported_borough,
21 |   COUNT(*) AS n
22 | FROM collisions
23 | WHERE latitude IS NULL
24 |   AND longitude IS NULL
25 |   AND off_street_name IS NOT NULL
26 | GROUP BY off_street_name, reported_borough
27 | ORDER BY COUNT(*) DESC, off_street_name, reported_borough;
28 | 


--------------------------------------------------------------------------------
/download_raw_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download the collisions CSV export; the URL is quoted so its "?" is never treated as a glob character
3 | echo "`date`: downloading raw data"
4 | mkdir -p raw_data
5 | wget -c -O raw_data/collisions.csv "https://data.cityofnewyork.us/api/views/h9gi-nx95/rows.csv?accessType=DOWNLOAD"
6 | echo "`date`: done downloading raw data"
7 | 


--------------------------------------------------------------------------------
/import_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the import, vehicle/factor population, and index SQL files in order against nyc-motor-vehicle-collisions
 3 | echo "`date`: importing raw data"
 4 | psql nyc-motor-vehicle-collisions -f setup_files/import_data.sql
 5 | echo "`date`: done importing raw data; populating vehicles and contributing factors"
 6 | psql nyc-motor-vehicle-collisions -f setup_files/populate_vehicles_and_factors.sql
 7 | echo "`date`: done populating; creating indexes"
 8 | psql nyc-motor-vehicle-collisions -f setup_files/create_indexes.sql
 9 | echo "`date`: done"
10 | 


--------------------------------------------------------------------------------
/initialize_database.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Creates the database, loads the schema, and imports the two shapefiles as tables
 3 | createdb nyc-motor-vehicle-collisions
 4 | 
 5 | psql nyc-motor-vehicle-collisions -f setup_files/create_schema.sql
 6 | # -s 2263:4326 reprojects the shapefile from SRID 2263 to 4326; -I adds a spatial index
 7 | shp2pgsql -I -s 2263:4326 shapefiles/taxi_zones/taxi_zones.shp | psql -d nyc-motor-vehicle-collisions
 8 | psql nyc-motor-vehicle-collisions -c "CREATE INDEX ON taxi_zones (locationid);"
 9 | psql nyc-motor-vehicle-collisions -c "VACUUM ANALYZE taxi_zones;"
10 | # the extra argument names the target table nyct2010 explicitly
11 | shp2pgsql -I -s 2263:4326 shapefiles/nyct2010wi_18d/nyct2010wi.shp nyct2010 | psql -d nyc-motor-vehicle-collisions
12 | psql nyc-motor-vehicle-collisions -c "VACUUM ANALYZE nyct2010;"
13 | 


--------------------------------------------------------------------------------
/setup_files/create_indexes.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX IF NOT EXISTS collisions_taxi_zone_gid_idx ON collisions (taxi_zone_gid); -- named + IF NOT EXISTS: import_data.sh reruns this file, and unnamed indexes would be duplicated on every run
2 | CREATE INDEX IF NOT EXISTS collisions_borough_idx ON collisions (borough);
3 | CREATE INDEX IF NOT EXISTS collisions_collision_time_idx ON collisions USING brin (collision_time) WITH (pages_per_range = 32);
4 | 


--------------------------------------------------------------------------------
/setup_files/create_schema.sql:
--------------------------------------------------------------------------------
  1 | CREATE EXTENSION postgis;
  2 | 
  3 | CREATE TABLE collisions_raw ( -- staging table; setup_files/import_data.sql fills it with \copy (no column list) and truncates it afterwards
  4 |   date date,
  5 |   time time,
  6 |   borough text,
  7 |   zip_code text,
  8 |   latitude numeric,
  9 |   longitude numeric,
 10 |   location text,
 11 |   on_street_name text,
 12 |   cross_street_name text,
 13 |   off_street_name text,
 14 |   number_of_persons_injured integer,
 15 |   number_of_persons_killed integer,
 16 |   number_of_pedestrians_injured integer,
 17 |   number_of_pedestrians_killed integer,
 18 |   number_of_cyclists_injured integer,
 19 |   number_of_cyclists_killed integer,
 20 |   number_of_motorists_injured integer,
 21 |   number_of_motorists_killed integer,
 22 |   contributing_factor_vehicle_1 text,
 23 |   contributing_factor_vehicle_2 text,
 24 |   contributing_factor_vehicle_3 text,
 25 |   contributing_factor_vehicle_4 text,
 26 |   contributing_factor_vehicle_5 text,
 27 |   unique_key integer primary key,
 28 |   vehicle_type_code_1 text,
 29 |   vehicle_type_code_2 text,
 30 |   vehicle_type_code_3 text,
 31 |   vehicle_type_code_4 text,
 32 |   vehicle_type_code_5 text
 33 | );
 34 | 
 35 | CREATE TABLE collisions ( -- cleaned collisions, one row per unique_key
 36 |   unique_key integer primary key,
 37 |   collision_time timestamp without time zone, -- raw date and time combined on import
 38 |   taxi_zone_gid integer, -- gid of the taxi_zones polygon containing the coordinates
 39 |   nyct2010_gid integer, -- gid of the nyct2010 polygon containing the coordinates
 40 |   borough text, -- derived from the nyct2010 / taxi_zones lookup
 41 |   reported_borough text, -- borough as given in the raw data, trimmed and lowercased on import
 42 |   zip_code text,
 43 |   latitude numeric,
 44 |   longitude numeric,
 45 |   on_street_name text,
 46 |   cross_street_name text,
 47 |   off_street_name text,
 48 |   number_of_persons_injured integer,
 49 |   number_of_persons_killed integer,
 50 |   number_of_pedestrians_injured integer,
 51 |   number_of_pedestrians_killed integer,
 52 |   number_of_cyclists_injured integer,
 53 |   number_of_cyclists_killed integer,
 54 |   number_of_motorists_injured integer,
 55 |   number_of_motorists_killed integer,
 56 |   contributing_factor_vehicle_1 text,
 57 |   contributing_factor_vehicle_2 text,
 58 |   contributing_factor_vehicle_3 text,
 59 |   contributing_factor_vehicle_4 text,
 60 |   contributing_factor_vehicle_5 text,
 61 |   vehicle_type_code_1 text,
 62 |   vehicle_type_code_2 text,
 63 |   vehicle_type_code_3 text,
 64 |   vehicle_type_code_4 text,
 65 |   vehicle_type_code_5 text,
 66 |   coordinates_source text -- 'raw_data', 'most_common_in_raw_data', 'geocoding', or NULL when coordinates are unknown
 67 | );
 68 | 
 69 | CREATE TABLE collisions_vehicles ( -- one row per non-NULL vehicle_type_code_N of a collision
 70 |   collision_unique_key integer not null,
 71 |   vehicle_number integer not null, -- 1-5, the N of the source column
 72 |   vehicle_type text,
 73 |   primary key (collision_unique_key, vehicle_number)
 74 | );
 75 | 
 76 | CREATE TABLE collisions_contributing_factors ( -- one row per non-NULL contributing_factor_vehicle_N of a collision
 77 |   collision_unique_key integer not null,
 78 |   contributing_number integer not null, -- 1-5, the N of the source column
 79 |   contributing_factor text,
 80 |   primary key (collision_unique_key, contributing_number)
 81 | );
 82 | 
 83 | CREATE TABLE geocodings ( -- distinct addresses to geocode, plus the geocoder's answer
 84 |   id serial primary key,
 85 |   type text not null, -- 'IntersectionGeocoding' or 'StreetAddressGeocoding' (class names in augment_data/geocode.rb)
 86 |   on_street_name text,
 87 |   cross_street_name text,
 88 |   off_street_name text,
 89 |   reported_borough text,
 90 |   n integer, -- number of collisions sharing this address when the row was inserted
 91 |   latitude numeric,
 92 |   longitude numeric,
 93 |   full_response jsonb, -- geocoder response saved by geocode.rb; NULL means not geocoded yet
 94 |   taxi_zone_gid integer,
 95 |   nyct2010_gid integer,
 96 |   borough text
 97 | );
 98 | 
 99 | CREATE UNIQUE INDEX ON geocodings (on_street_name, cross_street_name, reported_borough) WHERE off_street_name IS NULL; -- intersections with a borough
100 | CREATE UNIQUE INDEX ON geocodings (on_street_name, cross_street_name) WHERE off_street_name IS NULL AND reported_borough IS NULL; -- needed separately: NULL boroughs never collide in the index above
101 | CREATE UNIQUE INDEX ON geocodings (off_street_name, reported_borough) WHERE on_street_name IS NULL; -- street addresses with a borough
102 | CREATE UNIQUE INDEX ON geocodings (off_street_name) WHERE on_street_name IS NULL AND reported_borough IS NULL; -- street addresses without a borough
103 | 


--------------------------------------------------------------------------------
/setup_files/import_data.sql:
--------------------------------------------------------------------------------
 1 | SET datestyle = 'ISO, MDY'; -- ambiguous dates in the CSV are read as month/day/year
 2 | 
 3 | \copy collisions_raw FROM 'raw_data/collisions.csv' CSV HEADER;
 4 | 
 5 | CREATE TABLE tmp_points AS -- one point per raw row that has both coordinates
 6 | SELECT
 7 |   unique_key,
 8 |   ST_SetSRID(ST_MakePoint(longitude, latitude), 4326) AS location
 9 | FROM collisions_raw
10 | WHERE longitude IS NOT NULL AND latitude IS NOT NULL;
11 | 
12 | CREATE INDEX ON tmp_points USING gist (location);
13 | 
14 | CREATE TABLE tmp_zones AS -- taxi zone containing each point
15 | SELECT t.unique_key, z.gid, z.borough
16 | FROM tmp_points t, taxi_zones z
17 | WHERE ST_Within(t.location, z.geom);
18 | 
19 | CREATE UNIQUE INDEX ON tmp_zones (unique_key); -- errors if a point matched more than one zone
20 | 
21 | CREATE TABLE tmp_tracts AS -- census tract containing each point
22 | SELECT t.unique_key, n.gid, n.boroname
23 | FROM tmp_points t, nyct2010 n
24 | WHERE ST_Within(t.location, n.geom);
25 | 
26 | CREATE UNIQUE INDEX ON tmp_tracts (unique_key); -- errors if a point matched more than one tract
27 | 
28 | DELETE FROM collisions -- keys present in the new file are removed first, then re-inserted below
29 | WHERE unique_key IN (SELECT unique_key FROM collisions_raw);
30 | 
31 | INSERT INTO collisions (
32 |   unique_key, collision_time, taxi_zone_gid, nyct2010_gid, borough,
33 |   reported_borough, zip_code, latitude, longitude, on_street_name,
34 |   cross_street_name, off_street_name, number_of_persons_injured,
35 |   number_of_persons_killed, number_of_pedestrians_injured,
36 |   number_of_pedestrians_killed, number_of_cyclists_injured,
37 |   number_of_cyclists_killed, number_of_motorists_injured,
38 |   number_of_motorists_killed, contributing_factor_vehicle_1,
39 |   contributing_factor_vehicle_2, contributing_factor_vehicle_3,
40 |   contributing_factor_vehicle_4, contributing_factor_vehicle_5,
41 |   vehicle_type_code_1, vehicle_type_code_2, vehicle_type_code_3,
42 |   vehicle_type_code_4, vehicle_type_code_5, coordinates_source
43 | )
44 | SELECT
45 |   r.unique_key,
46 |   (r.date || ' ' || r.time)::timestamp without time zone AS collision_time, -- date and time joined as text, then cast
47 |   z.gid AS taxi_zone_gid,
48 |   t.gid AS nyct2010_gid,
49 |   -- prefer borough from nyct2010 over taxi_zones because nyct2010
50 |   -- includes water areas and therefore captures bridge accidents
51 |   coalesce(t.boroname, z.borough) AS borough,
52 |   nullif(lower(trim(r.borough)), '') AS reported_borough,
53 |   nullif(trim(r.zip_code), '') AS zip_code,
54 |   nullif(r.latitude, 0) AS latitude, -- a 0 coordinate is stored as NULL
55 |   nullif(r.longitude, 0) AS longitude,
56 |   nullif(lower(trim(regexp_replace(r.on_street_name, '\s+', ' ', 'g'))), '') AS on_street_name, -- collapse whitespace runs, trim, lowercase; '' becomes NULL
57 |   nullif(lower(trim(regexp_replace(r.cross_street_name, '\s+', ' ', 'g'))), '') AS cross_street_name,
58 |   nullif(lower(trim(regexp_replace(r.off_street_name, '\s+', ' ', 'g'))), '') AS off_street_name,
59 |   r.number_of_persons_injured,
60 |   r.number_of_persons_killed,
61 |   r.number_of_pedestrians_injured,
62 |   r.number_of_pedestrians_killed,
63 |   r.number_of_cyclists_injured,
64 |   r.number_of_cyclists_killed,
65 |   r.number_of_motorists_injured,
66 |   r.number_of_motorists_killed,
67 |   nullif(lower(trim(r.contributing_factor_vehicle_1)), '') AS contributing_factor_vehicle_1, -- trim, lowercase; '' becomes NULL
68 |   nullif(lower(trim(r.contributing_factor_vehicle_2)), '') AS contributing_factor_vehicle_2,
69 |   nullif(lower(trim(r.contributing_factor_vehicle_3)), '') AS contributing_factor_vehicle_3,
70 |   nullif(lower(trim(r.contributing_factor_vehicle_4)), '') AS contributing_factor_vehicle_4,
71 |   nullif(lower(trim(r.contributing_factor_vehicle_5)), '') AS contributing_factor_vehicle_5,
72 |   nullif(lower(trim(r.vehicle_type_code_1)), '') AS vehicle_type_code_1,
73 |   nullif(lower(trim(r.vehicle_type_code_2)), '') AS vehicle_type_code_2,
74 |   nullif(lower(trim(r.vehicle_type_code_3)), '') AS vehicle_type_code_3,
75 |   nullif(lower(trim(r.vehicle_type_code_4)), '') AS vehicle_type_code_4,
76 |   nullif(lower(trim(r.vehicle_type_code_5)), '') AS vehicle_type_code_5,
77 |   CASE -- 'raw_data' only when both coordinates are present and non-zero; otherwise NULL
78 |     WHEN nullif(r.latitude, 0) IS NOT NULL AND nullif(r.longitude, 0) IS NOT NULL
79 |     THEN 'raw_data'
80 |   END AS coordinates_source
81 | FROM collisions_raw r
82 |   LEFT JOIN tmp_zones z ON r.unique_key = z.unique_key
83 |   LEFT JOIN tmp_tracts t ON r.unique_key = t.unique_key
84 | ORDER BY r.date, r.time;
85 | 
86 | TRUNCATE TABLE collisions_raw; -- the staging table is emptied after each import
87 | DROP TABLE tmp_points;
88 | DROP TABLE tmp_zones;
89 | DROP TABLE tmp_tracts;
90 | 


--------------------------------------------------------------------------------
/setup_files/populate_vehicles_and_factors.sql:
--------------------------------------------------------------------------------
 1 | INSERT INTO collisions_vehicles -- one row per non-NULL vehicle_type_code_N, upserted on (collision, slot)
 2 |   (collision_unique_key, vehicle_number, vehicle_type)
 3 | WITH vehicles AS (
 4 |   SELECT
 5 |     unique_key,
 6 |     ARRAY[1, 2, 3, 4, 5] AS vehicle_number_array,
 7 |     ARRAY[
 8 |       vehicle_type_code_1,
 9 |       vehicle_type_code_2,
10 |       vehicle_type_code_3,
11 |       vehicle_type_code_4,
12 |       vehicle_type_code_5
13 |     ] AS vehicle_type_array
14 |   FROM collisions
15 | ),
16 | unnested AS (
17 |   SELECT
18 |     unique_key,
19 |     unnest(vehicle_number_array) AS vehicle_number, -- both arrays have 5 elements, so slot N pairs with code N
20 |     unnest(vehicle_type_array) AS vehicle_type
21 |   FROM vehicles
22 | )
23 | SELECT unique_key, vehicle_number, vehicle_type
24 | FROM unnested
25 | WHERE vehicle_type IS NOT NULL
26 | ORDER BY unique_key, vehicle_number
27 | ON CONFLICT (collision_unique_key, vehicle_number)
28 | DO UPDATE SET vehicle_type = EXCLUDED.vehicle_type; -- an existing row for the slot is overwritten
29 | 
30 | INSERT INTO collisions_contributing_factors -- one row per non-NULL contributing_factor_vehicle_N, upserted on (collision, slot)
31 |   (collision_unique_key, contributing_number, contributing_factor)
32 | WITH vehicles AS (
33 |   SELECT
34 |     unique_key,
35 |     ARRAY[1, 2, 3, 4, 5] AS contributing_number_array,
36 |     ARRAY[
37 |       contributing_factor_vehicle_1,
38 |       contributing_factor_vehicle_2,
39 |       contributing_factor_vehicle_3,
40 |       contributing_factor_vehicle_4,
41 |       contributing_factor_vehicle_5
42 |     ] AS contributing_factor_array
43 |   FROM collisions
44 | ),
45 | unnested AS (
46 |   SELECT
47 |     unique_key,
48 |     unnest(contributing_number_array) AS contributing_number, -- both arrays have 5 elements, so slot N pairs with factor N
49 |     unnest(contributing_factor_array) AS contributing_factor
50 |   FROM vehicles
51 | )
52 | SELECT unique_key, contributing_number, contributing_factor
53 | FROM unnested
54 | WHERE contributing_factor IS NOT NULL
55 | ORDER BY unique_key, contributing_number
56 | ON CONFLICT (collision_unique_key, contributing_number)
57 | DO UPDATE SET contributing_factor = EXCLUDED.contributing_factor; -- an existing row for the slot is overwritten
58 | 


--------------------------------------------------------------------------------
/shapefiles/nyct2010wi_18d/nyct2010wi.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/nyct2010wi_18d/nyct2010wi.dbf


--------------------------------------------------------------------------------
/shapefiles/nyct2010wi_18d/nyct2010wi.prj:
--------------------------------------------------------------------------------
1 | PROJCS["NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-74.0],PARAMETER["Standard_Parallel_1",40.66666666666666],PARAMETER["Standard_Parallel_2",41.03333333333333],PARAMETER["Latitude_Of_Origin",40.16666666666666],UNIT["Foot_US",0.3048006096012192]]


--------------------------------------------------------------------------------
/shapefiles/nyct2010wi_18d/nyct2010wi.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/nyct2010wi_18d/nyct2010wi.shp


--------------------------------------------------------------------------------
/shapefiles/nyct2010wi_18d/nyct2010wi.shp.xml:
--------------------------------------------------------------------------------
1 | <metadata xml:lang="en"><Esri><CreaDate>20181116</CreaDate><CreaTime>08534400</CreaTime><ArcGISFormat>1.0</ArcGISFormat><ArcGISstyle>North American Profile of ISO19115 2003</ArcGISstyle><SyncOnce>FALSE</SyncOnce><DataProperties><itemProps><itemName Sync="TRUE">nyct2010wi</itemName><itemLocation><linkage Sync="FALSE">withheld</linkage><protocol Sync="TRUE">Local Area Network</protocol></itemLocation><imsContentType Sync="TRUE">002</imsContentType><nativeExtBox><westBL Sync="TRUE">912287.068787</westBL><eastBL Sync="TRUE">1067382.508423</eastBL><southBL Sync="TRUE">116411.371399</southBL><northBL Sync="TRUE">273617.843201</northBL><exTypeCode Sync="TRUE">1</exTypeCode></nativeExtBox><itemSize Sync="TRUE">0.000</itemSize></itemProps><coordRef><type Sync="TRUE">Projected</type><geogcsn Sync="TRUE">GCS_North_American_1983</geogcsn><csUnits Sync="TRUE">Linear Unit: Foot_US (0.304801)</csUnits><projcsn Sync="TRUE">NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet</projcsn><peXml Sync="TRUE">&lt;ProjectedCoordinateSystem xsi:type='typens:ProjectedCoordinateSystem' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema' 
xmlns:typens='http://www.esri.com/schemas/ArcGIS/10.1'&gt;&lt;WKT&gt;PROJCS[&amp;quot;NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet&amp;quot;,GEOGCS[&amp;quot;GCS_North_American_1983&amp;quot;,DATUM[&amp;quot;D_North_American_1983&amp;quot;,SPHEROID[&amp;quot;GRS_1980&amp;quot;,6378137.0,298.257222101]],PRIMEM[&amp;quot;Greenwich&amp;quot;,0.0],UNIT[&amp;quot;Degree&amp;quot;,0.0174532925199433]],PROJECTION[&amp;quot;Lambert_Conformal_Conic&amp;quot;],PARAMETER[&amp;quot;False_Easting&amp;quot;,984250.0],PARAMETER[&amp;quot;False_Northing&amp;quot;,0.0],PARAMETER[&amp;quot;Central_Meridian&amp;quot;,-74.0],PARAMETER[&amp;quot;Standard_Parallel_1&amp;quot;,40.66666666666666],PARAMETER[&amp;quot;Standard_Parallel_2&amp;quot;,41.03333333333333],PARAMETER[&amp;quot;Latitude_Of_Origin&amp;quot;,40.16666666666666],UNIT[&amp;quot;Foot_US&amp;quot;,0.3048006096012192],AUTHORITY[&amp;quot;EPSG&amp;quot;,2263]]&lt;/WKT&gt;&lt;XOrigin&gt;-120039300&lt;/XOrigin&gt;&lt;YOrigin&gt;-96540300&lt;/YOrigin&gt;&lt;XYScale&gt;37212589.015695661&lt;/XYScale&gt;&lt;ZOrigin&gt;-100000&lt;/ZOrigin&gt;&lt;ZScale&gt;10000&lt;/ZScale&gt;&lt;MOrigin&gt;-100000&lt;/MOrigin&gt;&lt;MScale&gt;10000&lt;/MScale&gt;&lt;XYTolerance&gt;0.0032808333333333331&lt;/XYTolerance&gt;&lt;ZTolerance&gt;0.001&lt;/ZTolerance&gt;&lt;MTolerance&gt;0.001&lt;/MTolerance&gt;&lt;HighPrecision&gt;true&lt;/HighPrecision&gt;&lt;WKID&gt;102718&lt;/WKID&gt;&lt;LatestWKID&gt;2263&lt;/LatestWKID&gt;&lt;/ProjectedCoordinateSystem&gt;</peXml></coordRef><lineage><Process ToolSource="c:\program files (x86)\arcgis\desktop10.2\ArcToolbox\Toolboxes\Analysis Tools.tbx\Select" Date="20181116" Time="090538">Select c:\temp\BYTES_GP\Districts.gdb\nyct2010wi C:\temp\18D\GIS_OUTPUT\Districts\shp\nyct2010wi__Iteration.shp "ObjectID&gt;=0 AND ObjectID&lt;3000"</Process><Process ToolSource="c:\program files (x86)\arcgis\desktop10.2\ArcToolbox\Toolboxes\Analysis Tools.tbx\Select" Date="20181116" Time="090539">Select 
C:\temp\18D\GIS_OUTPUT\Districts\shp\nyct2010wi__Iteration.shp C:\temp\18D\GIS_OUTPUT\Districts\shp\nyct2010wi.shp #</Process></lineage></DataProperties><SyncDate>20181116</SyncDate><SyncTime>09053900</SyncTime><ModDate>20181116</ModDate><ModTime>09053900</ModTime><scaleRange><minScale>150000000</minScale><maxScale>5000</maxScale></scaleRange></Esri><mdLang><languageCode value="eng"/><countryCode value="USA" Sync="TRUE"/></mdLang><mdChar><CharSetCd value="004"/></mdChar><mdHrLv><ScopeCd value="005"/></mdHrLv><mdContact><rpOrgName>New York City Department of City Planning</rpOrgName><rpCntInfo><cntAddress addressType="both"><delPoint>120 Broadway, 31st Floor</delPoint><city>New York</city><adminArea>New York</adminArea><postCode>10271</postCode><country>US</country></cntAddress></rpCntInfo><role><RoleCd value="007"/></role></mdContact><distInfo><distributor><distorCont><rpOrgName>New York City Department of City Planning</rpOrgName><rpCntInfo><cntAddress addressType="both"><delPoint>120 Broadway, 31st Floor</delPoint><city>New York</city><adminArea>New York</adminArea><postCode>10271</postCode><country>US</country></cntAddress><cntInstr>Available at the following website: https://www1.nyc.gov/site/planning/data-maps/open-data.page</cntInstr></rpCntInfo><role><RoleCd value="005"/></role></distorCont><distorOrdPrc><resFees>Free</resFees></distorOrdPrc></distributor><distTranOps><onLineSrc><linkage>https://www1.nyc.gov/site/planning/data-maps/open-data.page</linkage></onLineSrc><transSize Sync="TRUE">0.000</transSize></distTranOps><distFormat><formatName Sync="FALSE">ESRI Shapefile</formatName><formatVer>Open Specification</formatVer></distFormat></distInfo><dataIdInfo><idCitation><resTitle Sync="FALSE">New York City Census Tracts for 2010 US Census Water Included</resTitle><resEd>18D</resEd><citRespParty><rpOrgName>New York City Department of City Planning</rpOrgName><role><RoleCd value="006"/></role></citRespParty><citRespParty><rpOrgName>New York City Department of 
City Planning</rpOrgName><role><RoleCd value="010"/></role></citRespParty><datasetSeries><seriesName>BYTES of the BIG APPLE</seriesName><issId>18D</issId></datasetSeries><date><createDate>10/23/2018 8:49:29 AM</createDate><pubDate>11/19/2018 8:49:49 AM</pubDate></date><presForm><PresFormCd value="005" Sync="TRUE"/></presForm></idCitation><idAbs>&lt;DIV STYLE="text-align:Left;"&gt;&lt;DIV&gt;&lt;P&gt;&lt;SPAN&gt;The Census Tracts for the 2010 US Census. These boundary files are derived from the US Census Bureau's TIGER project and have been geographically modified to fit the New York City base map.&lt;/SPAN&gt;&lt;/P&gt;&lt;/DIV&gt;&lt;/DIV&gt;</idAbs><idPurp>These districts were created by the Department of City Planning to aid city agencies in administering public services.</idPurp><idStatus><ProgCd value="001"/></idStatus><idPoC><rpOrgName>New York City Department of City Planning</rpOrgName><rpCntInfo><cntAddress addressType="both"><delPoint>120 Broadway, 31st Floor</delPoint><city>New York</city><adminArea>NY</adminArea><postCode>10271</postCode><country>US</country></cntAddress></rpCntInfo><role><RoleCd value="007"/></role></idPoC><resMaint><maintFreq><MaintFreqCd value="006"/></maintFreq></resMaint><placeKeys><keyword>New York</keyword><keyword>Manhattan</keyword><keyword>Queens</keyword><keyword>Brooklyn</keyword><keyword>Richmond</keyword><keyword>Bronx</keyword><keyword>Kings</keyword><keyword>Staten Island</keyword><keyword>New York City</keyword></placeKeys><themeKeys><keyword>boundaries</keyword><keyword>boundary</keyword><keyword>borough</keyword></themeKeys><searchKeys><keyword>New York</keyword><keyword>Manhattan</keyword><keyword>Queens</keyword><keyword>Brooklyn</keyword><keyword>Richmond</keyword><keyword>boundaries</keyword><keyword>Bronx</keyword><keyword>Kings</keyword><keyword>boundary</keyword><keyword>borough</keyword><keyword>Staten Island</keyword><keyword>New York City</keyword></searchKeys><resConst><LegConsts><othConsts>The data is 
freely available to all New York City agencies and the public.</othConsts></LegConsts></resConst><resConst><SecConsts><class><ClasscationCd value="001"/></class></SecConsts></resConst><resConst><Consts><useLimit>&lt;DIV STYLE="text-align:Left;"&gt;&lt;P&gt;&lt;SPAN&gt;This dataset is being provided by the Department of City Planning (DCP) on DCP’s website for informational purposes only. DCP does not warranty the completeness, accuracy, content, or fitness for any particular purpose or use of the dataset, nor are any such warranties to be implied or inferred with respect to the dataset as furnished on the website. DCP and the City are not liable for any deficiencies in the completeness, accuracy, content, or fitness for any particular purpose or use the dataset, or applications utilizing the dataset, provided by any third party.&lt;/SPAN&gt;&lt;/P&gt;&lt;/DIV&gt;</useLimit></Consts></resConst><dataLang><languageCode value="eng"/><countryCode value="USA" Sync="TRUE"/></dataLang><dataExt><exDesc>ground condition</exDesc></dataExt><idCredit>Department of City Planning.</idCredit><tpCat><TopicCatCd value="003"/></tpCat><envirDesc Sync="TRUE">Microsoft Windows 7 Version 6.1 (Build 7601) Service Pack 1; Esri ArcGIS 10.2.0.3348</envirDesc><spatRpType><SpatRepTypCd value="001" Sync="TRUE"/></spatRpType><dataExt><geoEle><GeoBndBox esriExtentType="search"><exTypeCode Sync="TRUE">1</exTypeCode><westBL Sync="TRUE">-74.260380</westBL><eastBL Sync="TRUE">-73.699206</eastBL><northBL Sync="TRUE">40.917691</northBL><southBL Sync="TRUE">40.485808</southBL></GeoBndBox></geoEle></dataExt></dataIdInfo><mdMaint><maintFreq><MaintFreqCd value="006"/></maintFreq></mdMaint><dqInfo><dqScope><scpLvl><ScopeCd value="005"/></scpLvl></dqScope><report type="DQCompOm"><measDesc>These data are accurate as of US Census 2010.</measDesc></report><report dimension="horizontal" type="DQAbsExtPosAcc"><measDesc>The District files are created from the same release version of the Department of City Planning 
LION file. The LION file is spatially aligned with NYCMap aerial photography.</measDesc></report></dqInfo><eainfo Sync="FALSE"><detailed Name="nyct2010wi"><enttyp><enttypl Sync="TRUE">nyct2010wi</enttypl><enttypt Sync="TRUE">Feature Class</enttypt><enttypc Sync="TRUE">0</enttypc></enttyp><attr><attrlabl Sync="TRUE">FID</attrlabl><attalias Sync="TRUE">FID</attalias><attrtype Sync="TRUE">OID</attrtype><attwidth Sync="TRUE">4</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef Sync="TRUE">Internal feature number.</attrdef><attrdefs Sync="TRUE">Esri</attrdefs><attrdomv><udom Sync="TRUE">Sequential unique whole numbers that are automatically generated.</udom></attrdomv></attr><attr><attrlabl>CTLabel</attrlabl><attalias Sync="TRUE">CTLabel</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">7</attwidth><attrdef>The census tract identifier for the polygon. Each census tract number is unique to its borough.</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>BoroCode</attrlabl><attalias Sync="TRUE">BoroCode</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">1</attwidth><attrdef>Borough of census tract.</attrdef><attrdomv><edom><edomv>1</edomv><edomvd>Manhattan</edomvd></edom><edom><edomv>2</edomv><edomvd>Bronx</edomvd></edom><edom><edomv>3</edomv><edomvd>Brooklyn</edomvd></edom><edom><edomv>4</edomv><edomvd>Queens</edomvd></edom><edom><edomv>5</edomv><edomvd>Staten Island</edomvd></edom></attrdomv><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>BoroName</attrlabl><attalias Sync="TRUE">BoroName</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">32</attwidth><attrdef>Borough Name.</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>CDEligibil</attrlabl><attalias Sync="TRUE">CDEligibil</attalias><attrtype Sync="TRUE">String</attrtype><attwidth 
Sync="TRUE">1</attwidth><attrdef>Community Development Block Grant Eligibility.</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>CT2010</attrlabl><attalias Sync="TRUE">CT2010</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">6</attwidth><attrdef>String value of the census tract number.</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>BoroCT2010</attrlabl><attalias Sync="TRUE">BoroCT2010</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">7</attwidth><attrdef>Merged string of borough code and census tract number.</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl>PUMA</attrlabl><attalias Sync="TRUE">PUMA</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">4</attwidth><attrdef>Public Use Microdata Areas</attrdef><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl Sync="TRUE">Shape</attrlabl><attalias Sync="TRUE">Shape</attalias><attrtype Sync="TRUE">Geometry</attrtype><attwidth Sync="TRUE">0</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef Sync="TRUE">Feature geometry.</attrdef><attrdefs Sync="TRUE">ESRI</attrdefs><attrdomv><udom Sync="TRUE">Coordinates defining the features.</udom></attrdomv></attr><attr><attrlabl Sync="TRUE">SHAPE_Area</attrlabl><attalias Sync="TRUE">Shape_Area</attalias><attrtype Sync="TRUE">Double</attrtype><attwidth Sync="TRUE">19</attwidth><attrdef Sync="TRUE">Area of feature in internal units squared.</attrdef><attrdefs Sync="TRUE">ESRI</attrdefs><attrdomv><udom Sync="TRUE">Positive real numbers that are automatically generated.</udom></attrdomv><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl Sync="TRUE">NTACode</attrlabl><attalias Sync="TRUE">NTACode</attalias><attrtype Sync="TRUE">String</attrtype><attwidth 
Sync="TRUE">4</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef>Neighborhood Tabulation Area Code</attrdef></attr><attr><attrlabl Sync="TRUE">Shape_Leng</attrlabl><attalias Sync="TRUE">Shape_Leng</attalias><attrtype Sync="TRUE">Double</attrtype><attwidth Sync="TRUE">19</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl Sync="TRUE">NTAName</attrlabl><attalias Sync="TRUE">NTAName</attalias><attrtype Sync="TRUE">String</attrtype><attwidth Sync="TRUE">75</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef>Neighborhood Tabulation Area Name</attrdef></attr></detailed></eainfo><mdHrLvName Sync="TRUE">dataset</mdHrLvName><refSysInfo><RefSystem><refSysID><identCode code="2263" Sync="TRUE"/><idCodeSpace Sync="TRUE">EPSG</idCodeSpace><idVersion Sync="TRUE">8.1.1</idVersion></refSysID></RefSystem></refSysInfo><spatRepInfo><VectSpatRep><geometObjs Name="nyct2010wi"><geoObjTyp><GeoObjTypCd value="002" Sync="TRUE"></GeoObjTypCd></geoObjTyp><geoObjCnt Sync="TRUE">0</geoObjCnt></geometObjs><topLvl><TopoLevCd value="001" Sync="TRUE"></TopoLevCd></topLvl></VectSpatRep></spatRepInfo><spdoinfo><ptvctinf><esriterm Name="nyct2010wi"><efeatyp Sync="TRUE">Simple</efeatyp><efeageom code="4" Sync="TRUE"></efeageom><esritopo Sync="TRUE">FALSE</esritopo><efeacnt Sync="TRUE">0</efeacnt><spindex Sync="TRUE">FALSE</spindex><linrefer Sync="TRUE">FALSE</linrefer></esriterm></ptvctinf></spdoinfo><mdDateSt Sync="TRUE">20181116</mdDateSt></metadata>
2 | 


--------------------------------------------------------------------------------
/shapefiles/nyct2010wi_18d/nyct2010wi.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/nyct2010wi_18d/nyct2010wi.shx


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/taxi_zones/taxi_zones.dbf


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.prj:
--------------------------------------------------------------------------------
1 | PROJCS["NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-74.0],PARAMETER["Standard_Parallel_1",40.66666666666666],PARAMETER["Standard_Parallel_2",41.03333333333333],PARAMETER["Latitude_Of_Origin",40.16666666666666],UNIT["Foot_US",0.3048006096012192]]


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/taxi_zones/taxi_zones.sbn


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/taxi_zones/taxi_zones.sbx


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/taxi_zones/taxi_zones.shp


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.shp.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <metadata xml:lang="en"><Esri><CreaDate>20150921</CreaDate><CreaTime>14042600</CreaTime><ArcGISFormat>1.0</ArcGISFormat><SyncOnce>TRUE</SyncOnce><DataProperties><lineage><Process ToolSource="c:\program files (x86)\arcgis\desktop10.1\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20150921" Time="140309">CalculateField taxi_zones zone [Taxi_zone] VB #</Process><Process ToolSource="c:\program files (x86)\arcgis\desktop10.1\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20150921" Time="140325">CalculateField taxi_zones LocationID [taxi_zone_] VB #</Process><Process ToolSource="c:\program files (x86)\arcgis\desktop10.1\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20150921" Time="140338">CalculateField taxi_zones borough [FIRST_Boro] VB #</Process></lineage></DataProperties></Esri></metadata>
3 | 


--------------------------------------------------------------------------------
/shapefiles/taxi_zones/taxi_zones.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toddwschneider/nyc-motor-vehicle-collisions/42ccff38b4d9cb5ae2b26e988bf36a18c23ed96d/shapefiles/taxi_zones/taxi_zones.shx


--------------------------------------------------------------------------------