├── .DS_Store ├── .Rhistory ├── .gitignore ├── LICENSE ├── README.md ├── address_data ├── .DS_Store ├── formatted │ ├── oak_liq_census_format.csv │ ├── oak_liq_gearth_format.csv │ ├── oak_liq_gearth_format.kdx │ ├── oak_liq_gfusion_format.csv │ ├── oak_liq_w_ids.csv │ ├── oak_liq_w_ids_types_headers.csv │ ├── oak_liq_w_ids_types_headers_copy.csv │ └── oak_liq_w_ids_types_headers_geocoded.csv ├── oak_liquor_stores.csv ├── sample │ ├── data_copy.csv │ ├── geocoded_output.csv │ ├── oak_liq_stores_raw.csv │ ├── sample_10_addresses.csv │ ├── sample_10_addresses_geocoded.csv │ ├── sample_2500_addresses.csv │ ├── sample_2500_addresses_geocoded.csv │ ├── sample_5k_addresses.csv │ ├── sample_5k_addresses_geocoded.csv │ └── sample_cal_parcels_all_v2.csv └── trulia_avgprice_bystate_2017.csv ├── draft ├── rgeodocoding2.Rmd └── rgeodocoding2.html ├── output ├── address_data_geocoded2.csv ├── address_data_geocoded_esri.csv ├── address_data_geocoded_google.csv ├── geocoded_addresses_out.csv ├── geocoded_addresses_single_out.csv ├── tracts2010.dbf ├── tracts2010.prj ├── tracts2010.shp └── tracts2010.shx ├── rgeocoding-slides.Rmd ├── rgeocoding-slides.html ├── rgeocoding.Rmd ├── rgeocoding.html ├── screenshots ├── .DS_Store ├── addresses1.png ├── census_api_key_apply.png ├── census_geo.png ├── dual_address_match.png ├── esri_wgs_token.png ├── fcc_api.png ├── fips_code.png ├── geocode_details1.png ├── geocoding_details1.png ├── geocoding_details2.png ├── ggmap_geocode_help.png ├── ggmap_plot1.png ├── gmap_barrows.png ├── google_limits.png ├── output_compare.png ├── popdens.png ├── ref_data_quality.png ├── social_explorer.png ├── ydn_boss_placefinder.png ├── ydn_create_application.png ├── ydn_keys.png ├── ydn_landing.png ├── ydn_signup.png └── ydn_usage_limits.png ├── scripts ├── .DS_Store ├── .Rapp.history ├── draft │ └── tiger_geocoding.R ├── esri_wgs_geocoding.R ├── fcc_latlon2fips.R ├── google_geocoding_ggmap.R ├── google_geocoding_ggmap_v2.R ├── oakland_liquor_stores.csv └── 
older_scripts │ ├── census_geocoding_batch.R │ ├── census_geocoding_batch_v2.R │ ├── census_geocoding_single_address.R │ ├── esri_wgs_geocoding.R │ ├── geocode_it.R │ ├── getFipsForPoints.R │ ├── ggmap_geocoding_examples.R │ ├── ggmap_google.R │ ├── google_batch_big.R │ ├── google_geocode_in_limits.R │ ├── spatial_analysis_examples.R │ ├── tiger_geocoding.R │ ├── tiger_geocoding_batch.R │ ├── tiger_one_at_a_time.R │ ├── tigris_acs_census.R │ └── yahoo_geocoding.R ├── shapefiles ├── .DS_Store ├── AlamedaCommunityCollegeDistricts │ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf │ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.prj │ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp │ └── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx └── AlamedaCountySchools │ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf │ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.prj │ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp │ └── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx └── tiger ├── test_out.txt ├── test_out2.txt └── tiger_12addresses_to_geocode.csv /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/.DS_Store -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | head(address_data) 2 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1) 3 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T) 4 | # Take a look - what do you think? 5 | head(address_data2) 6 | head(address_data) 7 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1) 8 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T) 9 | # Take a look - what do you think? 
10 | head(address_data2) 11 | tracts2010 <- merge(tracts2010, pop2, by="GEOID10") 12 | quantColors <- colorQuantile("Reds", tracts2010$pct_under18, n=5) 13 | point_map <- leaflet() %>% 14 | addTiles() %>% 15 | addPolygons(data=tracts2010, 16 | color="white", 17 | weight=1, 18 | opacity=0.5, 19 | fillColor= ~quantColors(pct_under18), 20 | fillOpacity = 0.75, 21 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>% 22 | addMarkers(data=address_data, lat=~glat, lng=~glon, 23 | popup=(paste0(address_data$name, "
", 24 | address_data$full_address) 25 | ) 26 | ) 27 | point_map 28 | library(crosstalk) 29 | address_data2$sview <- paste0("http://maps.googleapis.com/maps/api/streetview?size=250x190&location=",address_data2$glat,",",address_data2$glon,"&sensor=false&fov=110") 30 | address_data2$popup_content <- paste("Name:", address_data2$name,"
", 31 | "Address: ", address_data2$full_address, "
", 32 | "Percent Under 18: ", address_data2$pct_under18, "
", 33 | "" 34 | ) 35 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 36 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_under18, n=5) 37 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 38 | addPolygons(data=tracts2010, 39 | color="white", 40 | weight=1, 41 | opacity=0.5, 42 | fillColor=~quantColors(pct_under18), 43 | fillOpacity = 0.65, 44 | popup = paste0(tracts2010$pct_under18, "% under 18"), 45 | group="Percent Under 18") %>% 46 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 47 | addLayersControl( 48 | overlayGroups = c("Liquor Stores","Percent Under 18"), 49 | options = layersControlOptions(collapsed = FALSE) 50 | ) 51 | library(ggmap) 52 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding") 53 | #mykey <- "AIzaSyxxxxxxxxxxxxxxxxxxxxxxxxxOQyOFWrTw" 54 | mykey <- "AIzaSyDf-SZG8O4hj1c06VQ-k6hkBrOQyOFWrTw" 55 | register_google(key=mykey) 56 | # File of addresses 57 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F) 58 | # Take a look 59 | head(address_data) 60 | # Full addres format: "2625 Dana St, Berkeley CA, 94704" 61 | address_data$full_address <- paste0(address_data$street, ", " , 62 | address_data$city, ", " , 63 | address_data$state, " ", 64 | address_data$zip) 65 | # Take a look 66 | head(address_data, 3) 67 | head(google_geocoded) 68 | source("./scripts/fcc_latlon2fips.R") 69 | # test one coordinate pair 70 | latlon2fips(latitude=37.852562, longitude=-122.273634) 71 | View(pop_acs5_2016) 72 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F) 73 | writeOGR(tracts2010, "tracts2010.shp") 74 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F) 75 | writeOGR(tracts2010, "tracts2010.shp", driver="ESRI Shapefile") 76 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F) 77 | writeOGR(tracts2010, layer="tracts2010", driver="ESRI Shapefile") 78 | write.csv(address_data2, 
file="address_data_geocoded2.csv", row.names=F) 79 | writeOGR(tracts2010, dsn=".", layer="tracts2010", driver="ESRI Shapefile") 80 | address_data2 < read.csv("address_data_geocoded2.csv", stringsAsFactors = F) 81 | address_data2 < read.csv("address_data_geocoded2.csv", stringsAsFactors = F) 82 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F) 83 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F) 84 | tracts2010 <- readOGR(dsn=".",layer="tracts2010") 85 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F) 86 | tracts2010 <- readOGR(dsn=".",layer="tracts2010") 87 | address_data2$sview <- paste0("http://maps.googleapis.com/maps/api/streetview?size=250x190&location=",address_data2$glat,",",address_data2$glon,"&sensor=false&fov=110") 88 | address_data2$popup_content <- paste("Name:", address_data2$name,"
", 89 | "Address: ", address_data2$full_address, "
", 90 | "Percent Under 18: ", address_data2$pct_under18, "
", 91 | "" 92 | ) 93 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 94 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_under18, n=5) 95 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 96 | addPolygons(data=tracts2010, 97 | color="white", 98 | weight=1, 99 | opacity=0.5, 100 | fillColor=~quantColors(pct_under18), 101 | fillOpacity = 0.65, 102 | popup = paste0(tracts2010$pct_under18, "% under 18"), 103 | group="Percent Under 18") %>% 104 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 105 | addLayersControl( 106 | overlayGroups = c("Liquor Stores","Percent Under 18"), 107 | options = layersControlOptions(collapsed = FALSE) 108 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 109 | addPolygons(data=tracts2010, 110 | color="white", 111 | weight=1, 112 | opacity=0.5, 113 | fillColor=~quantColors(pct_under18), 114 | fillOpacity = 0.65, 115 | popup = paste0(tracts2010$pct_under18, "% under 18"), 116 | group="Percent Under 18") %>% 117 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 118 | addLayersControl( 119 | overlayGroups = c("Liquor Stores","Percent Under 18"), 120 | options = layersControlOptions(collapsed = FALSE) 121 | ) 122 | View(tracts2010) 123 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 124 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 125 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 126 | addPolygons(data=tracts2010, 127 | color="white", 128 | weight=1, 129 | opacity=0.5, 130 | fillColor=~quantColors(pct_n18), 131 | fillOpacity = 0.65, 132 | popup = paste0(tracts2010$pct_n18, "% under 18"), 133 | group="Percent Under 18") %>% 134 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 135 | addLayersControl( 136 | overlayGroups = c("Liquor Stores","Percent Under 18"), 137 | options = layersControlOptions(collapsed = FALSE) 138 | ) 139 | bscols( widths 
= c(9,3), 140 | map, 141 | list( 142 | filter_select("name", "Store:", shared_df, ~name), 143 | filter_select("pct_under18", "Percent under 18", shared_df, ~pct_under18) 144 | ) 145 | ) 146 | getwd() 147 | write.csv(google_geocoded,file="output/address_data_geocoded_google.csv", 148 | row.names=FALSE) 149 | write.csv(esri_geocoded,file="output/address_data_geocoded_esri.csv", 150 | row.names=FALSE) 151 | write.csv(address_data2, file="output/address_data_geocoded2.csv", row.names=F) 152 | writeOGR(tracts2010, dsn="./output", layer="tracts2010", driver="ESRI Shapefile") 153 | library(crosstalk) 154 | library(tidyverse) 155 | library(leaflet) 156 | library(rgdal) 157 | library(sp) 158 | #library(DT) 159 | address_data2 <- read.csv("output/address_data_geocoded2.csv" ) 160 | =tracts2010 <- readOGR(dsn="./output",layer="tracts2010") 161 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 162 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 163 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 164 | addPolygons(data=tracts2010, 165 | color="white", 166 | weight=1, 167 | opacity=0.5, 168 | fillColor=~quantColors(pct_n18), 169 | fillOpacity = 0.65, 170 | popup = paste0(tracts2010$pct_n18, "% under 18"), 171 | group="Percent Under 18") %>% 172 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 173 | addLayersControl( 174 | overlayGroups = c("Liquor Stores","Percent Under 18"), 175 | options = layersControlOptions(collapsed = FALSE) 176 | ) 177 | library(crosstalk) 178 | library(tidyverse) 179 | library(leaflet) 180 | library(rgdal) 181 | library(sp) 182 | #library(DT) 183 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F) 184 | =tracts2010 <- readOGR(dsn="./output",layer="tracts2010") 185 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding") 186 | library(crosstalk) 187 | library(tidyverse) 188 | library(leaflet) 189 | library(rgdal) 190 | library(sp) 191 | 
#library(DT) 192 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F) 193 | =tracts2010 <- readOGR(dsn="output",layer="tracts2010") 194 | library(crosstalk) 195 | library(tidyverse) 196 | library(leaflet) 197 | library(rgdal) 198 | library(sp) 199 | #library(DT) 200 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F) 201 | tracts2010 <- readOGR(dsn="output",layer="tracts2010") 202 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 203 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 204 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 205 | addPolygons(data=tracts2010, 206 | color="white", 207 | weight=1, 208 | opacity=0.5, 209 | fillColor=~quantColors(pct_n18), 210 | fillOpacity = 0.65, 211 | popup = paste0(tracts2010$pct_n18, "% under 18"), 212 | group="Percent Under 18") %>% 213 | addMarkers(group="Liquor Stores", popup=~popup_content) %>% 214 | addLayersControl( 215 | overlayGroups = c("Liquor Stores","Percent Under 18"), 216 | options = layersControlOptions(collapsed = FALSE) 217 | ) 218 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 219 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 220 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 221 | addPolygons(data=tracts2010, 222 | color="white", 223 | weight=1, 224 | opacity=0.5, 225 | fillColor=~quantColors(pct_n18), 226 | fillOpacity = 0.65, 227 | popup = paste0(tracts2010$pct_n18, "% under 18"), 228 | group="Percent Under 18") %>% 229 | addMarkers(group="Liquor Stores") %>% 230 | addLayersControl( 231 | overlayGroups = c("Liquor Stores","Percent Under 18"), 232 | options = layersControlOptions(collapsed = FALSE) 233 | ) 234 | bscols( widths = c(9,3), 235 | map, 236 | list( 237 | filter_select("name", "Store:", shared_df, ~name), 238 | # Create a filter input 239 | filter_slider("pct_under18", "Percent 
under 18", shared_df, column=~pct_under18, step=0.5, width=250) 240 | ) 241 | ) 242 | bscols( widths = c(9,3), 243 | map, 244 | list( 245 | filter_select("name", "Store:", shared_df, ~name), 246 | # Create a filter input 247 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.15, width=250) 248 | ) 249 | ) 250 | bscols( widths = c(9,3), 251 | map, 252 | list( 253 | filter_select("name", "Store:", shared_df, ~name), 254 | # Create a filter input 255 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250) 256 | ) 257 | ) 258 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 259 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 260 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 261 | addPolygons(data=tracts2010, 262 | color="white", 263 | weight=1, 264 | opacity=0.5, 265 | fillColor=~quantColors(pct_n18), 266 | fillOpacity = 0.65, 267 | popup = paste0(tracts2010$pct_n18, "% under 18"), 268 | group="Percent Under 18") %>% 269 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>% 270 | addLayersControl( 271 | overlayGroups = c("Liquor Stores","Percent Under 18"), 272 | options = layersControlOptions(collapsed = FALSE) 273 | ) 274 | bscols( widths = c(9,3), 275 | map, 276 | list( 277 | filter_select("name", "Store:", shared_df, ~name), 278 | # Create a filter input 279 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250) 280 | ) 281 | ) 282 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding") 283 | library(ggmap) 284 | mykey <- "AIzaSyDf-SZG8O4hj1c06VQ-k6hkBrOQyOFWrTw" 285 | register_google(key=mykey) 286 | geocode("San Francisco, CA", key=mykey) 287 | geocode 288 | ?geocode 289 | geocode("San Francisco, CA") 290 | geocode("San Francisco, CA", "more") 291 | x<-geocode("San Francisco, CA", "more") 292 | View(x) 293 | x<-geocode("Barrows Hall, 
Berkeley", "more") 294 | View(x) 295 | x<-geocode("2465 Dana St, Berkeley", "more") 296 | x<-geocode("2465 Dana St, Berkeley", "all") 297 | revgeocode(c(-122.4194,37.77493), output="more") 298 | # File of addresses 299 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F) 300 | # Take a look 301 | head(address_data) 302 | address_data$full_address <- paste0(address_data$street, ", " , 303 | address_data$city, ", " , 304 | address_data$state, " ", 305 | address_data$zip) 306 | # File of addresses 307 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F) 308 | # Take a look 309 | head(address_data) 310 | View(address_data) 311 | address_data$full_address <- paste0(address_data$street, ", " , 312 | address_data$city, ", " , 313 | address_data$state, " ", 314 | address_data$zip) 315 | View(address_data) 316 | google_geocoded <- geocode(address_data$full_address, output = "more", 317 | source = "google", key=mykey) 318 | View(google_geocoded) 319 | address_data$glat <- google_geocoded$lat 320 | address_data$glon <- google_geocoded$glon 321 | address_data$glon <- google_geocoded$glon 322 | View(address_data) 323 | head(address_data) 324 | address_data$glon <- google_geocoded$lon 325 | library(leaflet) 326 | point_map <- leaflet(address_data) %>% 327 | addTiles() %>% 328 | addMarkers(lat=~glat, lng=~glon, 329 | popup=(paste0(address_data$name, "
", 330 | address_data$full_address) 331 | ) 332 | ) 333 | point_map 334 | esri_token<-"rfUfTu_yJczZESOq6S50x-kcrzFr9oBvpqhRpp70Pfu_P9uNjMgNtVXGq0iH6miRPGL6yoQMg2IlNROHzErtjCeHphbAfkETqS_Ksd5loXT1BGmU0U1wI5KtKxvfVxjaxWG3AIre2Ngf_NQ9XlDB5w.." 335 | my_esri_token<-"rfUfTu_yJczZESOq6S50x-kcrzFr9oBvpqhRpp70Pfu_P9uNjMgNtVXGq0iH6miRPGL6yoQMg2IlNROHzErtjCeHphbAfkETqS_Ksd5loXT1BGmU0U1wI5KtKxvfVxjaxWG3AIre2Ngf_NQ9XlDB5w.." 336 | source("./scripts/esri_wgs_geocoding.R") 337 | geocode_one("2625 Dana St, Berkeley, CA, 94704", my_esri_token, 338 | postal = TRUE) 339 | esri_geocoded <- geocode_many(address_data$id, address_data$street, 340 | address_data$city, address_data$state, 341 | as.character(address_data$zip), my_esri_token) 342 | View(esri_geocoded) 343 | address_data <- merge(address_data, esri_geocoded[c("ID","lon","lat")], 344 | by.x="id",by.y = "ID", all.x = T) 345 | # Take a look 346 | head(address_data, 3) 347 | View(address_data) 348 | point_map <- leaflet() %>% 349 | addTiles() %>% 350 | addMarkers(lat=address_data$glat, lng=address_data$glon, 351 | popup=(paste0(address_data$name, "
", 352 | address_data$full_address)) 353 | ) %>% 354 | addCircleMarkers(lat=address_data$lat, lng=address_data$lon, 355 | color="black",fillColor="red", 356 | popup=(paste0(address_data$name, "
", 357 | address_data$full_address)) 358 | ) 359 | point_map 360 | source("./scripts/fcc_latlon2fips.R") 361 | latlon2fips(latitude=37.852562, longitude=-122.273634) 362 | x<- latlon2fips(latitude=37.852562, longitude=-122.273634) 363 | x 364 | substr(x,0,11) 365 | x<- latlon2fips(37.852562,-122.273634) 366 | x 367 | address_data$fips<- mapply(latlon2fips, address_data$glat, 368 | address_data$glon) 369 | head(address_data, 3) 370 | View(address_data) 371 | library(sp) 372 | library(tigris) 373 | options(tigris_class = "sp") # options are sp or sf 374 | options(tigris_use_cache = F) # set to true to save locally 375 | tracts2010 <- tracts(state = '06', county= '001', cb = F, year=2010) 376 | plot(tracts2010) 377 | tracts2010cb <- tracts(state = '06', county= '001', cb = T, year=2010) 378 | plot(tracts2010cb) 379 | plot(tracts2010) 380 | plot(tracts2010) 381 | address_data_sp<- address_data #make copy 382 | coordinates(address_data_sp) <-c("glon", "glat") 383 | proj4string(address_data_sp) <- CRS(proj4string(tracts2010)) 384 | proj4string(address_data_sp) <- CRS(proj4string(tracts2010)) 385 | points(address_data_sp, col="red") 386 | fips2010 <-over(address_data_sp, tracts2010) 387 | View(fips2010) 388 | address_data$GEOID10 <- fips2010$GEOID10 389 | View(address_data) 390 | library(tidycensus) 391 | library(tigris) 392 | library(tidycensus) 393 | my_census_api_key <- "f2d6f4f743545d3a42a67412b05935dc7712c432" 394 | census_api_key(my_census_api_key) 395 | my_states<- c("06") # CA 396 | my_counties <- c("001") # Alameda County 397 | cenvar_table <-load_variables(year=2016, dataset = "acs5", cache=T) 398 | View(cenvar_table) 399 | pop_total <- "B01001_001E" # Total population 400 | pop_under18 <- "B09001_001E" # POPULATION UNDER 18 YEARS BY AGE 401 | pop_acs5_2016 <-get_acs(geography = "tract", 402 | variables = c(pop_total,pop_under18), 403 | year=2016, survey="acs5", 404 | state = my_states, county = my_counties, 405 | geometry = F) 406 | View(pop_acs5_2016) 407 | 
library(tidyr) 408 | library(dplyr) 409 | library(tidyr) 410 | library(dplyr) 411 | # Select the columnbs of interest 412 | # and put `totpop` and `under18` in their own columns 413 | pop2 <- pop_acs5_2016 %>% 414 | select("GEOID","variable","estimate") %>% 415 | spread(key=variable, value=estimate) 416 | # Rename columns 417 | colnames(pop2)<-c("GEOID10","totpop","under18") 418 | head(pop2) 419 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1) 420 | head(pop2) 421 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T) 422 | View(address_data2) 423 | View(tracts2010@data) 424 | tracts2010 <- merge(tracts2010, pop2, by="GEOID10") 425 | View(tracts2010@data) 426 | quantColors <- colorQuantile("Reds", tracts2010$pct_under18, n=5) 427 | point_map <- leaflet() %>% 428 | addTiles() %>% 429 | addPolygons(data=tracts2010, 430 | color="white", 431 | weight=1, 432 | opacity=0.5, 433 | fillColor= ~quantColors(pct_under18), 434 | fillOpacity = 0.75, 435 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>% 436 | addMarkers(data=address_data, lat=~glat, lng=~glon, 437 | popup=(paste0(address_data$name, "
", 438 | address_data$full_address) 439 | ) 440 | ) 441 | point_map 442 | library(htmlwidgets) 443 | saveWidget(point_map, file="pointmap.html") 444 | # Chunk 1 445 | library(crosstalk) 446 | library(tidyverse) 447 | library(leaflet) 448 | library(rgdal) 449 | library(sp) 450 | #library(DT) 451 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F) 452 | tracts2010 <- readOGR(dsn="output",layer="tracts2010") 453 | # Chunk 2 454 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store") 455 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5) 456 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 457 | addPolygons(data=tracts2010, 458 | color="white", 459 | weight=1, 460 | opacity=0.5, 461 | fillColor=~quantColors(pct_n18), 462 | fillOpacity = 0.65, 463 | popup = paste0(tracts2010$pct_n18, "% under 18"), 464 | group="Percent Under 18") %>% 465 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>% 466 | addLayersControl( 467 | overlayGroups = c("Liquor Stores","Percent Under 18"), 468 | options = layersControlOptions(collapsed = FALSE) 469 | ) 470 | # Chunk 3 471 | bscols( widths = c(9,3), 472 | map, 473 | list( 474 | filter_select("name", "Store:", shared_df, ~name), 475 | # Create a filter input 476 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250) 477 | ) 478 | ) 479 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>% 480 | addPolygons(data=tracts2010, 481 | color="white", 482 | weight=1, 483 | opacity=0.5, 484 | fillColor=~quantColors(pct_n18), 485 | fillOpacity = 0.65, 486 | popup = paste0(tracts2010$pct_n18, "% under 18"), 487 | group="Percent Under 18") %>% 488 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>% 489 | addLayersControl( 490 | overlayGroups = c("Liquor Stores","Percent Under 18"), 491 | options = layersControlOptions(collapsed = FALSE) 492 | ) %>% 
hideGroup("Liquor Stores") 493 | point_map 494 | point_map <- leaflet() %>% 495 | addTiles() %>% 496 | addPolygons(data=tracts2010, 497 | color="white", 498 | weight=1, 499 | opacity=0.5, 500 | fillColor= ~quantColors(pct_under18), 501 | fillOpacity = 0.75, 502 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>% 503 | addMarkers(data=address_data, lat=~glat, lng=~glon, 504 | popup=(paste0(address_data$name, "
", 505 | address_data$full_address) 506 | ) 507 | ) %>% hideGroup("Liquor Stores") 508 | point_map 509 | ?tmap 510 | library(tmap) 511 | ?addPolygons 512 | ?layersControlOptions 513 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | keys/* 2 | *.zip 3 | draft/* 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2015-2016 D-Lab UC Berkeley 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RGeocoding 2 | 3 | ## About 4 | These files show how to geocode in R using three online services: 5 | 6 | - Google Geocoding API 7 | - ESRI World Geocoding Service 8 | - US Census Geocoder 9 | 10 | 11 | -------------------------------------------------------------------------------- /address_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/address_data/.DS_Store -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_census_format.csv: -------------------------------------------------------------------------------- 1 | 1,2101 8th Ave,Oakland,CA,94606 2 | 2,1615 Macarthur Blvd,Oakland,CA,94602 3 | 3,394 12th St,Oakland,CA,94607 4 | 4,1500 23th Ave,Oakland,CA,94606 5 | 5,6193 Ridgemont Dr,Oakland,CA,94619 6 | 6,525 Embarcadero W, Oakland,CA,94607 7 
| 7,5403 Foothill Blvd,Oakland,CA,94601 8 | 8,1200 78th Ave,Oakland,CA,94621 9 | 9,828 Franklin St,Oakland,CA,94607 10 | 10,5913 International Blvd,Oakland,CA,94621 11 | 11,3210 Harrison St,Oakland,CA,94611 12 | 12,1460 7th St,Oakland,CA,94607 13 | 13,1333 Peralta St,Oakland,CA,94607 14 | 14,3710 Telegraph Ave,Oakland,CA,94609 15 | 15,3293 Lakeshore Ave,Oakland,CA,94610 16 | 16,1647 8th St,Oakland,CA,94607 17 | 17,3849 Martin Luther King Jr Way,Oakland,CA,94609 18 | 18,3900 Grand Ave,Oakland,CA,94610 19 | 19,7305 Edgewater Dr #D,Oakland,CA,94621 20 | 20,350 E 18th St,Oakland,CA,94606 21 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_gearth_format.csv: -------------------------------------------------------------------------------- 1 | ID,Store,Street,City,State,Zip 2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606 3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602 4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607 5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606 6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619 7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607 8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601 9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621 10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607 11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621 12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611 13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607 14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607 15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609 16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610 17 | 16,Happy Time,1647 8th St,Oakland,CA,94607 18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609 19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610 20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621 21 | 20,Carriage Trade Liquors, 350 E 18th 
St,Oakland,CA,94606 22 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_gearth_format.kdx: -------------------------------------------------------------------------------- 1 | Layout { 2 | FileType : "delimited" 3 | Delimiter : "," 4 | Street : "2" 5 | City : "3" 6 | State : "4" 7 | Zip : "5" 8 | SkipRows : "1" 9 | TextCodec : "ISO-8859-1" 10 | FieldDefinitions { 11 | 0 { 12 | Name : "ID" 13 | Type : "int" 14 | } 15 | 1 { 16 | Name : "Store" 17 | Type : "string" 18 | } 19 | 2 { 20 | Name : "Street" 21 | Type : "string" 22 | } 23 | 3 { 24 | Name : "City" 25 | Type : "string" 26 | } 27 | 4 { 28 | Name : "State" 29 | Type : "string" 30 | } 31 | 5 { 32 | Name : "Zip" 33 | Type : "string" 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_gfusion_format.csv: -------------------------------------------------------------------------------- 1 | ID,Store,Address 2 | 1,Wah Fay Liquors,2101 8th Ave Oakland CA 94606 3 | 2,Vision Liquor,1615 Macarthur Blvd Oakland CA 94602 4 | 3,Souza's Liquors,394 12th St Oakland CA 94607 5 | 4,Tk Liquors,1500 23th Ave Oakland CA 94606 6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr Oakland CA 94619 7 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_w_ids.csv: -------------------------------------------------------------------------------- 1 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606 2 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602 3 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607 4 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606 5 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619 6 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607 7 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601 8 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621 9 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607 10 | 10,Los Camellos,5913 
International Blvd,Oakland,CA,94621 11 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611 12 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607 13 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607 14 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609 15 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610 16 | 16,Happy Time,1647 8th St,Oakland,CA,94607 17 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609 18 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610 19 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621 20 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606 21 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_w_ids_types_headers.csv: -------------------------------------------------------------------------------- 1 | id,name,street,city,state,zip,type 2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p 3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p 4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p 5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p 6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p 7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c 8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p 9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m 10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p 11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p 12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m 13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m 14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p 15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m 16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p 17 | 16,Happy Time,1647 8th St,Oakland,CA,94607,p 18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m 19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p 20 | 19,J J 
Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621,p 21 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606,p -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_w_ids_types_headers_copy.csv: -------------------------------------------------------------------------------- 1 | "id","name","street","city","state","zip","type","address" 2 | -------------------------------------------------------------------------------- /address_data/formatted/oak_liq_w_ids_types_headers_geocoded.csv: -------------------------------------------------------------------------------- 1 | "id","name","street","city","state","zip","type","address","lon","lat","address.1" 2 | 1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave,Oakland,CA,94606",-122.2448899,37.7983669,"2101 8th ave, oakland, ca 94606, usa" 3 | 2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd,Oakland,CA,94602",-122.223667,37.800329,"1615 macarthur blvd, oakland, ca 94602, usa" 4 | 3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St,Oakland,CA,94607",-122.2703368,37.8026337,"394 12th st, oakland, ca 94607, usa" 5 | 4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave,Oakland,CA,94606",-122.2351333,37.7842433,"1500 23rd ave, oakland, ca 94606, usa" 6 | 5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr,Oakland,CA,94619",-122.167191,37.784339,"6193 ridgemont dr, oakland, ca 94619, usa" 7 | 6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland,CA,94607",-122.2790176,37.7959797,"525 embarcadero west, oakland, ca 94607, usa" 8 | 7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd,Oakland,CA,94601",-122.1984536,37.772621,"5403 foothill blvd, oakland, ca 94601, usa" 9 | 8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th 
Ave,Oakland,CA,94621",-122.186272,37.755498,"1200 78th ave, oakland, ca 94621, usa" 10 | 9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin St,Oakland,CA,94607",-122.2719881,37.8002274,"828 franklin st, oakland, ca 94607, usa" 11 | 10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd,Oakland,CA,94621",-122.1993192,37.7649979,"5913 international blvd, oakland, ca 94621, usa" 12 | 11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St,Oakland,CA,94611",-122.2557939,37.8180419,"3210 harrison st, oakland, ca 94611, usa" 13 | 12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St,Oakland,CA,94607",-122.2951698,37.8056645,"1460 7th st, oakland, ca 94607, usa" 14 | 13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St,Oakland,CA,94607",-122.2941054,37.8112027,"1333 peralta st, oakland, ca 94607, usa" 15 | 14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave,Oakland,CA,94609",-122.2650554,37.8256134,"3710 telegraph ave, oakland, ca 94609, usa" 16 | 15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave,Oakland,CA,94610",-122.244289,37.810986,"3293 lakeshore ave, oakland, ca 94610, usa" 17 | 16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St,Oakland,CA,94607",-122.298867,37.807129,"1647 8th st, oakland, ca 94607, usa" 18 | 17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way,Oakland,CA,94609",-122.2691424,37.8281099,"3849 m.l.k. 
jr way, oakland, ca 94609, usa" 19 | 18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave,Oakland,CA,94610",-122.24452,37.8185089,"3900 grand ave, oakland, ca 94610, usa" 20 | 19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr,Oakland,CA,94621",-122.2063558,37.744643,"7305 edgewater dr, oakland, ca 94621, usa" 21 | 20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St,Oakland,CA,94606",-122.2510741,37.7994208,"350 e 18th st, oakland, ca 94606, usa" 22 | -------------------------------------------------------------------------------- /address_data/oak_liquor_stores.csv: -------------------------------------------------------------------------------- 1 | id,name,street,city,state,zip,type 2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p 3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p 4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p 5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p 6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p 7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c 8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p 9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m 10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p 11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p 12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m 13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m 14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p 15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m 16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p 17 | 16,Happy Time,1647 8th St,Oakland,CA,94607,p 18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m 19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p 20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621,p 21 | 20,Carriage Trade Liquors, 350 E 18th 
St,Oakland,CA,94606,p -------------------------------------------------------------------------------- /address_data/sample/data_copy.csv: -------------------------------------------------------------------------------- 1 | "id","name","street","city","state","zip","type","address" 2 | -------------------------------------------------------------------------------- /address_data/sample/geocoded_output.csv: -------------------------------------------------------------------------------- 1 | "id","name","street","city","state","zip","type","address","lon","lat","address.1" 2 | 1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave,Oakland,CA,94606",-122.2448899,37.7983669,"2101 8th ave, oakland, ca 94606, usa" 3 | 2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd,Oakland,CA,94602",-122.223667,37.800329,"1615 macarthur blvd, oakland, ca 94602, usa" 4 | 3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St,Oakland,CA,94607",-122.2703368,37.8026337,"394 12th st, oakland, ca 94607, usa" 5 | 4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave,Oakland,CA,94606",-122.2351333,37.7842433,"1500 23rd ave, oakland, ca 94606, usa" 6 | 5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr,Oakland,CA,94619",-122.167191,37.784339,"6193 ridgemont dr, oakland, ca 94619, usa" 7 | 6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland,CA,94607",-122.2790176,37.7959797,"525 embarcadero west, oakland, ca 94607, usa" 8 | 7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd,Oakland,CA,94601",-122.1984536,37.772621,"5403 foothill blvd, oakland, ca 94601, usa" 9 | 8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th Ave,Oakland,CA,94621",-122.186272,37.755498,"1200 78th ave, oakland, ca 94621, usa" 10 | 9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin 
St,Oakland,CA,94607",-122.2719881,37.8002274,"828 franklin st, oakland, ca 94607, usa" 11 | 10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd,Oakland,CA,94621",-122.1993192,37.7649979,"5913 international blvd, oakland, ca 94621, usa" 12 | 11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St,Oakland,CA,94611",-122.2557939,37.8180419,"3210 harrison st, oakland, ca 94611, usa" 13 | 12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St,Oakland,CA,94607",-122.2951698,37.8056645,"1460 7th st, oakland, ca 94607, usa" 14 | 13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St,Oakland,CA,94607",-122.2941054,37.8112027,"1333 peralta st, oakland, ca 94607, usa" 15 | 14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave,Oakland,CA,94609",-122.2650554,37.8256134,"3710 telegraph ave, oakland, ca 94609, usa" 16 | 15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave,Oakland,CA,94610",-122.244289,37.810986,"3293 lakeshore ave, oakland, ca 94610, usa" 17 | 16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St,Oakland,CA,94607",-122.298867,37.807129,"1647 8th st, oakland, ca 94607, usa" 18 | 17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way,Oakland,CA,94609",-122.2691424,37.8281099,"3849 m.l.k. 
jr way, oakland, ca 94609, usa" 19 | 18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave,Oakland,CA,94610",-122.24452,37.8185089,"3900 grand ave, oakland, ca 94610, usa" 20 | 19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr,Oakland,CA,94621",-122.2063558,37.744643,"7305 edgewater dr, oakland, ca 94621, usa" 21 | 20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St,Oakland,CA,94606",-122.2510741,37.7994208,"350 e 18th st, oakland, ca 94606, usa" 22 | -------------------------------------------------------------------------------- /address_data/sample/oak_liq_stores_raw.csv: -------------------------------------------------------------------------------- 1 | Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606 2 | Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602 3 | Souza's Liquors,394 12th,Oakland,CA,94607 4 | Tk Liquors,1500 23th Ave,Oakland,CA,94606 5 | Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA 6 | Bev Mo,525 Embarcadero W, Oakland,CA,94607 7 | Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601 8 | Saleen Market,1200 78th Ave,Oakland,94621 9 | Park Liquors,828 Franklin St,Oakland,CA,94607 10 | Los Camellos,5913 International Blvd,Oakland,CA,94621 11 | -------------------------------------------------------------------------------- /address_data/sample/sample_10_addresses.csv: -------------------------------------------------------------------------------- 1 | "apn","pstreet_addr","pcity","pstate","pzip","plon","plat","pminx","pminy","pmaxx","pmaxy","planduse","pfull_address","parcel_dir" 2 | "029345130","26362 LAWTON AVE","Loma Linda","CA",92354,-117.229239498,34.0449167153,-117.229323238,34.0448060271,-117.229155764,34.0450273813,"532","26362 LAWTON AVE,Loma Linda,CA,92354",6071 3 | "23504800370000","9147 PERSHING AVE","Orangevale","CA",95662,-121.213901675,38.6708152346,-121.21401323,38.6706055214,-121.213792183,38.6710265909,NA,"9147 PERSHING 
AVE,Orangevale,CA,95662",6067 4 | "232052005","1153 SIERRA VISTA WAY","Lafayette","CA",94549,-122.114609607,37.8982677233,-122.114886371,37.8978975253,-122.114412289,37.898760217,NA,"1153 SIERRA VISTA WAY,Lafayette,CA,94549",6013 5 | "8589026023","5033 Baldwin Ave","Temple City","CA",91780,-118.049846129,34.0945402847,-118.04997662,34.0944167133,-118.049716371,34.0946635985,"0100","5033 Baldwin Ave,Temple City,CA,91780",6037 6 | "200291193","460 9TH ST","Mc Farland","CA",93250,-119.237737665,35.6789471257,-119.23794007,35.6788773989,-119.237535267,35.6790168561,"0101","460 9TH ST,Mc Farland,CA,93250",6029 7 | "107618146","7017 NEWTON PL","Alta Loma","CA",91701,-117.592751185,34.1268426075,-117.592902871,34.1267837389,-117.592610044,34.1269016316,"510","7017 NEWTON PL,Alta Loma,CA,91701",6071 8 | "8468015024","1600 W Cameron Ave","West Covina","CA",91790,-117.943032896,34.0685424244,-117.943538578,34.0681056101,-117.942527964,34.0689799637,"1800","1600 W Cameron Ave,West Covina,CA,91790",6037 9 | "2203112937","500 RANCHEROS DR","San Marcos","CA",92069,-117.153226611,33.1412541249,-117.155567866,33.1393828965,-117.150832985,33.1430624795,NA,"500 RANCHEROS DR,San Marcos,CA,92069",6073 10 | "110-120-009-000","3110 ASPEN GROVE RD","Truckee","CA",96161,-120.118140589,39.277759796,-120.118190768,39.27773372,-120.11808974,39.2777866452,"04","3110 ASPEN GROVE RD,Truckee,CA,96161",6061 11 | -------------------------------------------------------------------------------- /address_data/sample/sample_10_addresses_geocoded.csv: -------------------------------------------------------------------------------- 1 | "id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side","state_fips","county_fips","tract_fips","block_fips","lon","lat" 2 | "2203112937","500 RANCHEROS DR, San Marcos, CA, 92069","Match","Exact","500 RANCHEROS DR, SAN MARCOS, CA, 92069","-117.15157,33.140068",195719910,"L",NA,NA,NA,NA,-117.15157,33.140068 3 | "200291193","460 9TH 
ST, Mc Farland, CA, 93250","Match","Exact","460 9TH ST, MC FARLAND, CA, 93250","-119.23804,35.678898",109003113,"L",NA,NA,NA,NA,-119.23804,35.678898 4 | "110-120-009-000","3110 ASPEN GROVE RD, Truckee, CA, 96161","Match","Exact","3110 ASPEN GROVE RD, TRUCKEE, CA, 96161","-120.11933,39.278297",636420421,"L",NA,NA,NA,NA,-120.11933,39.278297 5 | "232052005","1153 SIERRA VISTA WAY, Lafayette, CA, 94549","Match","Exact","1153 SIERRA VISTA WAY, LAFAYETTE, CA, 94549","-122.11476,37.897873",192046673,"L",NA,NA,NA,NA,-122.11476,37.897873 6 | "23504800370000","9147 PERSHING AVE, Orangevale, CA, 95662","Match","Exact","9147 PERSHING AVE, ORANGEVALE, CA, 95662","-121.21412,38.67051",133356827,"L",NA,NA,NA,NA,-121.21412,38.67051 7 | "107618146","7017 NEWTON PL, Alta Loma, CA, 91701","Match","Exact","7017 NEWTON PL, ALTA LOMA, CA, 91701","-117.59298,34.12686",144985211,"L",NA,NA,NA,NA,-117.59298,34.12686 8 | "029345130","26362 LAWTON AVE, Loma Linda, CA, 92354","Match","Exact","26362 LAWTON AVE, LOMA LINDA, CA, 92354","-117.22901,34.04474",145230114,"L",NA,NA,NA,NA,-117.22901,34.04474 9 | "8589026023","5033 Baldwin Ave, Temple City, CA, 91780","Match","Exact","5033 BALDWIN AVE, TEMPLE CITY, CA, 91780","-118.049385,34.093925",142744738,"L",NA,NA,NA,NA,-118.049385,34.093925 10 | "8468015024","1600 W Cameron Ave, West Covina, CA, 91790","Match","Exact","1600 W CAMERON AVE, WEST COVINA, CA, 91790","-117.94267,34.06895",241245486,"L",NA,NA,NA,NA,-117.94267,34.06895 11 | -------------------------------------------------------------------------------- /address_data/trulia_avgprice_bystate_2017.csv: -------------------------------------------------------------------------------- 1 | state,avg_listingPrice_week_aug23_2017 2 | Hawaii,905687 3 | District Of Columbia,773286 4 | California,697539 5 | Massachusetts,602210 6 | New York,565227 7 | Colorado,538477 8 | Utah,440946 9 | Connecticut,435585 10 | Oregon,416718 11 | Florida,406803 12 | Rhode Island,405450 13 | Washington,378565 14 | 
New Jersey,372916 15 | Maryland,369454 16 | Idaho,349000 17 | Virginia,341015 18 | Nevada,331971 19 | Arizona,322398 20 | Texas,320067 21 | Montana,314959 22 | New Hampshire,310914 23 | Vermont,306034 24 | Delaware,303971 25 | Georgia,296535 26 | Wyoming,291855 27 | South Carolina,291636 28 | Minnesota,290514 29 | Illinois,277163 30 | North Carolina,276389 31 | Maine,275717 32 | Tennessee,268692 33 | Alaska,267404 34 | New Mexico,254798 35 | South Dakota,238163 36 | Louisiana,232610 37 | Nebraska,230000 38 | North Dakota,226863 39 | Pennsylvania,224090 40 | Wisconsin,223480 41 | Kentucky,213848 42 | Alabama,212733 43 | Michigan,212694 44 | Missouri,204506 45 | Oklahoma,201091 46 | Mississippi,195390 47 | Arkansas,191446 48 | Indiana,190843 49 | Ohio,190371 50 | Kansas,187649 51 | Iowa,185087 52 | West Virginia,174865 -------------------------------------------------------------------------------- /draft/rgeodocoding2.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "RGeocoding" 3 | author: "patty" 4 | date: "December 4, 2015" 5 | output: slidy_presentation 6 | --- 7 | 8 | ## Geocoding in R 9 | 10 | Getting Started: Download the zipfile for this tutorial from 11 | https://github.com/dlab-geo/RGeocoding/archive/master.zip 12 | 13 | 14 | 15 | ## Overview 16 | 17 | - What is Geocoding 18 | - A simple example in Google Maps 19 | - Why Geocode 20 | - Geocoding in Detail 21 | - How to Geocode in R 22 | - with GGMAPS 23 | - with Yahoo Placefinder 24 | - with TIGER 25 | - Now what 26 | 27 | ## What is Geocoding 28 | 29 | Determine the geographic coordinates of a named place, street address, or zip code. 30 | 31 | - city, building, 32 | - street address, intersection, 33 | - mountain, landmark, 34 | - crime or other event location, 35 | - zip code, etc. 36 | 37 | ## Try It! 
38 | 39 | ![Barrows in Google Maps](screenshots/gmap_barrows.png) 40 | 41 | maps.google.com 42 | 43 | ## Geographic Coordinates 44 | 45 | lonlat 46 | 47 | || 48 | ------------------------|-----------------------------------|- 49 | **Latitude**|+/- 90 degrees|*how far north or south of equator* 50 | **Longitude**|+/- 180 degrees|*how far E/W of prime meridian* 51 | 52 | **Decimal Degrees (DD)** 53 | 37.870145, -122.25952 54 | 55 | **Degrees, minutes, seconds (DMS)** 56 | 37° 52' 12"N, 122° 15' 36" W 57 | 58 | 59 | ## Why? 60 | 61 | - Display locations on a map 62 | - Link locations to other data 63 | - Spatial analysis 64 | - Calculate distance, direction, area, etc. 65 | - Identify patterns & relationships: 66 | - clusters, outliers, neighbors 67 | 68 | ## Address Geocoding 69 | 70 | ## Where do Addresses come from 71 | 72 | - Extract from text documents 73 | - File or Database 74 | - Web Scraping 75 | 76 | ## Process 77 | 78 | ![geocoding_details](screenshots/geocoding_details1.png) 79 | 80 | ## Evaluation 81 | 82 | The reference database is extremely important 83 | 84 | ![geocoding_details2](screenshots/geocoding_details2.png) 85 | 86 | ## Geocoder Output Comparison 87 | 88 | ![output_compare](screenshots/output_compare.png) 89 | 90 | ## Considerations 91 | 92 | - Geographic scope 93 | - Time period 94 | - Output Quality 95 | - Number of addresses 96 | - Data Privacy/security 97 | - Cost 98 | 99 | ## Local Geocoding 100 | - ArcGIS 101 | 102 | ## Remote Geocoding 103 | ArcGIS 104 | Google 105 | Yahoo 106 | OpenStreetMap 107 | Data Science Toolkit (DSTK) 108 | *and many others* 109 | 110 | ## Geocoding in R 111 | 112 | Access an online Geocoder using an API 113 | *Application Programming Interface* 114 | 115 | In R via a package or script. 
116 | 117 | ## Geocoding in R with 118 | 119 | - GGMAP 120 | - Google 121 | - DSTK 122 | - RYDN & Yahoo 123 | - TIGER 124 | 125 | ## Geocoding with GGMAP 126 | - Created by David Kahle and Hadley Wickham, ggplot2 developer 127 | - Provides functionality for fetching online map data from Google and other services 128 | and overlaying other geodata using ggplot 129 | - Includes geocoding functions that use: 130 | - the Data Science Toolkit (DSTK) geocoding service 131 | - default, unlimited usage 132 | - Google's Geocoding service 133 | - limited to 2500 addresses per day 134 | - other limits may also apply! 135 | 136 | ## GGMAP 137 | - the Data Science Toolkit (DSTK) geocoding service 138 | - default, unlimited usage 139 | - solid, not great 140 | - older data, limited geographic coverage 141 | - sometimes unavailable 142 | 143 | - Google's Geocoding service 144 | - fantastic accuracy, worldwide coverage, up to date 145 | - limited to 2500 addresses per day 146 | 147 | 148 | ## Geocoding with GGMAP 149 | 150 | 
151 | > library(ggmap)
152 | > geocode("Barrows Hall, Berkeley, CA", source="google")
153 | 
154 |        lon      lat
155 | 1 -122.258 37.87006
156 | 
157 | 158 | Go ahead and stick that in maps.google.com 159 | - must be in *lat,lon* format! 160 | 161 | Then try Geocoding 162 | 163 | - an address 164 | - a zipcode 165 | 166 | ## ?geocode 167 | 168 | lonlat 169 | 170 | 171 | ## Try these changes 172 | 173 | - output="latlon" or "latlona" or "more" or "all" 174 | 175 | ## Output differences 176 |
177 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="latlon")
178 | 
179 |        lon      lat
180 | 1 -122.258 37.87006
181 | 
182 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="latlona")
183 | 
184 |        lon      lat                               address
185 | 1 -122.258 37.87006 barrows hall, berkeley, ca 94720, usa
186 | 
187 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="more")
188 | 
189 |        lon      lat    type loctype                               address    north    south
190 | 1 -122.258 37.87006 premise rooftop barrows hall, berkeley, ca 94720, usa 37.87147 37.86877
191 |        east      west      premise locality administrative_area_level_2
192 | 1 -122.2566 -122.2593 Barrows Hall Berkeley              Alameda County
193 |   administrative_area_level_1       country postal_code
194 | 1                  California United States       94720
195 | 
196 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="all")
197 | 
198 | 199 | ## Accuracy 200 |
201 | one <- geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")
202 | two <- geocode("sather gate, berkeley, ca", source="google", output="more", messaging=TRUE)
203 | 
204 | one$loctype
205 | two$loctype
206 | three$loctype  # NOTE: `three` is not created above; geocode a third location first
207 | 
208 | 209 | ## Append geocoded info to source data 210 | 211 | Create a data frame with three addresses 212 | 213 |
214 | 
215 | > df <- data.frame(
216 |           address = c("1517 Shattuck Ave, Berkeley, CA 94709", 
217 |               "Barrows Hall, Berkeley, CA", 
218 |               "2332 Haste St, Berkeley, CA 94704"),
219 |           stringsAsFactors = FALSE
220 | )
221 | 
222 | > df
223 |                                 address
224 | 1 1517 Shattuck Ave, Berkeley, CA 94709
225 | 2            Barrows Hall, Berkeley, CA
226 | 3     2332 Haste St, Berkeley, CA 94704
227 | 
228 | 229 | ## Geocode the three Addresses 230 | 231 |
232 | > df2 <- geocode(df$address,source="google", output="more")
233 | # just keep lat, lon, type, and loctype
234 | > df2 <- df2[,c(1:4)]
235 | 
236 | > df2
237 |         lon      lat           type loctype
238 | 1 -122.2689 37.87959 street_address rooftop
239 | 2 -122.2580 37.87006        premise rooftop
240 | 3 -122.2615 37.86537 street_address rooftop
241 | 
242 | 243 | ## Join output to input 244 |
245 | > df3 <- data.frame(df,df2)
246 | 
247 | > df3
248 |                                 address       lon      lat           type loctype
249 | 1 1517 Shattuck Ave, Berkeley, CA 94709 -122.2689 37.87959 street_address rooftop
250 | 2            Barrows Hall, Berkeley, CA -122.2580 37.87006        premise rooftop
251 | 3     2332 Haste St, Berkeley, CA 94704 -122.2615 37.86537 street_address rooftop
252 | > 
253 | 
254 | 
255 | 256 | ## Map it 257 | 258 |
259 | > map <- get_map(location=c(lon=mean(df3$lon), lat=mean(df3$lat)), zoom=14)
260 | > ggmap(map) +
261 |   geom_point(aes(x = lon, y = lat), data=df3, size = 6, col="red" )  
262 | 
263 | 264 | Try different (or no) zoom levels! 265 | 266 | 267 | ## Geocode a file of addresses 268 | 269 |
270 | # load the liquor store records whose addresses will be geocoded
271 | data <- read.csv("address_data/formatted/oak_liq_w_ids_types_headers.csv", stringsAsFactors = FALSE)
272 | head(data)  # preview the first rows
273 | 
274 |   id               name              street     city state   zip type
275 | 1  1    Wah Fay Liquors        2101 8th Ave  Oakland    CA 94606    p
276 | 2  2      Vision Liquor 1615 Macarthur Blvd  Oakland    CA 94602    p
277 | 3  3    Souza's Liquors         394 12th St  Oakland    CA 94607    p
278 | 4  4         Tk Liquors       1500 23th Ave  Oakland    CA 94606    p
279 | 5  5 Quadriga Wines Inc   6193 Ridgemont Dr  Oakland    CA 94619    p
280 | 6  6             Bev Mo   525 Embarcadero W  Oakland    CA 94607    c
281 | 
282 | 
283 | 284 | ## We need one column with address (not multiple) 285 |
286 | data$address <- paste(data$street, data$city, data$state, data$zip)  # one space-separated address string per row
287 | head(data)  # check the new address column
288 | 
289 |   id               name              street     city state   zip type                              address
290 | 1  1    Wah Fay Liquors        2101 8th Ave  Oakland    CA 94606    p        2101 8th Ave Oakland CA 94606
291 | 2  2      Vision Liquor 1615 Macarthur Blvd  Oakland    CA 94602    p 1615 Macarthur Blvd Oakland CA 94602
292 | 3  3    Souza's Liquors         394 12th St  Oakland    CA 94607    p         394 12th St Oakland CA 94607
293 | 4  4         Tk Liquors       1500 23th Ave  Oakland    CA 94606    p       1500 23th Ave Oakland CA 94606
294 | 5  5 Quadriga Wines Inc   6193 Ridgemont Dr  Oakland    CA 94619    p   6193 Ridgemont Dr Oakland CA 94619
295 | 6  6             Bev Mo   525 Embarcadero W  Oakland    CA 94607    c  525 Embarcadero W  Oakland CA 94607
296 | 
297 | 
298 | 299 | ## Irregularity is a Problem 300 | 301 |
302 | 
303 | > data[19,8]
304 | [1] "7305 Edgewater Dr #D Oakland CA 94621"
305 | 
306 | > geocode(data[19,8], source="google", output="latlona")
307 |         lon      lat                                    address
308 | 1 -81.44055 28.62331 7305 edgewater dr, lockhart, fl 32810, usa
309 | 
310 | data[19,8]<-"7305 Edgewater Dr Oakland CA 94621"  ## Why do we need to do this??
311 | 
312 | 313 | ## Geocode! 314 |
315 | # geocode every address and attach the result columns to the input rows in one step
316 | geocoded_output <- data.frame(data, geocode(data$address, source = "google", output = "latlona"))
317 | 
318 | ## Inspect the first rows of the combined table
319 | head(geocoded_output)
320 | 
321 | ## Write the combined table to disk (no row-name column)
322 | write.csv(geocoded_output, row.names = FALSE, file = "geocoded_output.csv")
323 | 
324 | 325 | ## Know Your Limits 326 | 327 | #### Scaling up to more than 2500 records? 328 | 329 | geocodeQueryCheck() #how am I doing? 330 | 331 | 332 | ## Working With Limits 333 | 
334 | maxrecs <- geocodeQueryCheck()  # per the ggmap docs: queries still available; used below as the batch size
335 | 
336 | data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=FALSE) # read data
337 | data$address <- with(data,paste(street,city,state,zip, sep=" ")) #add single column address
338 | if (!("geocoded" %in% names(data))) {  # test for the column; exists() only looks up variable names
339 |   # do this once: flag every row as not yet geocoded
340 |   data$geocoded <- 0
341 | }
342 | not_geocoded <- subset(data,geocoded == 0)
343 | not_geocoded <- head(not_geocoded, maxrecs)  # keep at most maxrecs rows for this batch
344 | nrow(not_geocoded)
345 | not_geocoded$address[not_geocoded$address == "7305 Edgewater Dr #D Oakland CA 94621"] <- "7305 Edgewater Dr Oakland CA 94621"  ## Why do we need to do this??
346 | 
347 | geocoded_output <- geocode(not_geocoded$address, output = "latlona", source = "google")
348 | not_geocoded$geocoded <- 1  # mark the rows that were just sent to the geocoder
349 | geocoded_output <- data.frame(not_geocoded, geocoded_output)  # bind results to the same rows that were geocoded
350 | #save output
351 | write.csv(geocoded_output,file="geocoded_output.csv", row.names=FALSE)
352 | 
353 | 354 | 355 | ## More Google Limits 356 | 357 | https://developers.google.com/maps/documentation/geocoding/usage-limits 358 | 359 | limits 360 | 361 | ## Yahoo Placefinder - non-commercial version! 362 | 363 | - Similar quality to Google 364 | - Limited to 2000 geocodes per day 365 | - **But** fewer usage restrictions 366 | 367 | - Available via *RYDN* Package 368 | - YDN = Yahoo Developers Network 369 | 370 | ## First! 371 | Apply for an account on YDN 372 | 373 | **Add slides for account** 374 | 375 | 376 | ## Next - Install RYDN 377 | # devtools::install_github("trestletech/rydn") 378 | library(rydn) 379 | 380 | ## Key in! 381 | # read in your keys from YDN (don't share) 382 | source("keys/ydn_keys.R") 383 | #mykey="dj0.......................00Zg--" 384 | #mysecret ="00....................8" 385 | 386 | ## Testing 387 | myloc <- find_place("Barrows Hall, Berkeley,ca",key=mykey,secret=mysecret) 388 | myloc #see what was returned 389 | 390 | *To interpret the response, see: https://developer.yahoo.com/boss/geo/docs/supported_responses.html* 391 | 392 | ## work with subset of the returned info 393 | myloc_sub <- myloc[1 ,c("quality", "latitude", "longitude", "radius")] #subset 394 | 395 | #convert strings to numerics 396 | myloc$longitude <- as.numeric(myloc$longitude) 397 | myloc$latitude <- as.numeric(myloc$latitude) 398 | 399 | ## Now geocode! 
400 | 401 | 402 | ## What's median income around my liquor stores 403 | 404 | 405 | 406 | ## References 407 | - https://cran.r-project.org/web/packages/ggmap/index.html 408 | - https://journal.r-project.org/archive/2013-1/kahle-wickham.pdf 409 | - https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/ggmap/ggmapCheatsheet.pdf 410 | 411 | 412 | 413 | -------------------------------------------------------------------------------- /output/address_data_geocoded2.csv: -------------------------------------------------------------------------------- 1 | "GEOID10","id","name","street","city","state","zip","type","full_address","glon","glat","lon","lat","fips","CTIDFP00","totpop","under18","pct_under18" 2 | "06001401000",17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way, Oakland, CA 94609",-122.2691521,37.8281185,-122.268896055,37.828055535,"060014010004018","06001401000",6193,820,13.2 3 | "06001401100",14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave, Oakland, CA 94609",-122.2650564,37.8255986,-122.265324385,37.8257232560001,"060014011003009","06001401100",4138,264,6.4 4 | "06001401800",13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St, Oakland, CA 94607",-122.2942244,37.8113527,-122.294328517,37.8109490200001,"060014018001005","06001401800",1866,425,22.8 5 | "06001401800",16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St, Oakland, CA 94607",-122.298875,37.8071804,-122.298818746,37.8072919190001,"060014018002012","06001401800",1866,425,22.8 6 | "06001402200",12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St, Oakland, CA 94607",-122.2951819,37.8056587,-122.295257249,37.805456087,"060014022002015","06001402200",2406,501,20.8 7 | "06001403000",3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St, Oakland, CA 
94607",-122.2705233,37.8026203,-122.270542917,37.80238021,"060014030001005","06001403000",3167,308,9.7 8 | "06001403000",9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin St, Oakland, CA 94607",-122.2719881,37.8002274,-122.272205314,37.8001763230001,"060014030001017","06001403000",3167,308,9.7 9 | "06001403502",11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St, Oakland, CA 94611",-122.2557954,37.8180398,-122.25600678,37.818050858,"060014035022001","06001403500",2081,118,5.7 10 | "06001403800",15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave, Oakland, CA 94610",-122.2443398,37.8110554,-122.244180914,37.8107465640001,"060014038003008","06001403800",3323,377,11.3 11 | "06001403800",18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave, Oakland, CA 94610",-122.2444949,37.8185137,-122.244728744,37.8186177630001,"060014038004003","06001403800",3323,377,11.3 12 | "06001404900",2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd, Oakland, CA 94602",-122.223667,37.800329,-122.223598292,37.8005377910001,"060014049003026","06001404900",3995,830,20.8 13 | "06001405301",20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St, Oakland, CA 94606",-122.2510981,37.7994349,-122.251216784,37.7992969090001,"060014053012004","06001405300",2899,318,11 14 | "06001405500",1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave, Oakland, CA 94606",-122.2448776,37.79836,-122.244758177,37.7982741370001,"060014055003004","06001405500",4104,683,16.6 15 | "06001406201",4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave, Oakland, CA 94606",-122.2349511,37.7842636,-122.235061143,37.7842299920001,"060014062014000","06001406201",4296,1111,25.9 16 | "06001407500",7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd, Oakland, CA 
94601",-122.1983483,37.7724053,-122.198434586,37.7725488080001,"060014075003004","06001407500",4201,1321,31.4 17 | "06001408100",5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr, Oakland, CA 94619",-122.1671766,37.7843368,-122.167020084,37.78445829,"060014081002000","06001408100",6266,765,12.2 18 | "06001408800",10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd, Oakland, CA 94621",-122.1994052,37.7648862,-122.199439298,37.764960681,"060014088004003","06001408800",6348,2090,32.9 19 | "06001408900",8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th Ave, Oakland, CA 94621",-122.1863008,37.7555499,-122.186416836,37.7556614330001,"060014089002026","06001408900",3105,901,29 20 | "06001409000",19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr #D, Oakland, CA 94621",-122.2064413,37.744684,-122.20642817,37.745344269,"060014090002013","06001409000",3752,1080,28.8 21 | "06001983200",6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland, CA 94607",-122.2791286,37.7959002,-122.278601227,37.79605572,"060019832001030","06001403200",572,38,6.6 22 | -------------------------------------------------------------------------------- /output/address_data_geocoded_esri.csv: -------------------------------------------------------------------------------- 1 | "ID","lon","lat","score","locName","status","matchAddr","side","addressType" 2 | 2,-122.223598292,37.8005377910001,100,"World","M","1615 MacArthur Blvd, Oakland, California, 94602","L","PointAddress" 3 | 4,-122.235061143,37.7842299920001,100,"World","M","1500 23rd Ave, Oakland, California, 94606","R","StreetAddress" 4 | 1,-122.244758177,37.7982741370001,100,"World","M","2101 8th Ave, Oakland, California, 94606","L","PointAddress" 5 | 3,-122.270542917,37.80238021,100,"World","M","394 12th St, Oakland, California, 94607","R","PointAddress" 6 | 
5,-122.167020084,37.78445829,100,"World","M","6193 Ridgemont Dr, Oakland, California, 94619","L","PointAddress" 7 | 8,-122.186416836,37.7556614330001,100,"World","M","1200 78th Ave, Oakland, California, 94621","R","PointAddress" 8 | 7,-122.198434586,37.7725488080001,100,"World","M","5403 Foothill Blvd, Oakland, California, 94601","L","StreetAddress" 9 | 6,-122.278601227,37.79605572,100,"World","M","525 Embarcadero W, Oakland, California, 94607","L","StreetAddress" 10 | 10,-122.199439298,37.764960681,100,"World","M","5913 International Blvd, Oakland, California, 94621","L","StreetAddress" 11 | 9,-122.272205314,37.8001763230001,100,"World","M","828 Franklin St, Oakland, California, 94607","R","PointAddress" 12 | 11,-122.25600678,37.818050858,100,"World","M","3210 Harrison St, Oakland, California, 94611","R","PointAddress" 13 | 12,-122.295257249,37.805456087,100,"World","M","1460 7th St, Oakland, California, 94607","R","StreetAddress" 14 | 14,-122.265324385,37.8257232560001,100,"World","M","3710 Telegraph Ave, Oakland, California, 94609","R","StreetAddress" 15 | 13,-122.294328517,37.8109490200001,100,"World","M","1333 Peralta St, Oakland, California, 94607","L","StreetAddress" 16 | 15,-122.244180914,37.8107465640001,100,"World","M","3293 Lakeshore Ave, Oakland, California, 94610","L","PointAddress" 17 | 16,-122.298818746,37.8072919190001,100,"World","M","1647 8th St, Oakland, California, 94607","L","PointAddress" 18 | 20,-122.251216784,37.7992969090001,100,"World","M","350 E 18th St, Oakland, California, 94606","R","PointAddress" 19 | 18,-122.244728744,37.8186177630001,100,"World","M","3900 Grand Ave, Oakland, California, 94610","R","PointAddress" 20 | 17,-122.268896055,37.828055535,100,"World","M","3849 Martin Luther King Jr Way, Oakland, California, 94609","L","PointAddress" 21 | 19,-122.20642817,37.745344269,100,"World","M","7305 Edgewater Dr, Oakland, California, 94621","L","StreetAddress" 22 | 
-------------------------------------------------------------------------------- /output/address_data_geocoded_google.csv: -------------------------------------------------------------------------------- 1 | "lon","lat","type","loctype","address","north","south","east","west","street_number","route","locality","administrative_area_level_2","administrative_area_level_1","country","postal_code","postal_code_suffix","neighborhood","subpremise" 2 | -122.2448776,37.79836,"premise","rooftop","2101 8th ave, oakland, ca 94606, usa",37.7997158302915,37.7970178697085,-122.243540919708,-122.246238880292,"2101","8th Avenue","Oakland","Alameda County","California","United States","94606","2007",NA,NA 3 | -122.223667,37.800329,"street_address","rooftop","1615 macarthur blvd, oakland, ca 94602, usa",37.8016779802915,37.7989800197085,-122.222318019708,-122.225015980292,"1615","MacArthur Boulevard","Oakland","Alameda County","California","United States","94602","1606","Glenview",NA 4 | -122.2705233,37.8026203,"street_address","rooftop","394 12th st, oakland, ca 94607, usa",37.8039692802915,37.8012713197085,-122.269174319708,-122.271872280292,"394","12th Street","Oakland","Alameda County","California","United States","94607","4249","Downtown Oakland",NA 5 | -122.2349511,37.7842636,"street_address","rooftop","1500 23rd ave, oakland, ca 94606, usa",37.7856125802915,37.7829146197085,-122.233602119708,-122.236300080291,"1500","23rd Avenue","Oakland","Alameda County","California","United States","94606","5035","Rancho San Antonio",NA 6 | -122.1671766,37.7843368,"premise","rooftop","6193 ridgemont dr, oakland, ca 94619, usa",37.7856903302915,37.7829923697085,-122.165849819708,-122.168547780292,"6193","Ridgemont Drive","Oakland","Alameda County","California","United States","94619","3724","Caballo Hills",NA 7 | -122.2791286,37.7959002,"street_address","rooftop","525 embarcadero west, oakland, ca 94607, 
usa",37.7972491802915,37.7945512197085,-122.277779619708,-122.280477580292,"525","Embarcadero West","Oakland","Alameda County","California","United States","94607","3565","Downtown Oakland",NA 8 | -122.1983483,37.7724053,"street_address","range_interpolated","5403 foothill blvd, oakland, ca 94601, usa",37.7737542802915,37.7710563197085,-122.196999319708,-122.199697280292,"5403","Foothill Boulevard","Oakland","Alameda County","California","United States","94601","5515","Fairfax",NA 9 | -122.1863008,37.7555499,"premise","rooftop","1200 78th ave, oakland, ca 94621, usa",37.7568967302915,37.7541987697085,-122.184959919709,-122.187657880291,"1200","78th Avenue","Oakland","Alameda County","California","United States","94621","2604","Fitchburg",NA 10 | -122.2719881,37.8002274,"street_address","rooftop","828 franklin st, oakland, ca 94607, usa",37.8015763802915,37.7988784197085,-122.270639119709,-122.273337080291,"828","Franklin Street","Oakland","Alameda County","California","United States","94607","4202","Chinatown",NA 11 | -122.1994052,37.7648862,"street_address","rooftop","5913 international blvd, oakland, ca 94621, usa",37.7662351802915,37.7635372197085,-122.198056219709,-122.200754180291,"5913","International Boulevard","Oakland","Alameda County","California","United States","94621","4202","East 14th Street Business District",NA 12 | -122.2557954,37.8180398,"premise","rooftop","3210 harrison st, oakland, ca 94611, usa",37.8193728302915,37.8166748697085,-122.254448769709,-122.257146730291,"3210","Harrison Street","Oakland","Alameda County","California","United States","94611","5527","Oakland Ave - Harrison St",NA 13 | -122.2951819,37.8056587,"premise","rooftop","1460 7th st, oakland, ca 94607, usa",37.8070176802915,37.8043197197085,-122.293811069708,-122.296509030292,"1460","7th Street","Oakland","Alameda County","California","United States","94607",NA,"Prescott",NA 14 | -122.2942244,37.8113527,"street_address","rooftop","1333 peralta st, oakland, ca 94607, 
usa",37.8127016802915,37.8100037197085,-122.292875419708,-122.295573380292,"1333","Peralta Street","Oakland","Alameda County","California","United States","94607","2015","Prescott",NA 15 | -122.2650564,37.8255986,"premise","rooftop","3710 telegraph ave, oakland, ca 94609, usa",37.8269623802915,37.8242644197085,-122.263706369709,-122.266404330291,"3710","Telegraph Avenue","Oakland","Alameda County","California","United States","94609",NA,"Mosswood",NA 16 | -122.2443398,37.8110554,"premise","rooftop","3293 lakeshore ave, oakland, ca 94610, usa",37.8123312802915,37.8096333197085,-122.242957869709,-122.245655830291,"3293","Lakeshore Avenue","Oakland","Alameda County","California","United States","94610","2719","Lakeshore",NA 17 | -122.298875,37.8071804,"premise","rooftop","1647 8th st, oakland, ca 94607, usa",37.8085135802915,37.8058156197085,-122.297543019708,-122.300240980292,"1647","8th Street","Oakland","Alameda County","California","United States","94607","1354","Prescott",NA 18 | -122.2691521,37.8281185,"premise","rooftop","3849 martin luther king jr way, oakland, ca 94609, usa",37.8294589302915,37.8267609697085,-122.267793469708,-122.270491430292,"3849","Martin Luther King Junior Way","Oakland","Alameda County","California","United States","94609","2313","Mosswood",NA 19 | -122.2444949,37.8185137,"premise","rooftop","3900 grand ave, oakland, ca 94610, usa",37.8198538302915,37.8171558697085,-122.243162819709,-122.245860780291,"3900","Grand Avenue","Oakland","Alameda County","California","United States","94610",NA,"Grand Lake",NA 20 | -122.2064413,37.744684,"subpremise","rooftop","7305 edgewater dr d, oakland, ca 94621, usa",37.7460985802915,37.7434006197085,-122.205147969709,-122.207845930291,"7305","Edgewater Drive","Oakland","Alameda County","California","United States","94621",NA,NA,"D" 21 | -122.2510981,37.7994349,"premise","rooftop","350 e 18th st, oakland, ca 94606, usa",37.8008464802915,37.7981485197085,-122.249700119709,-122.252398080291,"350","East 18th 
Street","Oakland","Alameda County","California","United States","94606","1814","Ivy Hill",NA 22 | -------------------------------------------------------------------------------- /output/geocoded_addresses_out.csv: -------------------------------------------------------------------------------- 1 | "id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side","state_fips","county_fips","tract_fips","block_fips","lon","lat" 2 | 3,"10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 GOLF LINKS RD, OAKLAND, CA, 94605","-122.126884,37.75383",125011765,"L",6,1,409900,NA,-122.126884,37.75383 3 | 2,"4728 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4728 SCOTIA AVE, OAKLAND, CA, 94605","-122.125046,37.75488",125011836,"R",6,1,409900,NA,-122.125046,37.75488 4 | 1,"10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 COTTER ST, OAKLAND, CA, 94605","-122.12373,37.755764",125011838,"R",6,1,409900,NA,-122.12373,37.755764 5 | 10,"271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.13135,37.759373",125011739,"R",6,1,409900,NA,-122.13135,37.759373 6 | 7,"4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 SCOTIA AVE, OAKLAND, CA, 94605","-122.12386,37.75519",125011839,"L",6,1,409900,NA,-122.12386,37.75519 7 | 6,"111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 DONNA WAY, OAKLAND, CA, 94605","-122.13217,37.760193",125011738,"L",6,1,409900,NA,-122.13217,37.760193 8 | 5,"380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.12819,37.761078",125011702,"L",6,1,409900,NA,-122.12819,37.761078 9 | 4,"4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 GRASS VALLEY RD, OAKLAND, CA, 94605","-122.12256,37.751083",617284248,"L",6,1,409900,NA,-122.12256,37.751083 10 | 9,"10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 PEBBLE BEACH DR, OAKLAND, CA, 
94605","-122.12747,37.761383",125011703,"L",6,1,409900,NA,-122.12747,37.761383 11 | 8,"248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.1325,37.759678",125011736,"L",6,1,409900,NA,-122.1325,37.759678 12 | 11,"4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 SHETLAND AVE, OAKLAND, CA, 94605","-122.12222,37.75289",125011864,"R",6,1,409900,NA,-122.12222,37.75289 13 | 12,"24 KEY CT, Oakland, CA, 94605","Match","Exact","24 KEY CT, OAKLAND, CA, 94605","-122.124565,37.755417",125011835,"R",6,1,409900,NA,-122.124565,37.755417 14 | -------------------------------------------------------------------------------- /output/geocoded_addresses_single_out.csv: -------------------------------------------------------------------------------- 1 | "id","street","city","state","zip","census_format","lat","lon","geoid" 2 | 2,"4728 SCOTIA AVE","Oakland","CA",94605,"street=4728 SCOTIA AVE&city=Oakland&state=CA&zip=94605",37.75488,-122.125046,"06001409900" 3 | 3,"10834 GOLF LINKS RD","Oakland","CA",94605,"street=10834 GOLF LINKS RD&city=Oakland&state=CA&zip=94605",37.75383,-122.126884,"06001409900" 4 | 4,"4627 GRASS VALLEY RD","Oakland","CA",94605,"street=4627 GRASS VALLEY RD&city=Oakland&state=CA&zip=94605",37.751083,-122.12256,"06001409900" 5 | 5,"380 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=380 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.761078,-122.12819,"06001409900" 6 | 6,"111 DONNA WAY","Oakland","CA",94605,"street=111 DONNA WAY&city=Oakland&state=CA&zip=94605",37.760193,-122.13217,"06001409900" 7 | 7,"4855 SCOTIA AVE","Oakland","CA",94605,"street=4855 SCOTIA AVE&city=Oakland&state=CA&zip=94605",37.75519,-122.12386,"06001409900" 8 | 8,"248 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=248 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.759678,-122.1325,"06001409900" 9 | 9,"10520 PEBBLE BEACH DR","Oakland","CA",94605,"street=10520 PEBBLE BEACH 
DR&city=Oakland&state=CA&zip=94605",37.761383,-122.12747,"06001409900" 10 | 10,"271 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=271 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.759373,-122.13135,"06001409900" 11 | 11,"4840 SHETLAND AVE","Oakland","CA",94605,"street=4840 SHETLAND AVE&city=Oakland&state=CA&zip=94605",37.75289,-122.12222,"06001409900" 12 | 12,"24 KEY CT","Oakland","CA",94605,"street=24 KEY CT&city=Oakland&state=CA&zip=94605",37.755417,-122.124565,"06001409900" 13 | -------------------------------------------------------------------------------- /output/tracts2010.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.dbf -------------------------------------------------------------------------------- /output/tracts2010.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /output/tracts2010.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.shp -------------------------------------------------------------------------------- /output/tracts2010.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.shx -------------------------------------------------------------------------------- /screenshots/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/.DS_Store -------------------------------------------------------------------------------- /screenshots/addresses1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/addresses1.png -------------------------------------------------------------------------------- /screenshots/census_api_key_apply.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/census_api_key_apply.png -------------------------------------------------------------------------------- /screenshots/census_geo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/census_geo.png -------------------------------------------------------------------------------- /screenshots/dual_address_match.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/dual_address_match.png -------------------------------------------------------------------------------- /screenshots/esri_wgs_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/esri_wgs_token.png -------------------------------------------------------------------------------- /screenshots/fcc_api.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/fcc_api.png -------------------------------------------------------------------------------- /screenshots/fips_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/fips_code.png -------------------------------------------------------------------------------- /screenshots/geocode_details1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocode_details1.png -------------------------------------------------------------------------------- /screenshots/geocoding_details1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocoding_details1.png -------------------------------------------------------------------------------- /screenshots/geocoding_details2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocoding_details2.png -------------------------------------------------------------------------------- /screenshots/ggmap_geocode_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ggmap_geocode_help.png -------------------------------------------------------------------------------- /screenshots/ggmap_plot1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ggmap_plot1.png -------------------------------------------------------------------------------- /screenshots/gmap_barrows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/gmap_barrows.png -------------------------------------------------------------------------------- /screenshots/google_limits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/google_limits.png -------------------------------------------------------------------------------- /screenshots/output_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/output_compare.png -------------------------------------------------------------------------------- /screenshots/popdens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/popdens.png -------------------------------------------------------------------------------- /screenshots/ref_data_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ref_data_quality.png -------------------------------------------------------------------------------- /screenshots/social_explorer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/social_explorer.png -------------------------------------------------------------------------------- /screenshots/ydn_boss_placefinder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_boss_placefinder.png -------------------------------------------------------------------------------- /screenshots/ydn_create_application.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_create_application.png -------------------------------------------------------------------------------- /screenshots/ydn_keys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_keys.png -------------------------------------------------------------------------------- /screenshots/ydn_landing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_landing.png -------------------------------------------------------------------------------- /screenshots/ydn_signup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_signup.png -------------------------------------------------------------------------------- /screenshots/ydn_usage_limits.png: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_usage_limits.png
# --------------------------------------------------------------------------------
# /scripts/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/scripts/.DS_Store
# --------------------------------------------------------------------------------
# /scripts/.Rapp.history:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/scripts/.Rapp.history
# --------------------------------------------------------------------------------
# /scripts/draft/tiger_geocoding.R:
# --------------------------------------------------------------------------------
#
# Geocoding with Tiger Geocoding Service
#
# Draft script: uploads a CSV of addresses to the Census batch geocoder,
# reads the CSV response into a data frame and splits the coordinate
# column into numeric lon / lat columns.
#

#clean environment
# NOTE(review): this wipes the caller's workspace when the script is sourced
# interactively; kept because the draft has always started this way.
rm(list=ls())

#Load libraries
library(curl)

#set working directory
# NOTE(review): machine-specific path; the relative paths below depend on it.
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# CLI format for CURL
#format of geocoding request for Tiger Geocoder
#curl --form addressFile=@tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
### or this if saving to file as indicated by -o flag
#curl --form addressFile=@tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
## -o geocoded_addresses_with_fips.csv

tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# https so the uploaded address file is not sent in clear text
tiger_url_prefix <- "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

# The three strings below only mirror the curl command line shown above;
# the httr request further down does not use them.
tiger_url_options <- "--form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010"

tiger_url_address_options <- paste0("--form addressFile=@", tiger_input_addressFile)

tiger_url <- paste(tiger_url_prefix, tiger_url_options, tiger_url_address_options)

# worked:
# curl --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
##--form addressFile=@tiger/tiger_12addresses_to_geocode.csv
##--form benchmark=Public_AR_Census2010
##--form vintage=Census2010_Census2010


library(httr)

## WORKED - thanks to: http://stackoverflow.com/questions/26611289/curl-post-statement-to-rcurl-or-httr
## add verbose() to see details of process
geocoded_addresses <- POST(
  tiger_url_prefix,
  encode = "multipart",
  body = list(
    addressFile = upload_file(tiger_input_addressFile),
    benchmark = "Public_AR_Census2010",
    vintage = "Census2010_Census2010"
  )
)
# fail loudly on an HTTP error instead of parsing an error page as CSV
stop_for_status(geocoded_addresses)

# Take the response body as text once; it is both saved and parsed from memory.
raw_csv <- content(geocoded_addresses, as = "text", encoding = "UTF-8")

#write raw output to file (kept so the raw service response can be inspected)
writeLines(raw_csv, "test_out.txt")

#read the response into a data frame directly, no round trip through the file
mylocs <- read.csv(text = raw_csv, header = FALSE, stringsAsFactors = FALSE)
head(mylocs)

# split the "lon,lat" values in V6 into two separate numeric columns;
# rows without coordinates get NA in both
coords <- strsplit(as.character(mylocs$V6), ",", fixed = TRUE)
mylocs$lon <- as.numeric(vapply(coords, function(p) p[1], character(1)))
mylocs$lat <- as.numeric(vapply(coords, function(p) p[2], character(1)))

# --------------------------------------------------------------------------------
# /scripts/esri_wgs_geocoding.R:
# --------------------------------------------------------------------------------

# This script provides an example of accessing the ESRI world geocoding
service. 3 | 4 | # This code borrows HEAVILY (almost completely) and with thanks from 5 | # Claudia Engel: https://github.com/cengel/ArcGIS_geocoding 6 | # See her repo for more details and other examples 7 | 8 | ################################## 9 | ## Single Line Geocode Function ## 10 | ################################## 11 | # The function takes: 12 | # - one address at a time as one string (SingleLine) 13 | # - token - which you get from developers.arcgis.com 14 | # see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm 15 | # - TRUE/FALSE - allow to return Postal codes if a full street address match cannot be found (default is TRUE) 16 | # 17 | # The function returns: 18 | # lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84 19 | # score - The accuracy of the address match between 0 and 100. 20 | # locName - The component locator used to return a particular match result 21 | # status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U) 22 | # matchAddr - Complete address returned for the geocode request. 23 | # side - The side of the street where an address resides relative to the direction 24 | # of feature digitization 25 | # addressType - The match level for a geocode request. "PointAddress" is typically the 26 | # most spatially accurate match level. "StreetAddress" differs from PointAddress 27 | # because the house number is interpolated from a range of numbers. "StreetName" is similar, 28 | # but without the house number. 

# Geocode one free-form address with the ESRI World Geocoding Service.
#
# address - a single address as one string (SingleLine format)
# token   - ArcGIS access token
# postal  - TRUE (default) lets the service fall back to postal-code matches;
#           anything else restricts matching with category=Address
#
# Returns a one-row data frame: lon, lat, score, locName, status, matchAddr,
# side, addressType. Stops with an error on an HTTP failure, a service error
# payload, or a response without any location.
geocode_one <- function(address, token, postal = TRUE) {
  # ESRI geolocator (https so the token is not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # Build the SingleLine record as real JSON. Backslashes and double quotes are
  # escaped so that addresses containing quotes or apostrophes stay valid.
  safe_address <- gsub("\\", "\\\\", address, fixed = TRUE)
  safe_address <- gsub("\"", "\\\"", safe_address, fixed = TRUE)
  addresses <- paste0(
    "{\"records\":[{\"attributes\":{\"OBJECTID\":1,\"SingleLine\":\"",
    safe_address,
    "\"}}]}"
  )

  # Let httr percent-encode every parameter; hand-built URLs left characters
  # such as '#' and '&' in the address unescaped and cut the request short.
  query <- list(addresses = addresses, token = token, f = "json")
  if (!isTRUE(postal)) {
    query$category <- "Address"
  }

  # submit
  rawdata <- httr::GET(gserver, query = query)
  httr::stop_for_status(rawdata)

  # parse JSON and process result
  res <- httr::content(rawdata, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("Geocoding service returned an error: ", res$error$message,
         call. = FALSE)
  }
  if (length(res$locations) == 0) {
    stop("Geocoding service returned no locations.", call. = FALSE)
  }

  # a field missing from the response becomes NA instead of breaking data.frame()
  or_na <- function(x) if (is.null(x)) NA else x
  loc <- res$locations[[1]]
  data.frame(
    lon = as.numeric(or_na(loc$location$x)),
    lat = as.numeric(or_na(loc$location$y)),
    score = or_na(loc$score),
    locName = or_na(loc$attributes$Loc_name),
    status = or_na(loc$attributes$Status),
    matchAddr = or_na(loc$attributes$Match_addr),
    side = or_na(loc$attributes$Side),
    addressType = or_na(loc$attributes$Addr_type)
  )
}

#######################################
## Multi Line Batch Geocode Function ##
#######################################
# The function takes:
# - ID variable to identify records, must be numeric and should be unique
# - multiple addresses as vectors, separated into: Street, City, State, Zip
# - token - which you get from developers.arcgis.com
# see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
#
# It can take a maximum of 1000 addresses. If more, it returns an error.
#
# The function returns a data frame with the following fields:
# ID - Result ID can be used to join the output fields in the response to the attributes
# in the original address table.
# lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
# score - The accuracy of the address match between 0 and 100.
# locName - The component locator used to return a particular match result
# status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
# matchAddr - Complete address returned for the geocode request.
# side - The side of the street where an address resides relative to the direction
# of feature digitization
# addressType - The match level for a geocode request. "PointAddress" is typically the
# most spatially accurate match level. "StreetAddress" differs from PointAddress
# because the house number is interpolated from a range of numbers. "StreetName" is similar,
# but without the house number.

# Batch geocode up to 1000 addresses with the ESRI World Geocoding Service.
# Stops with an error when more than 1000 ids are given, when id is not
# numeric, on an HTTP failure, or when the service answers with an error
# payload. Returns an empty data frame when there is nothing to report.
geocode_many <- function(id, street, city, state, zip, token) {
  # check if we have more than 1000, if so stop.
  if (length(id) > 1000) {
    message("length is: ", length(id))
    stop("Can only process up to 1000 addresses at a time.", call. = FALSE)
  }

  # check if id is numeric
  if (!is.numeric(id)) {
    stop("id variable needs to be numeric.", call. = FALSE)
  }

  # nothing to geocode: skip the request altogether
  if (length(id) == 0) {
    return(data.frame())
  }

  # make data frame (recycles scalars such as a single state and checks that
  # the vector lengths are compatible); address parts are sent as strings
  adr_df <- data.frame(
    OBJECTID = id,
    Address = as.character(street),
    City = as.character(city),
    State = as.character(state),
    Zip = as.character(zip),
    stringsAsFactors = FALSE
  )

  # make json: one record per row, built column-wise so OBJECTID stays numeric
  # and no value picks up the padding that apply()/as.matrix() adds
  records <- lapply(seq_len(nrow(adr_df)), function(i) {
    list(attributes = list(
      OBJECTID = adr_df$OBJECTID[i],
      Address = adr_df$Address[i],
      City = adr_df$City[i],
      State = adr_df$State[i],
      Zip = adr_df$Zip[i]
    ))
  })
  adr_json <- rjson::toJSON(list(records = records))

  # Identify the geocoding web service URL (https so the token is not sent in
  # clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # submit
  req <- httr::POST(
    url = gserver,
    body = list(addresses = adr_json, f = "json", token = token),
    encode = "form"
  )
  httr::stop_for_status(req) # error check

  # process and parse
  res <- httr::content(req, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("Geocoding service returned an error: ", res$error$message,
         call. = FALSE)
  }
  if (length(res$locations) == 0) {
    return(data.frame())
  }

  # a field missing from the response becomes NA instead of breaking data.frame()
  or_na <- function(x) if (is.null(x)) NA else x
  rows <- lapply(res$locations, function(loc) {
    data.frame(
      ID = or_na(loc$attributes$ResultID),
      lon = as.numeric(or_na(loc$location$x)),
      lat = as.numeric(or_na(loc$location$y)),
      score = or_na(loc$score),
      locName = or_na(loc$attributes$Loc_name),
      status = or_na(loc$attributes$Status),
      matchAddr = or_na(loc$attributes$Match_addr),
      side = or_na(loc$attributes$Side),
      addressType = or_na(loc$attributes$Addr_type)
    )
  })
  # bind once at the end instead of growing a data frame inside the loop
  do.call(rbind, rows)
}

#--------------------------------------------------------------------------------------
# Some code to use the above functions
#--------------------------------------------------------------------------------------

# set your access token
myToken <- "enter your long ugly ESRI geocoding access token here"

# ---------------------------
# GEOCODE A SINGLE ADDRESS
# ---------------------------
# geocode_output <-geocode_one("1600 Pennsylvania Avenue NW, Washington, DC", myToken, postal = TRUE)

# -----------------------------
# GEOCODE A BATCH OF ADDRESSES
# -----------------------------

# make up a data frame with some addresses:
adr_df <- data.frame(
  ID = 1:3,
  street = c('450 Serra Mall', '1600 Amphitheatre Pkwy', '1355 Market Street Suite 900'),
  city = c('Stanford', 'Mountain View', 'San Francisco'),
  state = 'CA',
  zip = c('94305', '94043', '94103'))


# Batch geocode your dataframe of addresses with the following function
#adr_gc <- geocode_many(adr_df$ID, adr_df$street, adr_df$city, adr_df$state, adr_df$zip, myToken)

# join back with original data
#merge(adr_df, adr_gc, by = "ID", all.x = TRUE)

# --------------------------------------------------------------------------------
# /scripts/fcc_latlon2fips.R:
# --------------------------------------------------------------------------------
# FCC's
# Census Block Conversions API
# Get Block FIPS for lat/lon
## After: https://gist.githubusercontent.com/ramhiser/f09a71d96a4dec80994c/raw/d3e1d9fc1e7f38b2a402eee3237221fa9a47d1da/latlong2fips.r

# Look up the Census block FIPS code for one latitude/longitude pair.
#
# latitude, longitude - single numeric values (decimal degrees)
#
# Returns the block FIPS as a character string, or "" when either coordinate
# is NA or the response carries no usable FIPS value. The empty-string
# sentinel is the same one the latlon2fips() in google_geocoding_ggmap_v2.R
# uses for NA input.
latlon2fips <- function(latitude, longitude) {
  # no request for missing coordinates
  if (is.na(latitude) || is.na(longitude)) {
    return("")
  }

  url <- "https://geo.fcc.gov/api/census/block/find?latitude=%f&longitude=%f&showall=true&format=json"
  url <- sprintf(url, latitude, longitude)
  json <- RCurl::getURL(url)
  json <- RJSONIO::fromJSON(json)

  fips <- as.character(json$Block['FIPS']) #Block FIPS includes state, county, tract & blockgroup FIPS
  # Defensive: an absent Block gives character(0) and a NULL FIPS entry is
  # coerced to the string "NULL"; report both as "no FIPS".
  if (length(fips) != 1 || is.na(fips) || fips == "NULL") {
    return("")
  }
  fips
}

# --------------------------------------------------------------------------------
# /scripts/google_geocoding_ggmap.R:
# --------------------------------------------------------------------------------
#
# Using GGMAP to geocode with the Google Geocoding API
# last updated 08/16/2018
#
# Notes:
# 1. Read the docs at: https://developers.google.com/maps/documentation/geocoding/start
# 2. As of July 2018 you may need to register for a new Google Geocoding API key
# and associate it with a credit card.
# The documentation indicates you get $200 of free Google API access per month.
# That would be 40,000 free geocodes per month if that were the only thing you used it for.
# So - use with care, protect your API keys so others don't use them.
#

library(ggmap)

# NOTE(review): machine-specific path; the relative CSV path below depends on it.
setwd("~/Documents/Dlab/workshops/2018/RGeocoding")

# Replace the placeholder with your own Google API key - never commit a real key.
# (The assignment used to be commented out, which left `mykey` undefined.)
mykey <- "YOUR_GOOGLE_API_KEY_HERE"

register_google(key=mykey)

# Geocode a city
geocode("San Francisco, CA")

# Geocode a state
geocode("California")

# Geocode a landmark
# (R is case-sensitive: the function is geocode(), not Geocode())
geocode("Golden Gate Bridge")

# Reverse Geocode
revgeocode(c(-122.4194,37.77493), output="more")

# Geocode a data frame of addresses
address_data <- read.csv("address_data/formatted/oak_liq_w_ids_types_headers.csv")

# Full address format: 100 Boylston St, Boston, MA 01952
address_data$full_address <- paste0(address_data$street, ", " , address_data$city, ", " , address_data$state, " ", address_data$zip)

#?geocode
# See the google documentation to interpret all of the results
geocoded_output <- geocode(address_data$full_address, output = "more", source = "google", key=mykey)


# --------------------------------------------------------------------------------
# /scripts/google_geocoding_ggmap_v2.R:
# --------------------------------------------------------------------------------
# NOTE YOU MUST HAVE CURRENT DEV VERSION OF GGMAP
# Install updated version of ggmaps
# if(!requireNamespace("devtools")) install.packages("devtools")
# devtools::install_github("dkahle/ggmap", ref = "tidyup")

library(ggmap)
library(purrr)
setwd("./")

# Replace with your google maps API key - don't share!!!
register_google("YOUR_GOOGLE_API_KEY_HERE")

# Test geocoding
geocode("san francisco, ca", output="latlona")

# Read in sample data
# Oakland Liquor store subset
# oak_liquor_stores.csv
# Header looks like this:
# id name street city state zip type
sites <- read.csv("oakland_liquor_stores.csv", header = TRUE, stringsAsFactors = FALSE, strip.white = TRUE)

head(sites)

# Create a single column version of the full address
sites$addr <- paste0(sites$street, " ", sites$city, " ",sites$state, " ", sites$zip)

##################################################
# Simple ggmap geocoding
##################################################
# This is the easiest way but it bombs on bad addresses
# and you lose all previous geocodes
# Uncomment to use!
## UNCOMMENT BEGIN BELOW
#geocoded_output_df <- geocode(sites$addr, output = "latlona")
#
#rename the columns
#colnames(geocoded_output_df) <-c("lon","lat","google_address")
#
# Append columns
#new_df <- cbind(sites,geocoded_output_df)
### UNCOMMENT END ABOVE

##################################################
# ggmap geocoding - with error handling
##################################################
geocode_many <- function(id, addr) {
  # Geocode a set of addresses one at a time with google via the ggmap package.
  #
  # id   - identifiers, one per address (copied into the ID column)
  # addr - full address strings
  #
  # Returns a data frame with columns ID, lat, lon, google_address.
  # An address google can't find - or whose request raises an error - yields
  # lat = NA, lon = NA, google_address = "not found", so one bad address never
  # discards the geocodes already collected.

  # One slot per address; rows are bound together once at the end.
  rows <- vector("list", length(addr))

  for (i in seq_along(addr)) {
    message("Geocoding: ", addr[i])

    x <- tryCatch(
      geocode(addr[i], output = "latlona"),
      error = function(e) {
        warning("Geocoding failed for '", addr[i], "': ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )

    found <- !is.null(x) && nrow(x) > 0 && !is.na(x$lat[1])

    rows[[i]] <- data.frame(
      ID = id[i],
      lat = if (found) x$lat[1] else NA_real_,
      lon = if (found) x$lon[1] else NA_real_,
      google_address = if (found) as.character(x$address[1]) else "not found",
      stringsAsFactors = FALSE
    )
  }

  # no addresses: same empty data frame the loop-and-rbind version produced
  if (length(rows) == 0) {
    return(data.frame())
  }
  do.call(rbind, rows)
}

########################################################################
# Test function - assumes you have an id field with the column label id
########################################################################
# test the function
sites2 <- head(sites, 10) #take 10 sites
the_geocodes <- geocode_many(sites2$id, sites2$addr)
sites2 <- merge(sites2, the_geocodes, by.x="id", by.y = "ID", all.x = TRUE)
head(sites2)

#################################################################
# Geocode ALL DATA - you can geocode 2,000 addr per day for free
#################################################################
# geocode the data - **Assumes unique id for each row - in a column labeled id**
the_geocodes <- geocode_many(sites$id, sites$addr)

# merge geocoded output with input data
sites <- merge(sites, the_geocodes, by.x="id", by.y = "ID", all.x = TRUE)

head(sites)

#################################
## Add the FIPS code to each row
##################################

latlon2fips <- function(latitude, longitude) {
  # Return a 15 digit Census Geo identifier (geoid)
  # like this: "060650422121006"
  # 06 = state is first two digits
  # 065 = county digits 3 - 5
  # 42212 = census tract digits 6-11
  # 10 = block group digits 12-13
  # 06 = block digits 14-15
  #
  # latitude, longitude - single numeric values (decimal degrees)
  # Returns "" when either coordinate is NA or the response carries no usable
  # FIPS value, so mapply() over many rows always gets one string per row.
  if (is.na(latitude) || is.na(longitude)) { #minor validity checking
    return("")
  }

  url <- "https://geo.fcc.gov/api/census/block/find?latitude=%f&longitude=%f&showall=true&format=json"
  url <- sprintf(url, latitude, longitude)
  json <- RCurl::getURL(url)
  json <- RJSONIO::fromJSON(json)
  #print(json)
  fips <- as.character(json$Block['FIPS']) #Block FIPS includes state, county, tract & blockgroup FIPS
  # Defensive: an absent Block gives character(0) and a NULL FIPS entry is
  # coerced to the string "NULL"; report both as "no FIPS".
  if (length(fips) != 1 || is.na(fips) || fips == "NULL") {
    return("")
  }
  fips
}
# test
latlon2fips(NA,NA)

#sites2$census_geoids <- mapply(latlon2fips,sites2$lat,sites2$lon)

# Census GEOIDS for each lat/lon pair from FCC
sites$census_geoids <- mapply(latlon2fips,sites$lat,sites$lon)

head(sites)
# Write results to file
write.csv(sites, file="geocoded_ouput_data.csv", row.names = FALSE)

#

#static map with ggmap
# use qmplot to make a scatterplot on a map
qmplot(lon, lat, data = sites, maptype = "toner-lite", color = I("red"))

# --------------------------------------------------------------------------------
# /scripts/oakland_liquor_stores.csv:
# --------------------------------------------------------------------------------
# id,name,street,city,state,zip,type
# 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p
# 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p
# 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p
# 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p
# 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p
# 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c
# 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p
# 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m
# 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p
# 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p
# 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m
# 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m
# 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p
# 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m
# 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p
# 16,Happy Time,1647 8th St,Oakland,CA,94607,p
# 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m
# 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p
# 19,J J Buckley Fine Wines,7305 Edgewater Dr
#D,Oakland,CA,94621,p
20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606,p
--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_batch.R:
--------------------------------------------------------------------------------
#
# Batch Geocoding with the US Census Geocoding Service
#
# pattyf@berkeley.edu, 05/2/2016
#
## Documentation:
# http://geocoding.geo.census.gov/
# https://www.census.gov/geo/maps-data/data/geocoder.html
# http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf

#Load libraries
library(httr) # to submit geocoding request
library(ggplot2) # to plot output
library(ggmap) # to plot output
library(leaflet) # for interactive plotting
library(stringr) # to format strings

#clean environment
rm(list=ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# our file of addresses that need to be geocoded
tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# the output file we will create
geocoded_output_file <- "geocoded_addresses_out.csv"

# The census geocoder can take as input a file of addresses to be geocoded.
# This file can contain up to 1000 addresses.
# The census geocoder does not want column names in the file to be geocoded,
# but we want to add them when we read the data into R to make sense of the data.
#
# For info on the correct format for submitting a file of addresses see:
# https://www.census.gov/geo/maps-data/data/geocoder.html
# Five columns - No headers, comma separated EVEN IF DATA NOT AVAILABLE
# Unique ID, house number and street name, city, state, zipcode
# Two valid examples:
#1, 1600 Pennsylvania Ave NW, Washington, DC,
#2, 1600 Pennsylvania Ave NW,,,20502

# Read in the addresses that we will geocode.
# The file has NO header row, so header = FALSE is required; otherwise the
# first address is consumed as column names and silently dropped.
# colClasses = "character" keeps leading zeros in zip codes.
addresses_to_geocode <- read.csv(tiger_input_addressFile,
                                 header = FALSE,
                                 colClasses = "character",
                                 stringsAsFactors = FALSE,
                                 col.names = c('id','street','city','state','zip'))

# Look at the data
head(addresses_to_geocode)

# How many addresses?
num_addresses <- nrow(addresses_to_geocode)

# Now that we have looked at the data we are ready to geocode.
# First, remove the address data object
rm(addresses_to_geocode)


get_geocoded_addresses <- function(file_of_addresses) {
  # Submit a header-less CSV file of addresses to the Census batch geocoder,
  # parse the reply and save it to `geocoded_output_file`.
  #
  # Args:
  #   file_of_addresses: path to a CSV file (id, street, city, state, zip;
  #     no header row, at most 1000 rows).
  #
  # Returns:
  #   A data frame of the geocoder output with numeric `lon` and `lat`
  #   columns added.
  #
  # Reads two script-level variables: `geocoded_output_file` (where results
  # are saved) and `processed_rows` (0 => create the file, otherwise append).

  # Identify the URL to which we will submit the geocoding request
  tiger_url <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

  # The important parameters here are benchmark and vintage.
  # You can read about these in: http://www2.census.gov/geo/pdfs/education/brochures/CensusGeocoder.pdf
  # Or at: http://geocoding.geo.census.gov/
  # The benchmark is the date the data were last updated and the vintage is
  # the year of the census data product it links to.
  # Below we are querying the street database with the most current data for
  # linking to the 2014 ACS data.
  geocoded_addresses <- POST(tiger_url, encode = "multipart",
                             body = list(addressFile = upload_file(file_of_addresses),
                                         benchmark = "Public_AR_Current",
                                         vintage = "ACS2014_Current"))

  # Relabel the output column names
  mycols <- c("id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side", "state_fips", "county_fips","tract_fips", "block_fips")

  # The service replies with header-less CSV text; parse it directly instead
  # of round-tripping through a temporary file.
  raw_csv <- content(geocoded_addresses, as = "text", encoding = "UTF-8")
  mylocs <- read.csv(text = raw_csv, header = FALSE, col.names = mycols)

  # The longitude and latitude of each match arrive in one "lon,lat" column.
  # Split them into two numeric columns (unmatched rows become NA).
  lon_lat_parts <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
  mylocs$lon <- as.numeric(vapply(lon_lat_parts, function(p) p[1], character(1)))
  mylocs$lat <- as.numeric(vapply(lon_lat_parts, function(p) p[2], character(1)))

  # Finally, save the geocoded addresses to a file
  if (processed_rows == 0) {
    # If we only processed one file or the first of many, create a new file
    write.csv(mylocs, file = geocoded_output_file, row.names = FALSE)
  } else {
    # Append to the file. write.csv() ignores `append` (it would overwrite
    # the earlier batches), so use write.table() and skip the header row.
    write.table(mylocs, file = geocoded_output_file, sep = ",",
                qmethod = "double", row.names = FALSE, col.names = FALSE,
                append = TRUE)
  }

  # Return the data frame of geocoded addresses
  mylocs
}

# some counters to keep track of the number of addresses we need to process
# we can only batch geocode 1000 addresses at a time
read_rows <- 1000
processed_rows <- 0

# Now that we have our function to submit addresses to the Census geocoder we can proceed
if (num_addresses <= read_rows) {
  # Everything fits in one submission (the limit is 1000 inclusive)
  geocoded_df <- get_geocoded_addresses(tiger_input_addressFile)

} else {
  # Process 1000 addresses at a time
  # By reading in up to 1000 addresses to geocode from our address file
  # saving them to a file, geocoding that file
  # then write the results to our master geocoded addresses file

  while (processed_rows < num_addresses) {
    # read in the next (up to) 1000 addresses; header = FALSE because the
    # file has no header row, so no address is lost at a chunk boundary
    addresses_to_geocode <- read.csv(tiger_input_addressFile,
                                     header = FALSE,
                                     colClasses = "character",
                                     stringsAsFactors = FALSE,
                                     nrows = read_rows,
                                     skip = processed_rows)

    # create a temporary file to hold the up to 1000 addresses to geocode
    temp_infile <- tempfile(fileext = ".csv")

    # Save the chunk WITHOUT a header row. write.csv() ignores `col.names`,
    # so write.table() is needed to really suppress the header.
    write.table(addresses_to_geocode, file = temp_infile, sep = ",",
                qmethod = "double", row.names = FALSE, col.names = FALSE)

    # geocode the batch of addresses in the temp file
    geocoded_df <- get_geocoded_addresses(temp_infile)

    # delete the temp file
    unlink(temp_infile)

    # increment the counters to see if there are more addresses to geocode
    processed_rows <- processed_rows + read_rows
  }
}

# We have now geocoded all of the addresses and saved them to a file.
# Let's read in the file of geocoded address
# and Plot them on a map using ggmap
# read in geocoded addresses
geocoded_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)
head(geocoded_results) # take a look at the results

# Centre the map on the mean coordinate; na.rm = TRUE because unmatched
# addresses have NA lon/lat and would otherwise make the centre NA.
map <- get_map(location = c(lon = mean(geocoded_results$lon, na.rm = TRUE),
                            lat = mean(geocoded_results$lat, na.rm = TRUE)),
               zoom = 15)
ggmap(map) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "red", data = geocoded_results)

# save the map image to a file
ggsave("mymap.png")

# Now create an interactive map with Leaflet
# NOTE(review): the popup label contains a line break in this copy of the
# script; leaflet popups are HTML, so "<br>" may have been intended - confirm.
map1 <- leaflet() %>% addTiles() %>%
  addCircleMarkers(data = geocoded_results, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Geocoded Address:\n", geocoded_results$matched_address),
                   color = "red",
                   fillOpacity = 0.7)

map1 # view it

# You can save leaflet map to html file
# So that you can open and view it anytime
library(htmlwidgets)
saveWidget(map1, file="map1.html", selfcontained=FALSE)

--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_batch_v2.R:
--------------------------------------------------------------------------------
#
# Batch Geocoding with the US Census Geocoding Service
#
# pattyf@berkeley.edu, 05/2/2016
#
## Documentation:
# http://geocoding.geo.census.gov/
# https://www.census.gov/geo/maps-data/data/geocoder.html
# http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf

# TESTING
# Little test - 10 records
# census_batch_geocode('address_data/sample/sample_10_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')
# Medium test - 2.5K records
# census_batch_geocode('address_data/sample/sample_2500_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')
# Big test - 5K records
# census_batch_geocode('address_data/sample/sample_5k_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')

# TODO
#
# When more than 1k records, the counting and subsetting is off by 1
#

#Load libraries
library(httr) # to submit geocoding request
library(ggplot2) # to plot output
library(ggmap) # to plot output
library(leaflet) # for interactive plotting
library(stringr) # to format strings
library(htmlwidgets)

#clean environment
rm(list=ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

get_geocoded_addresses <- function(file_of_addresses, benchmark="Public_AR_Current", vintage="ACS2014_Current") {
  # Submit a header-less CSV file of addresses to the Census batch geocoder.
  #
  # Args:
  #   file_of_addresses: path to a CSV file (id, street, city, state, zip;
  #     no header row, at most 1000 rows).
  #   benchmark: the date the street data were last updated.
  #   vintage: the year of the census data product the result links to.
  #     You can read about these in:
  #     http://www2.census.gov/geo/pdfs/education/brochures/CensusGeocoder.pdf
  #     or at: http://geocoding.geo.census.gov/
  #
  # Returns:
  #   A data frame of the geocoder output with numeric `lon` and `lat`
  #   columns added, or the number 0 when the service does not answer with
  #   HTTP status 200 (callers should test with is.data.frame()).

  # Identify the URL to which we will submit the geocoding request
  tiger_url <- "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

  # Pass the caller's benchmark/vintage through to the service (they were
  # previously hard-coded here, so the arguments had no effect).
  geocoded_addresses <- POST(tiger_url, encode = "multipart",
                             body = list(addressFile = upload_file(file_of_addresses),
                                         benchmark = benchmark,
                                         vintage = vintage))

  if (geocoded_addresses$status_code != 200) {
    print(paste("ERROR: problem with the census geocoding service, status code:", geocoded_addresses$status_code))
    return(0)
  }

  # We got a success status code from census api
  print("Successful return from census geocoder.")

  # Relabel the output column names - these are from the census api
  mycols <- c("id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side", "state_fips", "county_fips","tract_fips", "block_fips")

  # The service replies with header-less CSV text; parse it directly instead
  # of round-tripping through a temporary file.
  raw_csv <- content(geocoded_addresses, as = "text", encoding = "UTF-8")
  mylocs <- read.csv(text = raw_csv, header = FALSE, col.names = mycols)

  # The longitude and latitude of each match arrive in one "lon,lat" column.
  # Split them into two numeric columns (unmatched rows become NA).
  lon_lat_parts <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
  mylocs$lon <- as.numeric(vapply(lon_lat_parts, function(p) p[1], character(1)))
  mylocs$lat <- as.numeric(vapply(lon_lat_parts, function(p) p[2], character(1)))

  # Return a data frame of geocoded addresses
  mylocs
}


census_batch_geocode <-function (infile, id_col='id', street_col='street', city_col='city', state_col='state', zip_col='zip', header_row=TRUE) {
  # Geocode every address in a CSV file with the Census batch geocoder, in
  # batches of at most 1000 rows, and save the results next to the input as
  # "<infile without .csv>_geocoded.csv".
  #
  # Args:
  #   infile: path of the CSV file holding the addresses.
  #   id_col, street_col, city_col, state_col, zip_col: names of the columns
  #     in `infile` holding the unique id and the address parts.
  #   header_row: does `infile` start with a header row? (With FALSE the
  #     columns are named V1, V2, ... by read.csv.)
  #
  # Returns (invisibly) the final status message.

  # Read everything as text so ids and zip codes keep leading zeros and are
  # never rewritten in scientific notation.
  in_addresses <- read.csv(infile, header = header_row,
                           colClasses = "character", stringsAsFactors = FALSE)

  # Only replace a trailing ".csv" (the unescaped '.' used to match any
  # character); never let the output path collide with the input path.
  outfile <- sub("\\.csv$", "_geocoded.csv", infile)
  if (identical(outfile, infile)) {
    outfile <- paste0(infile, "_geocoded.csv")
  }

  address_cols <- c(id_col, street_col, city_col, state_col, zip_col)
  in_cols <- colnames(in_addresses)

  if (!all(address_cols %in% in_cols)) {
    stop('EXITING: The named address columns are not in input file.')
  }
  addresses_to_geocode <- in_addresses[, address_cols, drop = FALSE]
  #head(addresses_to_geocode) #debug

  # The census geocoder can take as input a file of addresses to be geocoded.
  # This file can contain up to 1000 addresses.
  # The census geocoder does not want column names in the file to be geocoded.
  #
  # For info on the correct format for submitting a file of addresses see:
  # https://www.census.gov/geo/maps-data/data/geocoder.html
  # Five columns - No headers, comma separated EVEN IF DATA NOT AVAILABLE
  # Unique ID, house number and street name, city, state, zipcode
  # Two valid examples:
  #1, 1600 Pennsylvania Ave NW, Washington, DC,
  #2, 1600 Pennsylvania Ave NW,,,20502

  # How many addresses?
  num_addresses <- nrow(addresses_to_geocode)
  print(paste0('About to geocode ', num_addresses, " addresses..."))
  if (num_addresses == 0) {
    stop('EXITING: The input file contains no addresses.')
  }

  # we can only batch geocode 1000 addresses at a time
  read_rows <- 1000

  # First row of every batch: 1, 1001, 2001, ... Each batch is the inclusive
  # range first_row..last_row, so no row is skipped, repeated or read past
  # the end of the data.
  batch_starts <- seq(1, num_addresses, by = read_rows)
  single_batch <- length(batch_starts) == 1
  if (single_batch) {
    print("Processing all records in one file submission.")
  }

  # Becomes TRUE once the output file (with its header row) exists
  output_started <- FALSE

  for (first_row in batch_starts) {
    last_row <- min(first_row + read_rows - 1, num_addresses)
    if (!single_batch) {
      print(paste0("processing rows [", first_row, "] to [", last_row, "].. This could take 2 - 7 minutes."))
    }

    address_subset <- addresses_to_geocode[first_row:last_row, , drop = FALSE]

    # create a temporary file to hold the up to 1000 addresses to geocode
    temp_infile <- tempfile(fileext = ".csv")

    # NOTE(review): quote = FALSE is kept from the original; an address part
    # that itself contains a comma will shift the columns - confirm inputs.
    write.table(address_subset, file = temp_infile, sep = ",", quote = FALSE,
                row.names = FALSE, col.names = FALSE)

    # geocode the batch of addresses in the temp file
    geocoded_df <- get_geocoded_addresses(temp_infile)

    # delete the temp file we used for geocoding
    unlink(temp_infile)

    # get_geocoded_addresses() returns 0 (not a data frame) on failure.
    # Report it and move on to the next batch rather than retrying forever.
    if (!is.data.frame(geocoded_df) || nrow(geocoded_df) == 0) {
      print("Problem! Unable to geocode.")
      next
    }

    print(paste0("Number of records geocoded: ", nrow(geocoded_df)))

    if (!output_started) {
      # First successful batch: create the file, including the header row
      print(paste0('Saving geocoded data to: ', outfile))
      write.csv(geocoded_df, file = outfile, row.names = FALSE)
      output_started <- TRUE
    } else {
      # Later batches: append rows only (no second header row)
      print(paste0('Appending geocoded data to: ', outfile))
      write.table(geocoded_df, file = outfile, sep = ",", qmethod = "double",
                  row.names = FALSE, col.names = FALSE, append = TRUE)
    }
  }

  print(paste0("Done geocoding ", infile, "- check in address count == out address count!"))
}
###############################################################################

imap_census_geocodes <- function(infile, save_map=FALSE){
  # Build an interactive Leaflet map from a file of geocoded addresses.
  #
  # Args:
  #   infile: CSV file with `lon`, `lat` and `matched_address` columns, as
  #     written by census_batch_geocode().
  #   save_map: when TRUE, also save the map to "map1.html" so that you can
  #     open and view it anytime.
  #
  # Returns the leaflet map widget (print it to view it).

  # read in geocoded addresses
  geocoded_results <- read.csv(infile, stringsAsFactors = FALSE)

  # NOTE(review): the popup label contains a line break in this copy of the
  # script; leaflet popups are HTML, so "<br>" may have been intended - confirm.
  map1 <- leaflet() %>% addTiles() %>%
    addCircleMarkers(data = geocoded_results, lng = ~lon,
                     lat = ~lat, radius = 5, stroke = FALSE,
                     popup = paste("Geocoded Address:\n", geocoded_results$matched_address),
                     color = "red",
                     fillOpacity = 0.7)

  if (isTRUE(save_map)) {
    htmlwidgets::saveWidget(map1, file = "map1.html", selfcontained = FALSE)
  }
  map1
}

--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_single_address.R:
--------------------------------------------------------------------------------
#
# Single Address Geocoding with the US Census Geocoding Service
#
# pattyf@berkeley.edu, 05/2/2016
#
## Documentation:
# http://geocoding.geo.census.gov/
# https://www.census.gov/geo/maps-data/data/geocoder.html
# http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf

#clean environment
rm(list=ls())

#Load libraries
library(RJSONIO)
library(plyr)

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# Identify the URL of the census geocoding service and related parameters
census_prefix <- "http://geocoding.geo.census.gov/geocoder/geographies/address?"
census_suffix <- "&benchmark=Public_AR_Current&vintage=ACS2014_Current&format=json"
# See http://geocoding.geo.census.gov/ for available benchmarks and vintages
# See also: http://www2.census.gov/geo/pdfs/education/brochures/CensusGeocoder.pdf
# for definition of benchmarks and vintages

# Identify the file of addresses that will be geocoded
census_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# the output file we will create
geocoded_output_file <- "geocoded_addresses_single_out.csv"

# let's take a look at the addresses that we will geocode
# The address file is in the Census batch format, i.e. it has NO header row,
# so header = FALSE is required or the first address is silently dropped.
# colClasses = "character" keeps leading zeros in zip codes.
addresses_to_geocode <- read.csv(census_input_addressFile,
                                 header = FALSE,
                                 colClasses = "character",
                                 stringsAsFactors = FALSE,
                                 col.names = c('id','street','city','state','zip'))

# get the address in the format needed by the Census API GET call
addresses_to_geocode$census_format <- paste0(
  "street=", addresses_to_geocode$street,
  "&city=", addresses_to_geocode$city,
  "&state=", addresses_to_geocode$state,
  "&zip=", addresses_to_geocode$zip
)

census_geocode <- function(address) {
  # Geocode ONE address with the Census single-address API.
  #
  # Args:
  #   address: the query-string form of the address, e.g.
  #     "street=...&city=...&state=...&zip=..." (not yet URL-encoded).
  #
  # Returns:
  #   A one-row data frame with columns lat, lon and geoid (the GEOID of the
  #   first census tract of the first match); all NA when the request fails,
  #   raises a warning, or finds no match.
  #
  # Uses the script-level `census_prefix` and `census_suffix` to build the URL.

  # prepare the full, url-request-ready Census Geocoding Request URL
  g_address <- paste0(census_prefix, URLencode(address), census_suffix)

  # the all-NA row returned when nothing could be geocoded
  answer <- data.frame(lat=NA, lon=NA, geoid=NA)

  # HTTP requests can hang, fail, etc. so we wrap the call in tryCatch() to
  # handle problematic addresses and keep on going... important with lots of
  # addresses. tryCatch() returns the parsed JSON on success and the all-NA
  # row from the handlers otherwise.
  out <- tryCatch(
    {
      message("Trying to Geocode with Census API")
      fromJSON(g_address)
    },
    error = function(cond) {
      message(paste("Address URL does not seem to exist:", g_address))
      message("Here's the original error message:")
      message(cond)
      # The return value in case of error is the NA data frame row
      answer
    },
    warning = function(cond) {
      message(paste("Address URL caused a warning:", g_address))
      message("Here's the original warning message:")
      message(cond)
      # The return value in case of warning is the NA data frame row
      answer
    },
    finally = {
      # runs at the end regardless of success or error
      message(paste("Processed Address:", g_address))
    }
  )

  # When a handler returned the NA row, `out$result` is NULL and this is
  # skipped, so the NA row is returned unchanged.
  matches <- out$result$addressMatches
  if (length(matches) > 0) {
    # if we got a geocoded response update the answer with the first match
    best_match <- matches[[1]]
    answer$lon <- best_match$coordinates[['x']]
    answer$lat <- best_match$coordinates[['y']]
    answer$geoid <- best_match$geographies$`Census Tracts`[[1]]$GEOID
  }
  answer
}

## apply the geocoding function to every address, one request per row;
## ldply() stacks the one-row results into a single data frame
geocoded.df <- ldply(addresses_to_geocode$census_format, census_geocode)

#append the answer to the results file.
geocoded_addresses <- cbind(addresses_to_geocode, geocoded.df)

# take a look at our geocoded output
head(geocoded_addresses)

# Save geocoded addresses to a file
write.csv(geocoded_addresses, file=geocoded_output_file, row.names=FALSE)

--------------------------------------------------------------------------------
/scripts/older_scripts/esri_wgs_geocoding.R:
--------------------------------------------------------------------------------

# This script provides an example of accessing the ESRI world geocoding service.
# This code borrows HEAVILY (almost completely) and with thanks from
# Claudia Engel: https://github.com/cengel/ArcGIS_geocoding
# See her repo for more details and other examples

##################################
## Single Line Geocode Function ##
##################################
# The function takes:
# - one address at a time as one string (SingleLine)
# - token - which you get from developers.arcgis.com
#   see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
# - TRUE/FALSE - allow to return Postal codes if a full street address match cannot be found (default is TRUE)
#
# The function returns:
# lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
# score - The accuracy of the address match between 0 and 100.
# locName - The component locator used to return a particular match result
# status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
# matchAddr - Complete address returned for the geocode request.
# side - The side of the street where an address resides relative to the direction
#   of feature digitization
# addressType - The match level for a geocode request. "PointAddress" is typically the
#   most spatially accurate match level. "StreetAddress" differs from PointAddress
#   because the house number is interpolated from a range of numbers. "StreetName" is similar,
#   but without the house number.
#
# Fields missing from the service reply are returned as NA. The function stops
# with an error if the service reports one (e.g. an invalid token) or returns
# no location.

geocodeSL <- function (address, token, postal = TRUE){
  if (!requireNamespace("httr", quietly = TRUE)) {
    stop("Package 'httr' is required.", call. = FALSE)
  }
  if (length(address) != 1) {
    stop("geocodeSL() takes exactly one address at a time.", call. = FALSE)
  }

  # ESRI geolocator (https, so the access token is not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # SingleLine request as JSON. Backslashes and double quotes in the address
  # are escaped so that an address such as "5 O'Neil's \"Bar\"" cannot break
  # the JSON document.
  escaped_address <- gsub("([\"\\\\])", "\\\\\\1", as.character(address))
  addresses_json <- sprintf(
    '{"records":[{"attributes":{"OBJECTID":1,"SingleLine":"%s"}}]}',
    escaped_address
  )

  # Let httr build and percent-encode the query string, so characters such as
  # '&' or '#' inside the address are transmitted correctly.
  query <- list(addresses = addresses_json, token = token, f = "json")
  if (!postal) {
    # only accept street-address level matches, no postal-code fallback
    query$category <- "Address"
  }

  # submit
  rawdata <- httr::GET(gserver, query = query)

  # parse JSON and process result
  res <- httr::content(rawdata, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("Geocoding service returned an error: ", res$error$message, call. = FALSE)
  }
  if (length(res$locations) == 0) {
    stop("Geocoding service returned no location for this address.", call. = FALSE)
  }

  # data.frame() cannot take NULL columns, so map absent fields to NA
  na_if_null <- function(x) if (is.null(x)) NA else x

  loc <- res$locations[[1]]
  data.frame(lon = as.numeric(na_if_null(loc$location$x)),
             lat = as.numeric(na_if_null(loc$location$y)),
             score = na_if_null(loc$score),
             locName = na_if_null(loc$attributes$Loc_name),
             status = na_if_null(loc$attributes$Status),
             matchAddr = na_if_null(loc$attributes$Match_addr),
             side = na_if_null(loc$attributes$Side),
             addressType = na_if_null(loc$attributes$Addr_type))
}

#######################################
## Multi Line Batch Geocode Function ##
#######################################
# The function takes:
# - ID variable to identify records, must be numeric and should be unique
# - multiple addresses as vectors, separated into: Street, City, State, Zip
# - token - which you get from developers.arcgis.com
#   see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
#
# It can take a maximum of 1000 addresses. If more, it returns an error.
#
# The function returns a data frame with the following fields:
# ID - Result ID can be used to join the output fields in the response to the attributes
#   in the original address table.
# lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
# score - The accuracy of the address match between 0 and 100.
# locName - The component locator used to return a particular match result
# status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
# matchAddr - Complete address returned for the geocode request.
# side - The side of the street where an address resides relative to the direction
#   of feature digitization
# addressType - The match level for a geocode request. "PointAddress" is typically the
#   most spatially accurate match level. "StreetAddress" differs from PointAddress
#   because the house number is interpolated from a range of numbers. "StreetName" is similar,
#   but without the house number.
#
# Fields missing from the service reply are returned as NA. The function stops
# with an error if the service reports one (e.g. an invalid token).

geocodeML_batch <- function(id, street, city, state, zip, token){
  for (pkg in c("httr", "rjson")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required.", call. = FALSE)
    }
  }

  # check if we have more than 1000, if so stop.
  if (length(id) > 1000){
    print(paste("length is: ", length(id)))
    stop("Can only process up to 1000 addresses at a time.")
  }

  # check if id is numeric
  if (!is.numeric(id)) {
    stop("id variable needs to be numeric.")
  }

  # make data frame (this also recycles a single value, e.g. one state for
  # all rows, to the common length)
  adr_df <- data.frame(OBJECTID = id,
                       Street = street,
                       City = city,
                       State = state,
                       Zip = zip)

  # Build one JSON record per address. Each row is assembled field by field:
  # apply() over a data frame would first convert it to a character matrix,
  # turning the numeric id into text and padding numbers with blanks.
  records <- lapply(seq_len(nrow(adr_df)), function(i) {
    list(attributes = list(OBJECTID = adr_df$OBJECTID[i],
                           Street = as.character(adr_df$Street[i]),
                           City = as.character(adr_df$City[i]),
                           State = as.character(adr_df$State[i]),
                           Zip = as.character(adr_df$Zip[i])))
  })
  adr_json <- rjson::toJSON(list(records = records))

  # Identify the geocoding web service URL (https, so the access token is
  # not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # submit
  req <- httr::POST(
    url = gserver,
    body = list(addresses = adr_json, f = "json", token = token),
    encode = "form")
  #stop_for_status(req) # error check

  # process and parse
  res <- httr::content(req, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("Geocoding service returned an error: ", res$error$message, call. = FALSE)
  }

  # data.frame() cannot take NULL columns, so map absent fields to NA
  na_if_null <- function(x) if (is.null(x)) NA else x

  # one single-row data frame per returned location, bound together once
  # at the end instead of growing a data frame inside a loop
  result_rows <- lapply(res$locations, function(loc) {
    data.frame(ID = na_if_null(loc$attributes$ResultID),
               lon = as.numeric(na_if_null(loc$location$x)),
               lat = as.numeric(na_if_null(loc$location$y)),
               score = na_if_null(loc$score),
               locName = na_if_null(loc$attributes$Loc_name),
               status = na_if_null(loc$attributes$Status),
               matchAddr = na_if_null(loc$attributes$Match_addr),
               side = na_if_null(loc$attributes$Side),
               addressType = na_if_null(loc$attributes$Addr_type))
  })
  if (length(result_rows) == 0) {
    return(data.frame())
  }
  do.call(rbind, result_rows)
}

#--------------------------------------------------------------------------------------
# Some code to use the above functions
#--------------------------------------------------------------------------------------

# set your access token
myToken <- "enter your long ugly ESRI geocoding access token here"

# ---------------------------
# GEOCODE A SINGLE ADDRESS
# ---------------------------
geocode_output <-geocodeSL("1600 Pennsylvania Avenue NW, Washington, DC", myToken, postal = TRUE)

# -----------------------------
# GEOCODE A BATCH OF ADDRESSES
# -----------------------------

# make up a data frame with some addresses:
adr_df <- data.frame(
  ID = 1:3,
  street = c('450 Serra Mall', '1600 Amphitheatre Pkwy', '1355 Market Street Suite 900'),
  city = c('Stanford', 'Mountain View', 'San Francisco'),
  state = 'CA',
  zip = c('94305', '94043', '94103'))


# Batch geocode your dataframe of addresses with the following function
adr_gc <- geocodeML_batch(adr_df$ID, adr_df$street, adr_df$city, adr_df$state, adr_df$zip, myToken)

# join back with original data
merge(adr_df, adr_gc, by = "ID", all.x = TRUE)
--------------------------------------------------------------------------------
/scripts/older_scripts/geocode_it.R:
--------------------------------------------------------------------------------
library(plyr)
library(ggmap)
library(readxl)
library(stringr)

setwd("~/Documents/Dlab/consults/leora")

# Load the addresses to geocode from the Excel workbook
data <- read_excel("doh2.xlsx")
head(data) # take a look

# The geocoder wants ONE address string, so join the parts with commas
# (the state is assumed to be CA at this point)
data$address_long <- with(
  data,
  paste(address, city, "CA", zip, sep = ",")
)
head(data)

# Geocode the whole column of addresses with Google
geocoded_output <- geocode(
  data$address_long,
  output = "latlona",
  source = "google"
)

# keep any warnings so they can be inspected
warnings_out <- warnings()
head(warnings_out)

# Put the geocoder output next to the input rows
geocoded_data <- data.frame(data, geocoded_output)
head(geocoded_data) # check it

# Split into rows that were / were not geocoded; the last line is a sanity
# check that the two pieces account for every row
not_geocoded <- subset(geocoded_data, is.na(lat))
yes_geocoded <- subset(geocoded_data, !is.na(lat))
nrow(geocoded_data) == (nrow(not_geocoded) + nrow(yes_geocoded))


# Map the geocoded points, centred on their mean coordinate
map_centre <- c(
  lon = mean(yes_geocoded$lon),
  lat = mean(yes_geocoded$lat)
)
mymap <- get_map(location = map_centre, zoom = 4)

ggmap(mymap) +
  geom_point(aes(x = lon, y = lat), data = yes_geocoded, size = 2, col = "red")

# Scaling up to more than 2500 records?
# Google limits free geocoding to 2500 addresses per day
geocodeQueryCheck() # how am I doing?

# Retry the records that were not geocoded.
# They failed because they are out of state,
# so take out the ",CA," that was inserted above

not_geocoded$add2 <- gsub(',CA,', " ", not_geocoded$address_long)

# drop the (empty) geocoder columns before retrying
# pattern: DF[ , !(names(DF) %in% drops)]
drop_cols <- c('lat', 'lon', 'address.1')
not_geocoded <- not_geocoded[, !(names(not_geocoded) %in% drop_cols)]
str(not_geocoded)

# try again
geoout2 <- geocode(not_geocoded$add2, output = "latlona", source = "google")
# check and address any warnings()

# Put the second-pass output next to its input rows
geocoded2 <- data.frame(not_geocoded, geoout2)
head(geocoded2) # check it

# fix bad ones identified via warnings()
# then try again
geoout2 <- geocode(not_geocoded$add2, output = "latlona", source = "google")


# Put the second-pass output next to its input rows
geocoded2 <- data.frame(not_geocoded, geoout2)
head(geocoded2) # check it

nrow(geocoded_data) == (nrow(geocoded2) + nrow(yes_geocoded))

# Make sure ncols the same and then combine our outputs
# names(yes_geocoded)
# names(geocoded2)
# "primkey" "address" "zip" "city" "address_long" "add2" "lon" "lat" "address.1"
keep_cols <- !(names(geocoded2) %in% c('address_long'))
geocoded3 <- geocoded2[, keep_cols]
# "primkey" "address" "zip" "city" "add2" "lon" "lat" "address.1"
newnames <- c("primkey", "address", "zip", "city", "address_long", "lon", "lat", "address.1")
names(geocoded3) <- newnames

# stack first-pass and second-pass results; should match the input row count
all_geocodes <- rbind(yes_geocoded, geocoded3)
nrow(all_geocodes) == nrow(data)
# fix the colnames
newnames2 <- c("primkey", "address", "zip", "city", "geocoded_address", "lon", "lat", "google_address")
names(all_geocodes) <- newnames2



#---------------------------------------
# Get FIPS code to link to census data
#---------------------------------------
library(RCurl)
library(RJSONIO)
# FCC's Census Block Conversions API
#
# http://www.fcc.gov/developers/census-block-conversions-api

# Look up the Census 2010 block FIPS code for ONE coordinate pair using the
# FCC Census Block Conversions API.
#
# Args:
#   latitude, longitude: single numeric values (decimal degrees).
# Returns:
#   A length-1 character string:
#     - the block FIPS code when the service answers with status "OK"
#     - "NANA" when either coordinate is NA
#     - "none" when an input is not numeric, the request/parse fails, or the
#       service reports any other status
# NOTE(review): this data.fcc.gov endpoint may have been superseded by a newer
# FCC API - confirm the URL still responds before a large run.
latlong2fips <- function(latitude, longitude) {
  # After: https://gist.github.com/ramhiser/f09a71d96a4dec80994c

  thecode <- "none"

  # Scalar conditions: use the short-circuiting `||`, not the elementwise `|`
  if (!is.numeric(latitude) || !is.numeric(longitude)) {
    return(thecode)
  }
  if (is.na(latitude) || is.na(longitude)) {
    return("NANA")
  }
  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
  url <- sprintf(url, latitude, longitude)
  print(url)

  # One dropped connection or malformed reply must not abort the whole
  # mapply() run below: warn and fall through to the "none" sentinel.
  json <- tryCatch(
    RJSONIO::fromJSON(RCurl::getURL(url)),
    error = function(e) {
      warning("FIPS lookup failed for ", url, ": ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  # Example reply:
  #{"Block":{"FIPS":"240054114062015"},"County":{"FIPS":"24005","name":"Baltimore"},"State":{"FIPS":"24","code":"MD","name":"Maryland"},"status":"OK","executionTime":"103"}
  # isTRUE() also covers a reply that has no `status` element at all
  if (is.list(json) && isTRUE(json$status == "OK")) {
    thecode <- as.character(json$Block['FIPS']) # Census 2010 Block FIPS Code
  }
  return(thecode)
}

# Split the records into those with and without coordinates
na_geocodes <- subset(all_geocodes, is.na(lat))
nrow(na_geocodes)
not_na_geocodes <- subset(all_geocodes, !is.na(lat))
nrow(not_na_geocodes)
# sanity check: the two subsets must account for every row
nrow(not_na_geocodes) + nrow(na_geocodes) == nrow(all_geocodes)

# Only call the web service for records that actually have coordinates
#all_geocodes$fips<- mapply(latlong2fips,all_geocodes$lat,all_geocodes$lon)
not_na_geocodes$fips <- mapply(latlong2fips, not_na_geocodes$lat, not_na_geocodes$lon)

# take a look (at the frame that now carries the fips column)
head(not_na_geocodes)

# add a placeholder fips code to the recs without lat/lon
na_geocodes$fips <- "none"

# combine the two data frames
geocodes_w_fips <- rbind(not_na_geocodes, na_geocodes)

# FIPS CODES (15-digit census block code)
# digits 1-2:   state
# digits 3-5:   county
# digits 6-11:  tract
# digit  12:    block group (it is the first digit of the block number)
# digits 12-15: block
geocodes_w_fips$fips_tract <- substr(geocodes_w_fips$fips, 1, 11)
geocodes_w_fips$fips_tract_only <- substr(geocodes_w_fips$fips, 6, 11)

# Save geocoded output to a file
# Quote the output so the FIPS codes stay text: unquoted, many tools read them
# as numbers and drop the leading zeros (e.g. state code "06")
write.table(geocodes_w_fips,file="google_geocoded_output.csv", row.names=FALSE, quote = T, sep=",")

--------------------------------------------------------------------------------
/scripts/older_scripts/getFipsForPoints.R:
--------------------------------------------------------------------------------
## ############################################################################
#
# Joining Census FIPS codes to points via point in polygon overlay
#
# Author: Patty Frontiera pfrontiera@berkeley.edu
# Date: 2014_04_15
# Last updated: 2014_04_17
#
# Purpose:
## This script takes as input a point file and a file of Census Tiger data,
## performs a spatial overlay of the two,
## and adds the Census FIPS code to the point file attribute table.
## It then writes the output to CSV and to a shapefile.
##
## ASSUMPTION: input point data has fields "latitude" and "longitude"
##
## The same operation can be done much faster in ArcGIS or PostGIS
## but this method is fine for < 1 million records
##
## This approach will work on an offline server provided
## the data, R code and R libraries reside on the server.
##
## THIS IS SAMPLE CODE - you will need to make changes!
## ############################################################################

# clean start - remove any objects in the environment
rm(list = ls())

# LOAD LIBS
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a confusing error.
library(rgdal)
library(R.utils)

# ########################################################################################################
# USER SUPPLIED VALUES
# ########################################################################################################

working_dir <- "/Users/pattyf/geocoding/temp"

point_file <- "/Users/pattyf/geocoding/test_address_points.csv"
# You can download some sample point data from this url:
## https://gist.githubusercontent.com/pattyf/9091aca4d536e983beea/raw/65b4ca99a215b65cdd7c2406dfbac9749eb897f6/test_address_points

point_file_delimiter <- "|" # I prefer this delimiter to a comma as address components often contain commas

point_file_crs <- "4326" # These points use geographic coordinates with the WGS84 datum
# WGS 84 - coordinate reference system (crs) used by most GPS / Google maps etc
## AKA - spatial reference system or map projection or coordinate system
## See spatialreference.org - http://spatialreference.org/ref/epsg/4326/

# HEY: IMPORTANT
# ASSUMPTION: input point data has fields "latitude" and "longitude"

## Census block data - must point to file on your computer
#census_file <- '/Users/pattyf/Gisdata/Census/tabblock2010_06_pophu/tabblock2010_06_pophu.shp'

## Census blockgroup data - must point to file on your computer

#census_file <- '/Users/pattyf/Gisdata/Census/tl_2014_06_tract/tl_2014_06_bg.shp'

## Census tract data - must point to file on your computer
census_file <- '/Users/pattyf/Gisdata/Census/tl_2014_06_tract/tl_2014_06_tract.shp'
# CA block-level census data were downloaded from the census website, url below:
## http://www2.census.gov/geo/tiger/TIGER2014/TABBLOCK/tl_2014_06_tabblock10.zip
## Could automate the download but adds unneeded complexity to this script
## THIS IS A BIG FILE = 415MB or so
## You can download a smaller file by downloading larger census geographies
### eg tract level data
## http://www2.census.gov/geo/tiger/TIGER2014/TRACT/tl_2014_06_tract.zip
### or block group level data
### http://www2.census.gov/geo/tiger/TIGER2014/BG/tl_2014_06_bg.zip
## However, if you intersect points with the block level data
## you get a FIPS CODE that includes the state, county, tract, blockgroup and block id
## You need to change this file if not doing CA
## or if you want to change the input census data file, eg to a smaller file like tracts
## See http://www2.census.gov for details
## Note there are several vintages (year versions) of each census product. For
## tracts, block groups, and blocks these don't change between censuses - there are only improvements/corrections
## If you are interested in comparisons over time (eg 2000 - 2010 census) get the harmonized data from NHGIS

#census_layer <- 'tabblock2010_06_pophu' # The layer is the name of the feature layer within the file
# For shapefiles it is the same as the prefix of the shapefile

#census_layer <- 'tl_2014_06_bg' #census blockgroup level data

census_layer <- 'tl_2014_06_tract' #census tract data

census_crs <- '4269' # US Census Tigerline data use geographic coordinates with the NAD83 datum
# The EPSG code for which is 4269
# See http://spatialreference.org/ref/epsg/4269/ for details.

# Which census geography `census_file` holds; selects the FIPS attribute that
# is read from the overlay result further down.
# (the variable name keeps its original spelling because later code uses it)
census_geograhpy_type <- "tracts" # one of tracts, blocks, or blockgroups

output_crs <- '4326' #WGS84
## USE '3310' for CA Teale Albers - See http://spatialreference.org/ref/epsg/3310/
## Used for CA state-wide data processing (metric calculations)
## If the output CRS does not match the census CRS we will
## transform the data before saving to new file as last step

out_csv_file <- "point_data_withfips.csv" # The name of the output csv file
# Will be written to working_dir if full path not specified

out_shapefile_prefix <- "point_data_withfips"
out_shapefile_directory <- "." # The period indicates the current working dir.
# You can specify another directory as needed

debug <- 1 # We are just testing this script if debug is 1. If running for real, set this to 0
# When debug is 1 we only read in first 50 records from point file
# ########################################################################################################

# Load needed libraries
library(sp)
library(rgdal)
library("R.utils") # for file utils, like zip and unzipping files

# Set working directory for input and output where full path not given
setwd(working_dir)

# Read in point data
## In this example we have geocoded addresses - 355,054 addresses all in alameda county (would prefer a state sample)
## Format of these address data points in input file:
## Inaddress|street_address|street_name|latitude|country_code|fips_county|country_name|country_code3|longitude|region|locality|street_number|confidence|

# Debug and real runs go through the SAME read.table() call so they parse the
# file identically (strings always stay character); the only difference is how
# many rows are read. nrows = -1 is read.table()'s default and means "all rows".
rows_to_read <- if (debug == 1) 50 else -1
point_data <- read.table(point_file,
                         sep = point_file_delimiter,
                         header = TRUE,
                         stringsAsFactors = FALSE,
                         nrows = rows_to_read)

# Convert data frame to a spatialpoints data frame object
# (the longitude/latitude columns become the point coordinates)
coordinates(point_data) =~ longitude+latitude

# Specify the CRS of the input point data
# (this only labels the coordinates - nothing is reprojected here)
proj4string(point_data) = CRS(paste0("+init=epsg:",point_file_crs))

# Read the census polygons (blocks, block groups or tracts - whichever
# census_file / census_layer point to) into R
census_polys <- readOGR(census_file,census_layer)

# Specify the CRS of the input census data
proj4string(census_polys) = CRS(paste0("+init=epsg:",census_crs)) # define the projection

# CRS of both layers must match!
## If they do not then the point data should be transformed
## as it is much easier operation on points than polygons
if (point_file_crs != census_crs) {
 point_data <- spTransform(point_data,CRS(paste0("+init=epsg:",census_crs)))
}

#
# Spatial Intersection
## Get fips code for each address point
## over() returns, for every point, the attribute row of the polygon it falls in.
## The FIPS attribute is read from BLOCKID10 (census 2010 block id) for block
## data and from GEOID for tract and block group data.
ptm <- proc.time() # Time this operation to get a sense of how it will scale to more points
if (census_geograhpy_type == "blocks") {
 point_data$fips_code <- over(point_data,census_polys)$BLOCKID10
}
if (census_geograhpy_type == "tracts") {
 point_data$fips_code <- over(point_data,census_polys)$GEOID
}
if (census_geograhpy_type == "blockgroups") {
 point_data$fips_code <- over(point_data,census_polys)$GEOID
}
# NOTE: any other value of census_geograhpy_type leaves fips_code unset

print(proc.time() - ptm)

# ###############################################
# Notes on output from testing
# ###############################################
## It took 18 minutes to intersect ~350,000 address points
## with census block-level data
##
## user system elapsed
## 1049.953 18.078 1072.092
##
# How long does this operation take in ArcGIS?
## on our geocoding server it took only
## 2 minutes using spatial intersect operation.
# ###############################################


# ###############################################
# Transform the data before saving if needed
# ###############################################
# At this point the points are in the census CRS (see the overlay step above)
if (output_crs != census_crs) {
  point_data <- spTransform(point_data, CRS(paste0("+init=epsg:", output_crs)))
}


# ###############################################
# Save output to local files
# ###############################################
#
## as csv
## NOTE(review): point_data@data holds only the attribute columns; the
## longitude/latitude columns were turned into coordinates earlier and are
## therefore not in this CSV. Use as.data.frame(point_data) if you need them.
write.csv(point_data@data, out_csv_file, row.names = FALSE)
#
## as shapefile
### note that field/col names longer than 10 characters will be truncated!
### The layer name must be the VALUE of out_shapefile_prefix (passing the
### quoted variable name would create files literally called
### "out_shapefile_prefix.shp").
writeOGR(point_data, out_shapefile_directory, out_shapefile_prefix,
         driver = "ESRI Shapefile", overwrite_layer = TRUE)
--------------------------------------------------------------------------------
/scripts/older_scripts/ggmap_geocoding_examples.R:
--------------------------------------------------------------------------------
#
# Geocoding with GGMAP & the Google Geocoding Service
# examples
#

#--------------------------------------
# libraries -
# that may be needed for this tutorial
# and how to install & load them:
#--------------------------------------
required.pkg <- c("htmlwidgets", "leaflet", "ggmap", "ggplot2","httr","acs","RJSONIO","RCurl","stringr","plyr", "rgeos","rgdal", "sp")
# require() returns FALSE (instead of erroring) for a package that is not
# installed, which is what lets us collect the missing ones here
pkgs.not.installed <- required.pkg[!sapply(required.pkg, function(p) require(p, character.only=T))]
install.packages(pkgs.not.installed, dependencies=TRUE)

# Load all the libraries at once.
lapply(required.pkg, library, character.only = TRUE)

#-------------------------------------------------------
# Exploring Google Geocoder with ggmap package
#-------------------------------------------------------
library(ggplot2)
library(ggmap)

# Geocode one place name with the default output
geocode("Barrows Hall, Berkeley, CA", source="google")


# Same request with each `output` option - compare how much detail comes back
geocode("Barrows Hall, Berkeley, CA", source="google", output="latlon")
geocode("Barrows Hall, Berkeley, CA", source="google", output="latlona")
geocode("Barrows Hall, Berkeley, CA", source="google", output="more")
geocode("Barrows Hall, Berkeley, CA", source="google", output="all")

# A tiny data frame of addresses to geocode in one call
df <- data.frame(
 address = c(
 "1517 Shattuck Ave, Berkeley, CA 94709",
 "Barrows Hall, Berkeley, CA",
 "2332 Haste St, Berkeley, CA 94704"
 ),
 stringsAsFactors = FALSE
)

df

## Geocode the three Addresses

df2 <- geocode(df$address,source="google", output="more")

# just keep the first four output columns (per the tutorial: lon, lat, type
# and loctype). This is positional, so it relies on geocode()'s column order.
df2 <- df2[,c(1:4)]

#look at output
df2

# put the input addresses and the geocoding results side by side
df3 <- data.frame(df,df2)

#look at output
df3

# Create a map of the geocoded output
# (centered on the mean of the geocoded coordinates)
map <- get_map(location=c(lon=mean(df3$lon), lat=mean(df3$lat)), zoom=14)
ggmap(map) +
 geom_point(aes(x = lon, y = lat), data=df3, size = 6, col="red" )

##--------------------------------
## Geocode a file of addresses
##--------------------------------
# get the input data
data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=F)
head(data)

## We need one column with address (not multiple)
data$address <- with(data,paste(street,city,state,zip, sep=","))
head(data)

## Odd address formats can be a problem
## (row 19, column 8 = the pasted address of one such record; the positional
##  index assumes `address` is the 8th column of `data`)
data[19,8]
geocode(data[19,8], source="google", output="latlona")

# Fix that address by removing the "#D"
data[19,8]<-"7305 Edgewater Dr,Oakland,CA,94621"
data[19,8]
geocode(data[19,8], source="google", output="latlona")

# Geocode a file of addresses - loaded into data frame
geocoded_output <- geocode(data$address, output = "latlona",
                           source = "google")

# Add output to input data
geocoded_output <- data.frame(data, geocoded_output)

head(geocoded_output) # check it

# Scaling up to more than 2500 records?
# Google limits free geocoding to 2500 addresses per day
geocodeQueryCheck() # how am I doing?

#---------------------------------------
# Get FIPS code to link to census data
#---------------------------------------
library(RCurl)
library(RJSONIO)
# FCC's Census Block Conversions API
# http://www.fcc.gov/developers/census-block-conversions-api

# Look up the Census 2010 block FIPS code for ONE coordinate pair.
#
# Args:
#   latitude, longitude: single numeric values (decimal degrees).
# Returns:
#   A length-1 character string with the block FIPS code, or NA_character_
#   when a coordinate is missing/non-numeric or the lookup fails. Always
#   returning exactly one string keeps the mapply() result below a plain
#   character vector instead of a list.
# NOTE(review): this data.fcc.gov endpoint may have been superseded by a newer
# FCC API - confirm the URL still responds before a large run.
latlong2fips <- function(latitude, longitude) {
  #Source: https://gist.github.com/ramhiser/f09a71d96a4dec80994c

  # Records that failed to geocode have NA coordinates: skip the web request
  if (!is.numeric(latitude) || !is.numeric(longitude) ||
      is.na(latitude) || is.na(longitude)) {
    return(NA_character_)
  }

  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
  url <- sprintf(url, latitude, longitude)
  print(url)

  # A network or parse failure for one point should not stop the whole batch
  fips <- tryCatch({
    json <- RJSONIO::fromJSON(RCurl::getURL(url))
    json$Block[["FIPS"]] # Census 2010 Block FIPS Code
  }, error = function(e) {
    warning("FIPS lookup failed for ", url, ": ", conditionMessage(e),
            call. = FALSE)
    NULL
  })

  if (is.null(fips) || length(fips) != 1L || is.na(fips)) {
    return(NA_character_)
  }
  as.character(fips)
}

geocoded_output$fips <- mapply(latlong2fips, geocoded_output$lat, geocoded_output$lon)

# take a look
head(geocoded_output)

# Save geocoded output to a file
write.csv(geocoded_output,file="google_geocoded_output.csv", row.names=FALSE)


--------------------------------------------------------------------------------
/scripts/older_scripts/ggmap_google.R:
--------------------------------------------------------------------------------
# geocoding with ggmap
## http://zevross.com/blog/2014/03/19/geocoding-with-rs-ggmap-package/
## Subject to Google Terms of Use: http://developers.google.com/maps/terms
### Clear workspace
rm(list=ls())

library(ggmap)

### Set working directory
# NOTE: machine-specific path - change this to your copy of the workshop folder
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# Try 1 address - first with the unit number ("#D"), then without it, then
# with progressively more detailed output
geocode(location="7305 Edgewater Dr #D Oakland CA 94621", source="google")
geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google")
geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="latlona")
geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")

one <- geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")
two <- geocode("sather gate, berkeley, ca", source="google", output="more", messaging=TRUE)
three <- geocode("1011 shattuck ave, berkeley ca", source="google", output="all")

# compare the location type reported for a street address vs a landmark
one$loctype
two$loctype


# try
?geocode

# Geocoding script for large list of addresses
# get the input data
data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=F)
head(data)
data$address <- with(data,paste(street,city,state,zip, sep=" "))
head(data)

# data[19,8]<-"7305 Edgewater Dr Oakland CA 94621" ## Why do we need to do this??

# Refer to the address column by NAME: a positional index (column 8) silently
# picks the wrong column as soon as the input file gains or loses a field
mylocs <- geocode(data$address, output = "more", source = "google")
head(mylocs)

# output = "more" returns many columns; keep just the first four
mylocs_sub <- mylocs[,c(1:4)]

# append geocode results back to input data
geocoded_data <- data.frame(data,mylocs_sub)

#write.csv(geocoded_data,file="geocoded_data.csv", row.names=FALSE)

# lets plot it
library(ggplot2)
library(ggmap)

# na.rm = TRUE: addresses that failed to geocode have NA coordinates, and a
# single NA would otherwise make the map centre NA and get_map() fail
map <- get_map(location=c(lon=mean(mylocs$lon, na.rm = TRUE),
                          lat=mean(mylocs$lat, na.rm = TRUE)))
ggmap(map) +
  geom_point(aes(x = lon, y = lat), data = mylocs, size = 6, col="red" )


## TRY - what's different?
# Same map as before but with an explicit zoom level.
# na.rm = TRUE: addresses that failed to geocode have NA coordinates, and a
# single NA would otherwise make the map centre NA and get_map() fail
map <- get_map(location=c(lon=mean(mylocs$lon, na.rm = TRUE),
                          lat=mean(mylocs$lat, na.rm = TRUE)), zoom=13)
ggmap(map) +
  geom_point(aes(x = lon, y = lat), data = mylocs, size = 6, col="red" )



# Scaling up to more than 2500 records
geocodeQueryCheck() # how am I doing?

--------------------------------------------------------------------------------
/scripts/older_scripts/google_batch_big.R:
--------------------------------------------------------------------------------
#
# http://www.shanelynn.ie/massive-geocoding-with-r-and-google-maps/
#
# Geocoding script for large list of addresses.
# Shane Lynn 10/10/2013

# load up the ggmap library
library(ggmap)
# get the input data
# (`infile` is the file name WITHOUT ".csv"; it is reused to name the temp
#  and output files)
infile <- "input"
data <- read.csv(paste0('./', infile, '.csv'))

# get the address list, and append "Ireland" to the end to increase accuracy
# (change or remove this if your address already include a country etc.)
addresses <- data$Address
addresses <- paste0(addresses, ", Ireland")

# define a function that will process Google's server responses for us.
# Geocode ONE address with Google (via ggmap) and flatten the reply.
#
# Args:
#   address: a single address string.
# Returns:
#   A one-row data frame with columns lat, long, accuracy, formatted_address,
#   address_type and status. Everything except `status` is NA when the
#   geocoder did not return status "OK".
# Side effects:
#   Sleeps for one hour (repeatedly) while Google reports OVER_QUERY_LIMIT.
getGeoDetails <- function(address){
  # A reply without a `status` field (e.g. a failed request) would otherwise
  # crash the comparisons below; report it as "NO_STATUS" instead.
  reply_status <- function(reply) {
    status <- if (is.list(reply)) reply$status else NULL
    if (is.null(status)) "NO_STATUS" else status
  }

  # use the geocode function to query google servers
  geo_reply <- geocode(address, output='all', messaging=TRUE, override_limit=TRUE)
  # now extract the bits that we need from the returned list
  answer <- data.frame(lat=NA, long=NA, accuracy=NA, formatted_address=NA, address_type=NA, status=NA)
  answer$status <- reply_status(geo_reply)

  # if we are over the query limit - want to pause for an hour
  while(answer$status == "OVER_QUERY_LIMIT"){
    print("OVER QUERY LIMIT - Pausing for 1 hour at:")
    time <- Sys.time()
    print(as.character(time))
    Sys.sleep(60*60)
    geo_reply <- geocode(address, output='all', messaging=TRUE, override_limit=TRUE)
    answer$status <- reply_status(geo_reply)
  }

  # return NA's if we didn't get a match:
  if (answer$status != "OK"){
    return(answer)
  }
  # else, extract what we need from the Google server reply into a dataframe
  # (only the first / best match is used):
  best <- geo_reply$results[[1]]
  answer$lat <- best$geometry$location$lat
  answer$long <- best$geometry$location$lng
  if (length(best$types) > 0){
    answer$accuracy <- best$types[[1]]
  }
  answer$address_type <- paste(best$types, collapse=',')
  answer$formatted_address <- best$formatted_address

  return(answer)
}

# initialise a dataframe to hold the results
geocoded <- data.frame()
# find out where to start in the address list (if the script was interrupted before):
startindex <- 1
# if a temp file exists - load it up and count the rows!
tempfilename <- paste0(infile, '_temp_geocoded.rds')
if (file.exists(tempfilename)){
  print("Found temp file - resuming from index:")
  geocoded <- readRDS(tempfilename)
  # Rows 1..nrow(geocoded) are already done, so resume at the NEXT address.
  # (Restarting at nrow(geocoded) would geocode the last saved address again
  #  and leave a duplicate row, so `geocoded` no longer lines up with `data`.)
  startindex <- nrow(geocoded) + 1
  print(startindex)
}

# Start the geocoding process - address by address.
# geocode() function takes care of query speed limit.
# Guard against an already-complete run: seq(n + 1, n) would count DOWN.
remaining <- if (startindex <= length(addresses)) {
  seq(startindex, length(addresses))
} else {
  integer(0)
}
for (ii in remaining){
  print(paste("Working on index", ii, "of", length(addresses)))
  # query the google geocoder - this will pause here if we are over the limit.
  result <- getGeoDetails(addresses[ii])
  print(result$status)
  result$index <- ii
  # append the answer to the results file.
  geocoded <- rbind(geocoded, result)
  # save temporary results as we are going along
  # (this checkpoint is what makes the resume logic above possible)
  saveRDS(geocoded, tempfilename)
}

# now we add the latitude and longitude to the main data
data$lat <- geocoded$lat
data$long <- geocoded$long
data$accuracy <- geocoded$accuracy

# finally write it all to the output files
saveRDS(data, paste0("../data/", infile ,"_geocoded.rds"))
write.table(data, file=paste0("../data/", infile ,"_geocoded.csv"), sep=",", row.names=FALSE)
--------------------------------------------------------------------------------
/scripts/older_scripts/google_geocode_in_limits.R:
--------------------------------------------------------------------------------
#
# Purpose: Use GGMAP to geocode with the source="google" option
# within google daily query limits
# Subject to Google Terms of Use: http://developers.google.com/maps/terms
# You need to re-run the script when your number of needed geocodes exceeds the query limit
# pattyf@berkeley.edu, 12/09/2015
# created for dlab.berkeley.edu tutorial as example
#

# geocode() and geocodeQueryCheck() come from ggmap
library(ggmap)

address_file <- "address_data/formatted/oak_liq_w_ids_types_headers.csv"
address_file_copy <- "address_data/formatted/oak_liq_w_ids_types_headers_copy.csv"
address_file_geocoded <- "address_data/formatted/oak_liq_w_ids_types_headers_geocoded.csv"
# DO ONCE - make a copy of the data with the addresses properly formatted
# (on later runs the copy holds only the records that still need geocoding)
if (!file.exists(address_file_copy)) {
  data <- read.csv(file=address_file,stringsAsFactors=F) # read data
  data$address <- with(data,paste(street,city,state,zip, sep=",")) #add single column address
  data[19,8]<-"7305 Edgewater Dr,Oakland,CA,94621" ## Specific to this data

  write.csv(data,file=address_file_copy, row.names=FALSE)
}

# Read in the copy of the data to be geocoded
data <- read.csv(file=address_file_copy,stringsAsFactors=F) # read data
# number of queries we may still make today, capped at the number of records
maxrecs <- as.numeric(geocodeQueryCheck())
if (maxrecs > nrow(data)) {
  maxrecs <- nrow(data)
}
maxrecs

if (maxrecs > 0) {
  # create two subsets: the first `maxrecs` rows to geocode now, the rest later.
  # Base indexing avoids an undeclared dplyr dependency and the precedence
  # trap in `maxrecs+1:n()`, which parses as maxrecs + (1:n()).
  geocode_now <- seq_len(maxrecs)
  not_geocoded <- data[geocode_now, , drop = FALSE]
  geocode_later <- data[-geocode_now, , drop = FALSE]

  # save to file what we will geocode later
  write.csv(geocode_later,file=address_file_copy, row.names=FALSE)
  rm(geocode_later)

  geocoded_output <- geocode(not_geocoded$address, output = "latlona", source = "google")

  geocoded_output <- data.frame(not_geocoded, geocoded_output) # combine the input data with geocoded results

  # save output - append (without repeating the header) if earlier runs
  # already created the file
  if (file.exists(address_file_geocoded)) {
    write.table(geocoded_output,file=address_file_geocoded, col.names=FALSE, row.names=FALSE, sep=",", append=TRUE)
  } else {
    write.table(geocoded_output,file=address_file_geocoded, row.names=FALSE, sep=",")
  }
}

# check file with geocoded data
# (it does not exist yet if no queries were available on the first run)
if (file.exists(address_file_geocoded)) {
  doh <- read.csv(file=address_file_geocoded,stringsAsFactors=F) # read data
  doh
}

--------------------------------------------------------------------------------
/scripts/older_scripts/spatial_analysis_examples.R:
--------------------------------------------------------------------------------
#
# Spatial Analysis of Addresses Geocoded with Census Geocoder
#
# pattyf@berkeley.edu, 5/3/2016

#--------------------------------------
# libraries -
# that may be needed for this tutorial
# and how to install & load them:
#--------------------------------------
required.pkg <- c("htmlwidgets", "leaflet", "ggmap", "ggplot2","httr","acs","RJSONIO","RCurl","stringr","plyr", "rgeos","rgdal", "sp")
# require() returns FALSE (instead of erroring) for a package that is not
# installed, which is what lets us collect the missing ones here
pkgs.not.installed <- required.pkg[!sapply(required.pkg, function(p) require(p, character.only=T))]
install.packages(pkgs.not.installed, dependencies=TRUE)

# Load all the libraries at once.
lapply(required.pkg, library, character.only = TRUE)


#------------------------------------------------------
# Data Linkage Example:
# Link geocoded addresses to census data
#------------------------------------------------------
#clean environment
rm(list=ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# There are many online services for downloading census data.
# You can use one of these to download census data and then read the data into R.
# Or you can use a library like "acs" to make a request of the census online data service API from within R.
# You can link the census data to your geocoded addresses by the FIPS code.
# The FIPS code, also called GEOID, identifies the census geography to which the tabular data have been aggregated.
# For example, the specific census tract or blockgroup.
# This requires the geocoded addresses to have FIPS codes to link to the census data.
# If you use the Census Geocoding Service you get the FIPS codes with your geocoded output.
# If you use another service that does not give you the FIPS code, you can use the FCC census api or spatial overlay to get
# the FIPS code for each geocoded address.

# The code below uses the acs library and is a modification of the following blog post which has great examples and more details:
# http://zevross.com/blog/2015/10/14/manipulating-and-mapping-us-census-data-in-r-using-the-acs-tigris-and-leaflet-packages-3

# Use the ACS (American Community Survey) library to fetch census tract level data for our geocoded addresses.
library(acs)
library(stringr) # to format strings

# You need a census api key to use this library. You can get it in a few minutes at:
# http://api.census.gov/data/key_signup.html

# I keep my key in a file that I source to read into R
# My file has one line and looks like this (NOT MY REAL KEY):
# my_census_api_key <- "f2666666666666666666666666666632"
source("keys/census_api_key.R")

# Activate the key
api.key.install(key=my_census_api_key)

# Identify the census geography of interest
# (all tracts in county 1 of California)
geo <- geo.make(state=c("CA"),county=c(1), tract="*")

# Fetch the census data of interest
# (table B19001, 5-year estimates ending 2014, with descriptive column names)
income <- acs.fetch(endyear = 2014, span = 5, geography = geo, table.number = "B19001", col.names = "pretty")

# Take a look at the specific ACS columns returned
attr(income, "acs.colnames")

# Convert the census data to a data frame, (1) keeping only the columns of interest and (2) creating the FIPS key
# The key is state (2 digits) + county (3) + tract (6), each left-padded with
# zeros so it matches the GEOID built for the geocoded addresses below.
income_df <- data.frame(paste0(str_pad(income@geography$state, 2, "left", pad="0"),
                               str_pad(income@geography$county, 3, "left", pad="0"),
                               str_pad(income@geography$tract, 6, "left", pad="0")),
                        income@estimate[,c("B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): Total:" ,
                                           "B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): $200,000 or more")],
                        stringsAsFactors = FALSE)

# take a look at it
head(income_df)

# add row numbers to the data frame
rownames(income_df)<-1:nrow(income_df)

# relabel the columns
names(income_df)<-c("GEOID", "total", "over_200")

# create a new variable - percent of incomes that are over 200k in the census tract
income_df$percent <- 100*(income_df$over_200/income_df$total)

# take a look at it
head(income_df)

# Read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
geocoded_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)

# take a look at them
head(geocoded_results)

# Create the Key on which we will join the geocoded addresses to the
# Census data - this is the FIPS code, often called the GEOID
geocoded_results$GEOID <- paste0(str_pad(geocoded_results$state_fips, 2, "left", pad="0"),
                                 str_pad(geocoded_results$county_fips, 3, "left", pad="0"),
                                 str_pad(geocoded_results$tract_fips, 6, "left", pad="0"))

# take a look at them
head(geocoded_results)

# Now Join the census data to the geocoded addresses by the GEOID
# NOTE: this is an inner join - addresses whose GEOID has no census row are dropped
geocoded_results <- merge(geocoded_results,income_df, by="GEOID")

# take a look at them
head(geocoded_results)

# Map the results with Leaflet for Interactive mapping
# This way we can click on any address and see the census data value.
# Popups are rendered as HTML, so line breaks must be written as <br>;
# a plain newline inside the string collapses into a space.
leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(data = geocoded_results, lng = ~lon,
                   lat = ~lat, radius = 5, stroke=F,
                   popup = paste0("Geocoded Address:<br>", geocoded_results$matched_address,
                                  "<br>Percent of Households above $200k: ", round(geocoded_results$percent,2), "%"),
                   color = "red",
                   fillOpacity = 0.7)


#----------------------------------------------------------------------------------------
# Spatial Overlay #1
# Question: What is the Community College District for each of our geocoded addresses?
#----------------------------------------------------------------------------------------
#clean environment
rm(list=ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

library(sp)
library(rgdal)
library(rgeos)

# read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
geocoded_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
head(geocoded_results) # take a look at the results

# what is the type of object
class(geocoded_results)

# let's make it spatialPointsDataFrame
coordinates(geocoded_results) <- ~lon+lat
class(geocoded_results)

# plot the points
plot(geocoded_results)

# Get the Alameda Community College Districts data
# Format: ESRI Shapefile
# Source: https://data.acgov.org/Geospatial-Data/Community-College-Districts-within-Alameda-County/bdqp-je9q

# Read downloaded shapefile into R
alameda_ccds <- readOGR(dsn="./shapefiles/AlamedaCommunityCollegeDistricts", layer="geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c")
class(alameda_ccds) # what is the data object type?
plot(alameda_ccds) #plot the CCDs
points(geocoded_results, col = "red") # add the geocoded points to the plot

head(alameda_ccds@data) #look at the attributes that describe each polygon

# Let's use the over function to find out
# the CCD of each of our addresses
# over stands for spatial overlay
# NOTE: this first attempt is expected to fail because the points have no CRS
# yet; try() shows the error without aborting the script when it is sourced.
try(address_ccd <- over(geocoded_results, alameda_ccds))

# That didn't work
# "over" requires both data sets to be spatial objects (they are)
# with the same coordinate reference system (CRS)
# What is the CRS of the CCDs?
alameda_ccds@proj4string # or proj4string(alameda_ccds)

# What is the CRS of our geocoded points?
geocoded_results@proj4string # undefined

# Let's set the CRS of our points to that of the CCDs
# Why is that ok? the geocoded points are NAD83 CRS if Census geocoder was used,
# WGS84 (same as the CCDs) if Google geocoder was used.
# However in USA those are for the most part identical (may be a few meters off)
proj4string(geocoded_results) <- CRS(proj4string(alameda_ccds))

# make sure the CRSs are the same
proj4string(alameda_ccds) == proj4string(geocoded_results)

# Now try the overlay operation again:
address_ccd <- over(geocoded_results, alameda_ccds)
address_ccd # take a look at the output

# Now we can join the CCD district name (dist_name) to our geocoded addresses
# first, subset the overlay results (keeps a one-column data frame)
ccd_df <- address_ccd["dist_name"]

# Make sure the CCD dist_name is a character string not a factor
str(ccd_df)
ccd_df[] <- lapply(ccd_df, as.character)
str(ccd_df)

# Set NAs (points that fall in no district) to a default value
ccd_df$dist_name[is.na(ccd_df$dist_name)] <- "unknown"
head(ccd_df) # take a look

# Join the CCD data to our geocoded data
geocoded_results <- cbind(geocoded_results, ccd_df)

# view results
head(geocoded_results)

# Plot it - leaflet Interactive mapping
# geocoded_results is a Spatial object at this point, so lon/lat are no longer
# attribute columns. Convert back to a plain data frame (which restores the
# lon/lat columns) before using ~lon, ~lat and $lon / $lat.
geocoded_df <- as.data.frame(geocoded_results)
leaflet() %>% addTiles() %>%
  setView(lng = mean(geocoded_df$lon), lat = mean(geocoded_df$lat), zoom = 16) %>%
  addCircleMarkers(data = geocoded_df, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Geocoded Address:<br>", geocoded_df$matched_address,
                                 "<br>Community College District:<br>", geocoded_df$dist_name),
                   color = "red",
                   fillOpacity = 0.9)

#
# Question: How many addresses are in each CCD?
#
# create a cross-tab from our overlay (over) operation
addressByCCD_df <- as.data.frame(table(address_ccd$dist_name))

#look at it
head(addressByCCD_df)

#relabel the columns
names(addressByCCD_df)[names(addressByCCD_df) == "Var1"] <- "ccd_name"
names(addressByCCD_df)[names(addressByCCD_df) == "Freq"] <- "address_count"

#look at it again
addressByCCD_df

#-----------------------------------------------------------------------
# Spatial Overlay #2
# Question: What addresses are within 1000 feet of a school?
#-----------------------------------------------------------------------
#clean environment
rm(list = ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

library(sp)
library(rgdal)
library(rgeos)


# Read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
geocoded_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)
head(geocoded_results) # take a look at the results

# what is the type of object
class(geocoded_results)

#create a SpatialPointsDataFrame object from our geocoded address locations
coordinates(geocoded_results) <- ~lon+lat
class(geocoded_results)

#plot the points
plot(geocoded_results)

#what is the coordinate system of our data?
geocoded_results@proj4string #undefined

# Get the Alameda County Schools data
# Format: ESRI Shapefile
# Source: https://data.acgov.org/
# Read downloaded shapefile into R
alameda_schools <- readOGR(dsn="./shapefiles/AlamedaCountySchools", layer="geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca")
# plot it
plot(alameda_schools)

# What class of data object is it?
class(alameda_schools)

# What is its CRS?
alameda_schools@proj4string # or proj4string(alameda_schools)

# Let's set the CRS of the geocoded points to that of the Alameda schools.
# Why is that ok? The geocoded points are NAD83 CRS if the Census geocoder was used,
# WGS84 if the Google geocoder was used (presumably the same as the schools
# layer - confirm against the CRS printed above).
# However in the USA those are for the most part identical (may be a few meters off)
proj4string(geocoded_results) <- CRS(proj4string(alameda_schools))

# make sure they are the same
proj4string(alameda_schools) == proj4string(geocoded_results)

# Now that both are in the same coordinate space let's transform them to a 2D projected CRS,
# so that distances are expressed in meters rather than degrees.
# Here we use UTM zone 10N, WGS84
# http://spatialreference.org/ref/epsg/32610/
geocoded_results_utm10 <- spTransform(geocoded_results, CRS("+init=epsg:32610"))
alameda_schools_utm10 <- spTransform(alameda_schools, CRS("+init=epsg:32610"))

# Let's assume we are investigating the addresses relative to schools and that
# we want to see if any of these addresses are within 1000 feet of a school.
# 1000 feet = 305 meters

# Create a polygon from each point location that is the 1,000 foot buffer around the school
# We submit the buffer distance in meters because meters are the units of the CRS.
alschools_buf <- gBuffer(alameda_schools_utm10, byid = TRUE, width = 305)

#plot the buffers
plot(alschools_buf)

# add the geocoded address points
points(geocoded_results_utm10, col = "red")

# "over" (overlay) operation to see what geocoded addresses are within school buffer zones
in_school_zone <- over(geocoded_results_utm10, alschools_buf)

#take a look at the output
in_school_zone

# create a new data frame that just has our column of interest - schoolname (site)
in_buf <- in_school_zone["site"]

# Make sure the schoolname (site) is a character string not a factor
in_buf[] <- lapply(in_buf, as.character)

# Replace NAs (addresses outside every buffer) with a default value
in_buf$site[is.na(in_buf$site)] <- "Not within school zone"

# take a look at our data
in_buf

# Join it to our geocoded data
geocoded_results <- cbind(geocoded_results, in_buf)

# Take a look
head(geocoded_results)

# geocoded_results is a Spatial object: lon/lat live in its coordinates, not in
# its attribute table, and ggplot2 needs a plain data frame. Convert once and
# use the data frame for all of the plotting below.
geocoded_df <- as.data.frame(geocoded_results)
outside_zone <- geocoded_df$site == "Not within school zone"

#plot it using ggmaps - static map
# NOTE(review): assumes the schools shapefile has x / y attribute columns
# holding longitude / latitude - confirm with head(alameda_schools@data).
map <- get_map(location = c(lon = mean(geocoded_df$lon), lat = mean(geocoded_df$lat)), zoom = 15)
ggmap(map) +
  geom_point(aes(x = x, y = y), size = 4, col = "black", data = alameda_schools@data) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "blue", data = geocoded_df[outside_zone, ]) +
  geom_point(aes(x = lon, y = lat), size = 5, col = "red", data = geocoded_df[!outside_zone, ])

# plot it - leaflet Interactive mapping
# First subset the data to keep the code clearer
geocoded_resultsInZone <- geocoded_df[!outside_zone, ]
geocoded_resultsOutsideZone <- geocoded_df[outside_zone, ]

leaflet() %>% addTiles() %>%
  setView(lng = mean(geocoded_df$lon), lat = mean(geocoded_df$lat), zoom = 16) %>%
  addCircleMarkers(data = geocoded_resultsInZone, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste0("Address:<br>", geocoded_resultsInZone$matched_address,
                                  "<br>In School Zone: ", geocoded_resultsInZone$site),
                   color = "red",
                   fillOpacity = 0.9) %>%
  addCircleMarkers(data = geocoded_resultsOutsideZone, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste0("Address:<br>", geocoded_resultsOutsideZone$matched_address,
                                  "<br>In School Zone: ", geocoded_resultsOutsideZone$site),
                   color = "blue",
                   fillOpacity = 0.7) %>%
  addMarkers(data = alameda_schools, lng = ~x, lat = ~y,
             popup = paste("School:<br>", alameda_schools$site))

#---------------------------------------------------------------------------
# sessionInfo()
#---------------------------------------------------------------------------
#R version 3.2.2 (2015-08-14)
#Platform: x86_64-apple-darwin13.4.0 (64-bit)
#Running under: OS X 10.9.5 (Mavericks)

#locale:
# [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

#attached base packages:
# [1] stats graphics grDevices utils datasets methods base

#other attached packages:
# [1] htmlwidgets_0.5 rgeos_0.3-14 rgdal_1.0-4 sp_1.2-1 leaflet_1.0.0 ggmap_2.6.1 ggplot2_2.1.0 httr_1.1.0 acs_2.0
#[10] XML_3.98-1.3 plyr_1.8.3 stringr_1.0.0

#loaded via a namespace (and not attached):
# [1] Rcpp_0.12.4 bitops_1.0-6 tools_3.2.2 digest_0.6.8 jsonlite_0.9.19 gtable_0.1.2 lattice_0.20-33
#[8] png_0.1-7 mapproj_1.2-4 curl_0.9.6 yaml_2.1.13 proto_0.3-10 RgoogleMaps_1.2.0.7 maps_3.0.0-2
#[15] grid_3.2.2 R6_2.1.2 jpeg_0.1-8 RJSONIO_1.3-0 reshape2_1.4.1 magrittr_1.5 scales_0.3.0
#[22] htmltools_0.2.6 mime_0.4 geosphere_1.4-3 colorspace_1.2-6 labeling_0.3 stringi_1.0-1 RCurl_1.95-4.8
#[29] munsell_0.4.2 rjson_0.2.15

# --------------------------------------------------------------------------------
# /scripts/older_scripts/tiger_geocoding.R:
# --------------------------------------------------------------------------------
#
# Geocoding with Tiger Geocoding Service
#
# pattyf@berkeley.edu, 05/2/2016
#
# Important note:
## You can only geocode 1000 addresses at a time
## so need to add code to loop or subset your files
##
## Documentation:
##

#clean environment
rm(list = ls())

#Load libraries
library(httr)

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# CLI format for CURL
#format of geocoding
# request for Tiger Geocoder
##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010
##
##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
##--form addressFile=@tiger/tiger_12addresses_to_geocode.csv
##--form benchmark=Public_AR_Census2010
##--form vintage=Census2010_Census2010
##-o output_file.csv

tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# let's take a look at the addresses that we will geocode
# The geocoder input file has no header row, so header = FALSE is required;
# otherwise the first address is silently consumed as column names.
addresses_to_geocode <- read.csv(tiger_input_addressFile, header = FALSE,
                                 stringsAsFactors = FALSE,
                                 col.names = c("id", "street", "city", "state", "zip"))

tiger_url <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

## WORKED - thanks to: http://stackoverflow.com/questions/26611289/curl-post-statement-to-rcurl-or-httr
## add verbose() to see details of process
geocoded_addresses <- POST(tiger_url, encode = "multipart",
                           body = list(addressFile = upload_file(tiger_input_addressFile),
                                       benchmark = "Public_AR_Census2010",
                                       vintage = "Census2010_Census2010"))

# fail loudly on an HTTP error instead of trying to parse an error page
stop_for_status(geocoded_addresses)

# get the response body as text; asking for "text" explicitly avoids any
# content-type based auto-parsing
raw_csv <- content(geocoded_addresses, as = "text", encoding = "UTF-8")

#write raw output to file
writeLines(raw_csv, "test_out3.txt")

#read the output into a data frame directly from the text (no file round-trip)
# Unmatched addresses come back with fewer fields, so name 12 columns
# explicitly - the same count the batch script names - rather than letting
# read.csv guess the width from the first few rows.
mylocs <- read.csv(text = raw_csv, header = FALSE, stringsAsFactors = FALSE,
                   col.names = paste0("V", 1:12))
head(mylocs)

#split the "lon,lat" values (column V6) into two separate numeric columns
# unmatched rows have an empty V6 and end up as NA
lon_lat <- strsplit(as.character(mylocs$V6), ",", fixed = TRUE)
mylocs$lon <- as.numeric(vapply(lon_lat, function(p) p[1], character(1)))
mylocs$lat <- as.numeric(vapply(lon_lat, function(p) p[2], character(1)))

#lets plot it
library(ggplot2)
library(ggmap)

# Centre the map on the mean location; na.rm = TRUE because addresses the
# geocoder could not match have NA coordinates.
map <- get_map(location = c(lon = mean(mylocs$lon, na.rm = TRUE),
                            lat = mean(mylocs$lat, na.rm = TRUE)), zoom = 15)
ggmap(map) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "red", data = mylocs)

# --------------------------------------------------------------------------------
# /scripts/older_scripts/tiger_geocoding_batch.R:
# --------------------------------------------------------------------------------
#
# Batch Geocoding with the US Census Geocoding Service
#
# pattyf@berkeley.edu, 05/2/2016
#
# Important note:
## You can only geocode 1000 addresses at a time
## so need to add code to loop or subset your files
##
## Documentation:
##
#Load libraries
library(httr) # to submit geocoding request
library(ggplot2) # to plot output
library(ggmap) # to plot output
library(leaflet) # for interactive plotting
library(stringr)

#clean environment
rm(list = ls())

#set working directory
setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")

# our file of addresses that need to be geocoded
tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# the output file we will create
geocoded_output_file <- "geocoded_addresses_out.csv"

# CLI format for CURL
#format of geocoding request for Tiger Geocoder
##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010
##
##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
##--form addressFile=@tiger/tiger_12addresses_to_geocode.csv
##--form benchmark=Public_AR_Census2010
##--form vintage=Census2010_Census2010
##-o output_file.csv

# The census geocoder does not want column names in the file to be geocoded
# but we want them to make sense of the data when we
# view it in R
# For info on the correct format for submitting a file of addresses see:
# https://www.census.gov/geo/maps-data/data/geocoder.html
# Five columns - No headers, comma separated EVEN IF DATA NOT AVAILABLE
# Unique ID, house number and street name, city, state, zipcode
# Two valid examples:
#1, 1600 Pennsylvania Ave NW, Washington, DC,
#2, 1600 Pennsylvania Ave NW,,,20502

## Take a look at the addresses that we will geocode
# header = FALSE: the input has no header row, so without it the first address
# would be consumed as column names and the count below would be off by one.
addresses_to_geocode <- read.csv(tiger_input_addressFile, header = FALSE,
                                 stringsAsFactors = FALSE,
                                 col.names = c("id", "street", "city", "state", "zip"))

#how many addresses?
num_addresses <- nrow(addresses_to_geocode)
if (num_addresses == 0) {
  stop("No addresses found in ", tiger_input_addressFile, call. = FALSE)
}

#remove the address data object
rm(addresses_to_geocode)

# Submit one file of addresses to the Census batch geocoder and save the result.
#
# file_of_addresses: path to a header-less, 5-column CSV (id, street, city,
#                    state, zip) holding at most one batch of addresses.
# output_file:       CSV file the geocoded rows are written to.
# append:            FALSE creates/overwrites output_file and writes a header;
#                    TRUE appends the rows without repeating the header.
#
# Returns a data frame with the geocoder's 12 output columns plus numeric
# lon / lat columns (NA for addresses that were not matched).
get_geocoded_addresses <- function(file_of_addresses,
                                   output_file = geocoded_output_file,
                                   append = FALSE) {
  tiger_url <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
  response <- POST(tiger_url, encode = "multipart",
                   body = list(addressFile = upload_file(file_of_addresses),
                               benchmark = "Public_AR_Census2010",
                               vintage = "Census2010_Census2010"))
  # fail loudly on an HTTP error instead of parsing an error page as CSV
  stop_for_status(response)

  # Output column names
  mycols <- c("id", "in_address", "match_status", "match_type", "matched_address",
              "lon_lat", "tlid", "street_side", "state_fips", "county_fips",
              "tract_fips", "block_fips")

  # parse the response body straight from text - no temp file needed
  raw_csv <- content(response, as = "text", encoding = "UTF-8")
  mylocs <- read.csv(text = raw_csv, header = FALSE, col.names = mycols,
                     stringsAsFactors = FALSE)

  # split the "lon,lat" values into two separate numeric columns
  lon_lat <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
  mylocs$lon <- as.numeric(vapply(lon_lat, function(p) p[1], character(1)))
  mylocs$lat <- as.numeric(vapply(lon_lat, function(p) p[2], character(1)))

  # save geocoded addresses to a file
  # write.csv() ignores append = TRUE, so use write.table() with the same
  # CSV conventions (comma separator, doubled embedded quotes).
  write.table(mylocs, file = output_file, sep = ",", row.names = FALSE,
              col.names = !append, append = append, qmethod = "double")

  mylocs
}

# some counters to keep track of the number of addresses we need to process
# we can only batch geocode 1000 addresses at a time
skip_rows <- 0
read_rows <- 1000
processed_rows <- 0

if (num_addresses <= read_rows) {
  # small enough for one request: geocode the input file as-is
  my_results <- get_geocoded_addresses(tiger_input_addressFile)
  processed_rows <- num_addresses
} else {
  #process 1000 addresses at a time, keeping every chunk's results
  n_chunks <- ceiling(num_addresses / read_rows)
  result_chunks <- vector("list", n_chunks)

  for (chunk in seq_len(n_chunks)) {
    # colClasses = "character" keeps zip codes with leading zeros intact
    addresses_to_geocode <- read.csv(tiger_input_addressFile, header = FALSE,
                                     stringsAsFactors = FALSE,
                                     colClasses = "character",
                                     nrows = read_rows, skip = skip_rows)

    # write this chunk to a temp file WITHOUT a header row
    # (write.csv() ignores col.names = FALSE, hence write.table())
    temp_infile <- tempfile(fileext = ".csv")
    write.table(addresses_to_geocode, file = temp_infile, sep = ",",
                row.names = FALSE, col.names = FALSE, qmethod = "double")

    # first chunk creates the output file, later chunks append to it
    result_chunks[[chunk]] <- get_geocoded_addresses(temp_infile,
                                                     append = skip_rows > 0)
    unlink(temp_infile)

    processed_rows <- processed_rows + nrow(addresses_to_geocode)
    skip_rows <- skip_rows + read_rows
  }

  my_results <- do.call(rbind, result_chunks)
}

# Use ggmap to plot geocoded addresses
# as red dots on a google map image
# (na.rm = TRUE: unmatched addresses have NA coordinates)
map <- get_map(location = c(lon = mean(my_results$lon, na.rm = TRUE),
                            lat = mean(my_results$lat, na.rm = TRUE)), zoom = 15)
ggmap(map) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "red", data = my_results)

#------------------------------------------------------
# Data Linkage Example:
# Link geocoded addresses to census data
#------------------------------------------------------
library(acs)
source("keys/census_api_key.R")
api.key.install(key = my_census_api_key)

geo <- geo.make(state = c("CA"), county = c(1), tract = "*")

# Fetch ACS 5-year table B19001 (household income) for the span ending in 2014
income <- acs.fetch(endyear = 2014, span = 5, geography = geo,
                    table.number = "B19001", col.names = "pretty")
attr(income, "acs.colnames")

# Build the data frame: FIPS key (zero-padded state + county + tract) plus
# the two estimate columns of interest. It already has exactly three columns,
# so no further column selection is needed.
income_cols <- c(
  "B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): Total:",
  "B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): $200,000 or more"
)
income_df <- data.frame(paste0(str_pad(income@geography$state, 2, "left", pad = "0"),
                               str_pad(income@geography$county, 3, "left", pad = "0"),
                               str_pad(income@geography$tract, 6, "left", pad = "0")),
                        income@estimate[, income_cols],
                        stringsAsFactors = FALSE)

rownames(income_df) <- seq_len(nrow(income_df))
names(income_df) <- c("GEOID", "total", "over_200")
income_df$percent <- 100 * (income_df$over_200 / income_df$total)

# read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
my_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)

# Create the Key on which we will join the geocoded addresses to the
# Census data - this is the FIPS code, often called the GEOID
my_results$GEOID <- paste0(str_pad(my_results$state_fips, 2, "left", pad = "0"),
                           str_pad(my_results$county_fips, 3, "left", pad = "0"),
                           str_pad(my_results$tract_fips, 6, "left", pad = "0"))

# Now Join the census data to the geocoded addresses by the GEOID
my_results2 <- merge(my_results, income_df, by = "GEOID")

# Map the results with Leaflet for Interactive mapping
# This way we can click on any address and see the census data value.
# Popups are rendered as HTML, so lines are separated with <br>.
#popup = paste("Address:<br>", my_results2$matched_address,"<br>Percent Below Poverty Line:", my_results2$pctpov),
leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(data = my_results2, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Address:<br>", my_results2$matched_address,
                                 "<br>Percent of Households<br>above $200k:", my_results2$percent),
                   color = "red",
                   fillOpacity = 0.7)


#-----------------------------------------------------------------------
# Spatial Overlay #1
# Question: What is the Community College District for each of our geocoded addresses?
#-----------------------------------------------------------------------
library(sp)
library(rgdal)
library(rgeos)

# read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
my_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)
head(my_results) # take a look at the results

#what is the type of object
class(my_results)

#let's make it spatial points data frame
# (this moves lon/lat out of the attribute table and into the coordinates)
coordinates(my_results) <- ~lon+lat
class(my_results)
#plot the points
plot(my_results)

# Alameda Community College Districts
# Format: ESRI Shapefile
# Source: https://data.acgov.org/Geospatial-Data/Community-College-Districts-within-Alameda-County/bdqp-je9q
alameda_ccds <- readOGR(dsn = "./shapefiles/AlamedaCommunityCollegeDistricts",
                        layer = "geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c") # Read it into R.
class(alameda_ccds) # what is the data object type?
plot(alameda_ccds) #plot the CCDs
points(my_results, col = "red") # add the geocoded points to the plot

head(alameda_ccds@data) #look at the attributes that describe each polygon

# Let's use the over function to find out
# the CCD of each of our addresses
# over stands for spatial overlay
# NOTE: this first attempt is expected to fail because the points have no CRS
# yet; try() shows the error without aborting the script when it is sourced.
try(address_ccd <- over(my_results, alameda_ccds))

# over requires both data sets to be spatial objects (they are)
# with the same coordinate reference system (CRS)
# what is the CRS of the CCDs?
alameda_ccds@proj4string # or proj4string(alameda_ccds)

#what is the CRS of our geocoded points?
my_results@proj4string # undefined

#Let's set the CRS of our points to that of the CCDs
# Why is that ok? the geocoded points are NAD83 CRS if Census geocoder was used,
# WGS84 (same as the CCDs) if Google geocoder was used.
# However in USA those are for the most part identical (may be a few meters off)
proj4string(my_results) <- CRS(proj4string(alameda_ccds))

#make sure they are the same
proj4string(alameda_ccds) == proj4string(my_results)

# Now try the overlay operation again:
address_ccd <- over(my_results, alameda_ccds)
address_ccd

# Now we can join the ccd district name (dist_name) to our geocoded addresses
# first, subset the overlay results (keeps a one-column data frame)
ccd_df <- address_ccd["dist_name"]

#now make sure it is a character string not a factor
str(ccd_df)
ccd_df[] <- lapply(ccd_df, as.character)
str(ccd_df)
#now set NAs (points that fall in no district) to a default value
ccd_df$dist_name[is.na(ccd_df$dist_name)] <- "unknown"
head(ccd_df) # take a look
#join it to our geocoded data
my_results <- cbind(my_results, ccd_df)
#view results
head(my_results)

# Plot it - leaflet Interactive mapping
# my_results is a Spatial object here, so lon/lat are not attribute columns.
# Convert back to a plain data frame (which restores lon/lat) for plotting.
my_results_df <- as.data.frame(my_results)
leaflet() %>% addTiles() %>%
  setView(lng = mean(my_results_df$lon), lat = mean(my_results_df$lat), zoom = 16) %>%
  addCircleMarkers(data = my_results_df, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Address:<br>", my_results_df$matched_address,
                                 "<br>Community College District:<br>", my_results_df$dist_name),
                   color = "red",
                   fillOpacity = 0.9)

#
# Question: How many addresses are in each CCD?
#
# create a cross-tab from our overlay (over) operation
addressByCCD_df <- as.data.frame(table(address_ccd$dist_name))

#look at it
head(addressByCCD_df)

#relabel the columns
names(addressByCCD_df)[names(addressByCCD_df) == "Var1"] <- "ccd_name"
names(addressByCCD_df)[names(addressByCCD_df) == "Freq"] <- "address_count"

#look at it again
head(addressByCCD_df)

#-----------------------------------------------------------------------
# Spatial Overlay #2
# Question: What addresses are within 1000 feet of a school?
#-----------------------------------------------------------------------

library(sp)
library(rgdal)
library(rgeos)


# read in geocoded addresses
geocoded_output_file <- "geocoded_addresses_out.csv"
my_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)
head(my_results) # take a look at the results

#what is the type of object
class(my_results)

#create a spatial points data frame object from our geocoded address locations
# (do this only once: setting coordinates on an object that is already
# spatial is an error)
coordinates(my_results) <- ~lon+lat
class(my_results)
#plot the points
plot(my_results)

#what is the coordinate system of our data?
my_results@proj4string #undefined

# The shapefile directory in this repository is named "AlamedaCountySchools"
# (no spaces).
alameda_schools <- readOGR(dsn = "./shapefiles/AlamedaCountySchools",
                           layer = "geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca") # Read it into R.
class(alameda_schools)
alameda_schools@proj4string # or proj4string(alameda_schools)

#let's set the CRS of the geocoded points to that of the alameda schools
proj4string(my_results) <- CRS(proj4string(alameda_schools))


#make sure they are the same
proj4string(alameda_schools) == proj4string(my_results)

#now that both are in the same coordinate space let's transform them to a planar projected CRS
#http://spatialreference.org/ref/epsg/32610/
my_results_utm10 <- spTransform(my_results, CRS("+init=epsg:32610"))
alameda_schools_utm10 <- spTransform(alameda_schools, CRS("+init=epsg:32610"))

# Let's assume
# A sex offender cannot live within 1,000 feet of any school, childcare facility, or place where children gather.
# 1000 feet = 305 meters
alschools_buf <- gBuffer(alameda_schools_utm10, byid = TRUE, width = 305)
plot(alschools_buf)
points(my_results_utm10, col = "red")

in_school_zone <- over(my_results_utm10, alschools_buf)
in_school_zone$site
# keep the "site" COLUMN as a one-column data frame
# (in_school_zone[c('site'), ] would select a row named "site" instead)
in_buf <- in_school_zone["site"]
in_buf[] <- lapply(in_buf, as.character)
in_buf$site[is.na(in_buf$site)] <- "Not within school zone"

#join it to our geocoded data
my_results3 <- cbind(my_results, in_buf)

# my_results3 is a Spatial object: lon/lat live in its coordinates, not in its
# attribute table, and ggplot2 needs a plain data frame. Convert once and use
# the data frame for all of the plotting below.
my_results3_df <- as.data.frame(my_results3)
outside_zone <- my_results3_df$site == "Not within school zone"
results_in_zone <- my_results3_df[!outside_zone, ]
results_outside_zone <- my_results3_df[outside_zone, ]

#plot it using ggmaps - static map
# NOTE(review): assumes the schools shapefile has x / y attribute columns
# holding longitude / latitude - confirm with head(alameda_schools@data).
map <- get_map(location = c(lon = mean(my_results3_df$lon), lat = mean(my_results3_df$lat)), zoom = 15)
ggmap(map) +
  geom_point(aes(x = x, y = y), size = 4, col = "black", data = alameda_schools@data) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "blue", data = results_outside_zone) +
  geom_point(aes(x = lon, y = lat), size = 5, col = "red", data = results_in_zone)

#plot it - leaflet Interactive mapping
# Each layer builds its popup from the same subset it plots, so popups always
# line up with their markers.
leaflet() %>% addTiles() %>%
  setView(lng = mean(my_results3_df$lon), lat = mean(my_results3_df$lat), zoom = 16) %>%
  addCircleMarkers(data = results_in_zone, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Address:<br>", results_in_zone$matched_address,
                                 "<br>In School Zone:", results_in_zone$site),
                   color = "red",
                   fillOpacity = 0.9) %>%
  addCircleMarkers(data = results_outside_zone, lng = ~lon,
                   lat = ~lat, radius = 5, stroke = FALSE,
                   popup = paste("Address:<br>", results_outside_zone$matched_address,
                                 "<br>In School Zone:", results_outside_zone$site),
                   color = "blue",
                   fillOpacity = 0.7) %>%
  addMarkers(data = alameda_schools, lng = ~x, lat = ~y,
             popup = paste("School:<br>", alameda_schools$site))

# --------------------------------------------------------------------------------
# /scripts/older_scripts/tiger_one_at_a_time.R:
# --------------------------------------------------------------------------------
#library(httr)
library(RJSONIO)
gurl <- "http://geocoding.geo.census.gov/geocoder/geographies/address?street=912+Kingston+Ave&city=Piedmont&state=CA&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"

bad_gurl <- "http://geocoding.geo.census.gov/geocoder/geographies/address?street=912+Kingston+Ave&city=donkey&state=CA&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"

tiger_prefix <- "http://geocoding.geo.census.gov/geocoder/geographies/address?"
tiger_suffix <- "&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"

#g_out <- GET(gurl)


g_out <- fromJSON(gurl)
str(g_out)

# take the first returned values in case > 1 matches
lon <- g_out$result$addressMatches[[1]]$coordinates[["x"]]
lat <- g_out$result$addressMatches[[1]]$coordinates[["y"]]
matchedAddress <- g_out$result$addressMatches[[1]]$matchedAddress
tractfips <- g_out$result$addressMatches[[1]]$geographies$`Census Tracts`[[1]]$GEOID
blockfips <- g_out$result$addressMatches[[1]]$geographies$`Census Blocks`[[1]]$GEOID

# another way
g_out2 <- unlist(g_out)
head(g_out2)
g_out2["result.addressMatches.coordinates.x"]
#Now process a file of addresses:
tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"
# let's take a look at the addresses that we will geocode
# header = FALSE: the geocoder input file has no header row, so without it the
# first address would be consumed as column names.
addresses_to_geocode <- read.csv(tiger_input_addressFile, header = FALSE,
                                 stringsAsFactors = FALSE,
                                 col.names = c("id", "street", "city", "state", "zip"))

addresses_to_geocode

# build the query-string part of the request for each address
addresses_to_geocode$tiger_format <- paste0(
  "street=", addresses_to_geocode$street,
  "&city=", addresses_to_geocode$city,
  "&state=", addresses_to_geocode$state,
  "&zip=", addresses_to_geocode$zip
)

# geocode a file of addresses - one at a time
#
# address: query-string fragment ("street=...&city=...&state=...&zip=...").
# Prints the request URL and, when there is a match, the first matched
# address. Returns the first matched address (invisibly), or NULL when there
# is no match or the request fails.
tgeocode <- function(address) {
  address <- URLencode(address)
  g_address <- paste0(tiger_prefix, address, tiger_suffix)
  print(g_address)

  # a failed request should not stop the whole batch: warn and treat it as
  # "no match"
  g_out <- tryCatch(
    fromJSON(g_address), # result will be returned if no error
    error = function(e) {
      warning("Geocoding request failed for ", g_address, ": ",
              conditionMessage(e), call. = FALSE)
      NULL
    }
  )

  if (length(g_out$result$addressMatches) > 0) {
    print(g_out$result$addressMatches[[1]]$matchedAddress)
  } else {
    #no results
    invisible(NULL)
  }
}

## apply the geocoding function to the CSV file
library(plyr)
ldply(addresses_to_geocode$tiger_format, function(x) tgeocode(x))
#address <- c("The White House, Washington, DC","The Capitol, Washington, DC")
#locations <- ldply(address, function(x) geoCode(x))
#names(locations) <- c("lat","lon","location_type", "formatted")
#head(locations)

# --------------------------------------------------------------------------------
# /scripts/older_scripts/tigris_acs_census.R:
# --------------------------------------------------------------------------------
#
# census data with
# tigris and acs packages
# after
# http://rstudio-pubs-static.s3.amazonaws.com/90665_de25062951e540e7b732f21de53001f0.html
# https://github.com/walkerke/tigris
# http://zevross.com/blog/2015/10/14/manipulating-and-mapping-us-census-data-in-r-using-the-acs-tigris-and-leaflet-packages-3/

# download US Census TIGER Data, eg
# http://www2.census.gov/geo/tiger
# Then go to: http://www.census.gov/geo/maps-data/data/tiger-line.html (read how do i choose...)
12 | # Use the web interface to download 13 | # 14 | 15 | library(tigris) 16 | library(sp) 17 | library(acs) # api.key.install(), geo.make() and acs.fetch() used below come from this package 18 | cenpolys <- tracts(state = 'CA', county = c('Alameda')) 19 | 20 | plot(cenpolys) 21 | 22 | source("keys/census_api_key.R") 23 | api.key.install(my_census_api_key) 24 | income_data <- acs.fetch(endyear = 2012, 25 | geography = geo.make(state = "CA", 26 | county = c("Alameda"), 27 | tract = "*"), 28 | variable = "B19013_001") 29 | 30 | str(income_data@geography) #see how state and county are formatted 31 | #need to create the key on which to join spatial and demo data 32 | # NOTE(review): the literal "0"/"00" prefixes assume unpadded single-digit state and county codes (CA = 6, Alameda = 1) - confirm against the str() output above 33 | income_df <- data.frame(paste0("0",as.character(income_data@geography$state), 34 | "00",as.character(income_data@geography$county), 35 | income_data@geography$tract), 36 | income_data@estimate) 37 | 38 | colnames(income_df) <- c("GEOID", "hhincome") 39 | 40 | censusT <- geo_join(cenpolys, income_df, "GEOID", "GEOID") 41 | 42 | library(classInt) 43 | library(RColorBrewer) 44 | myclass <- classIntervals(censusT$hhincome, 9, style = "jenks") 45 | colpal <- findColours(myclass, brewer.pal(5, "OrRd")) 46 | plot(censusT, border="grey", col=colpal) 47 | library(ggplot2) # fortify(), ggplot() and geom_polygon() used below 48 | plotData <- fortify(censusT, data=censusT@data, region="GEOID") 49 | head(plotData) # take a look at the result of the fortify command 50 | library(ggmap) # get_map() and ggmap() used below 51 | ggplot() + geom_polygon(data=plotData, aes(x=long, y=lat, group=group)) 52 | # Map it. 53 | map <- get_map("Berkeley", zoom=10) 54 | ggmap(map) + geom_polygon(data=plotData, aes(x=long, y=lat, group=group)) 55 | 56 | # Add transparency to better see the reference basemap. 57 | ggmap(map) + 58 | geom_polygon(data=plotData, aes(x=long, y=lat, group=group), alpha=0.5) 59 | 60 | #Now, join the census data to the geo data frame. 
61 | plotData <- merge(plotData,censusT@data, by.x="id", by.y="GEOID") 62 | head(plotData) # now you can see the attribute data re-joined to the geographic data 63 | 64 | #map it - color regions by census variable 65 | ggmap(map) + 66 | geom_polygon(data = plotData, aes(x = long, y = lat, group = group, 67 | fill = hhincome), color = "black", size = 0.25) + 68 | coord_map() 69 | 70 | # get rid of tracts with NA 71 | #censusT <- censusT[!is.na(censusT$hhincome),] 72 | # Too dark - try this 73 | library(scales) #for pretty_breaks 74 | myplot <- ggmap(map) + 75 | geom_polygon(data = plotData, aes(x = long, y = lat, group = group, 76 | fill = hhincome)) + 77 | coord_map() + 78 | scale_fill_distiller(palette = "Greens", 79 | breaks = pretty_breaks(n = 8)) + 80 | guides(fill = guide_legend(reverse = TRUE)) 81 | 82 | myplot 83 | 84 | p2 <- subset(plotData,ALAND > 0) 85 | censusT <-p2 86 | ggmap(map) + 87 | geom_polygon(data = p2, aes(x = long, y = lat, group = group, 88 | fill = hhincome), color = "black", size = 0.25) + 89 | coord_map() 90 | # NOTE(review): geocoded_output is not created in this script - presumably a data frame with lon/lat columns from one of the geocoding scripts; load it before running the lines below. na.rm = TRUE keeps unmatched (NA) addresses from breaking the map centre. 91 | map <- get_map(location=c(lon=mean(geocoded_output$lon, na.rm = TRUE), lat=mean(geocoded_output$lat, na.rm = TRUE)), zoom=12) 92 | myplot <- ggmap(map) + 93 | geom_polygon(data = p2, aes(x = long, y = lat, group = group, 94 | fill = hhincome), alpha=0.75) + 95 | geom_point(aes(x = lon, y = lat), data=geocoded_output, size = 6, col="red" ) + 96 | coord_map() + 97 | scale_fill_distiller(palette = "Greens", 98 | breaks = pretty_breaks(n = 8)) + 99 | guides(fill = guide_legend(reverse = TRUE)) 100 | 101 | myplot 102 | 103 | map <- get_map(location=c(lon=mean(geocoded_output$lon, na.rm = TRUE), lat=mean(geocoded_output$lat, na.rm = TRUE)), zoom=12) 104 | ggmap(map) + 105 | geom_point(aes(x = lon, y = lat), data=geocoded_output, size = 6, col="red" ) 106 | -------------------------------------------------------------------------------- /scripts/older_scripts/yahoo_geocoding.R: -------------------------------------------------------------------------------- 1 | # Geocoding with Yahoo
Placefinder 2 | ## pattyf@berkeley.edu, 12/8/2015 3 | # 4 | # 2000 addresses per day limit! 5 | # 6 | # Review the following blog post by Zev Ross 7 | # http://zevross.com/blog/2015/05/19/scrape-website-data-with-the-new-r-package-rvest/ 8 | # 9 | # You need to first apply for an account on https://developer.yahoo.com 10 | # 11 | 12 | # set working directory (machine-specific path - adjust to your own checkout) 13 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding") 14 | 15 | # If you haven't already installed rydn get it now using devtools 16 | # devtools::install_github("trestletech/rydn") 17 | library(rydn) 18 | 19 | # read in your YDN keys - keep in a separate file 20 | source("keys/ydn_keys.R") 21 | # That file looks like this: 22 | #mykey="dj0.......................00Zg--" 23 | #mysecret ="00....................8" 24 | 25 | #test it 26 | myloc <- find_place("Barrows Hall, Berkeley,ca",key=mykey,secret=mysecret) 27 | myloc #see what was returned 28 | 29 | # To interpret response see: https://developer.yahoo.com/boss/geo/docs/supported_responses.html 30 | 31 | #convert strings to numerics 32 | myloc$longitude <- as.numeric(myloc$longitude) 33 | myloc$latitude <- as.numeric(myloc$latitude) 34 | 35 | # work with subset of the returned info 36 | myloc_sub <- myloc[1 ,c("quality", "latitude", "longitude", "radius")] #subset 37 | 38 | # let's plot it 39 | library(ggplot2) 40 | library(ggmap) 41 | # the basemap is centred on the first returned row (myloc_sub); the point layer draws every row of myloc 42 | map <- get_map(location=c(lon=as.numeric(myloc_sub$longitude),lat=as.numeric(myloc_sub$latitude)), zoom=17) 43 | ggmap(map) + 44 | geom_point(aes(x = longitude, y = latitude), size = 6, col="red", data = myloc) 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /shapefiles/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/.DS_Store --------------------------------------------------------------------------------
/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf -------------------------------------------------------------------------------- /shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp -------------------------------------------------------------------------------- /shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx -------------------------------------------------------------------------------- /shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf -------------------------------------------------------------------------------- /shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp -------------------------------------------------------------------------------- /shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx -------------------------------------------------------------------------------- /tiger/test_out.txt: -------------------------------------------------------------------------------- 1 | "3","10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 Golf Links Rd, OAKLAND, CA, 94605","-122.12688,37.753845","125011765","L","06","001","409900","4003" 2 | "2","4728 SCOTIA AVE, Oakland, 
CA, 94605","Match","Exact","4728 Scotia Ave, OAKLAND, CA, 94605","-122.125015,37.75487","125011836","R","06","001","409900","4004" 3 | "1","10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 Cotter St, OAKLAND, CA, 94605","-122.12374,37.755764","125011838","R","06","001","409900","4001" 4 | "10","271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13144,37.75938","125011739","R","06","001","409900","3019" 5 | "7","4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 Scotia Ave, OAKLAND, CA, 94605","-122.12387,37.7552","125011839","L","06","001","409900","4001" 6 | "6","111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 Donna Way, OAKLAND, CA, 94605","-122.13216,37.760204","125011738","L","06","001","409900","3005" 7 | "5","380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 Elysian Fields Dr, OAKLAND, CA, 94605","-122.1282,37.76107","125011702","L","06","001","409900","3005" 8 | "4","4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 Grass Valley Rd, OAKLAND, CA, 94605","-122.12255,37.75109","617284248","L","06","001","409900","4010" 9 | "9","10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 Pebble Beach Dr, OAKLAND, CA, 94605","-122.12747,37.76139","125011703","L","06","001","409900","3015" 10 | "8","248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13248,37.759678","125011736","L","06","001","409900","3010" 11 | "11","4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 Shetland Ave, OAKLAND, CA, 94605","-122.1222,37.752895","125011864","R","06","001","409900","4010" 12 | "12","24 KEY CT, Oakland, CA, 94605","Match","Exact","24 Key Ct, OAKLAND, CA, 94605","-122.124405,37.75517","125011835","R","06","001","409900","4001" 13 | -------------------------------------------------------------------------------- /tiger/test_out2.txt: 
-------------------------------------------------------------------------------- 1 | "3","10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 Golf Links Rd, OAKLAND, CA, 94605","-122.12688,37.753845","125011765","L","06","001","409900","4003" 2 | "2","4728 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4728 Scotia Ave, OAKLAND, CA, 94605","-122.125015,37.75487","125011836","R","06","001","409900","4004" 3 | "1","10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 Cotter St, OAKLAND, CA, 94605","-122.12374,37.755764","125011838","R","06","001","409900","4001" 4 | "10","271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13144,37.75938","125011739","R","06","001","409900","3019" 5 | "7","4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 Scotia Ave, OAKLAND, CA, 94605","-122.12387,37.7552","125011839","L","06","001","409900","4001" 6 | "6","111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 Donna Way, OAKLAND, CA, 94605","-122.13216,37.760204","125011738","L","06","001","409900","3005" 7 | "5","380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 Elysian Fields Dr, OAKLAND, CA, 94605","-122.1282,37.76107","125011702","L","06","001","409900","3005" 8 | "4","4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 Grass Valley Rd, OAKLAND, CA, 94605","-122.12255,37.75109","617284248","L","06","001","409900","4010" 9 | "9","10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 Pebble Beach Dr, OAKLAND, CA, 94605","-122.12747,37.76139","125011703","L","06","001","409900","3015" 10 | "8","248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13248,37.759678","125011736","L","06","001","409900","3010" 11 | "11","4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 Shetland Ave, OAKLAND, CA, 94605","-122.1222,37.752895","125011864","R","06","001","409900","4010" 12 | "12","24 KEY CT, Oakland, 
CA, 94605","Match","Exact","24 Key Ct, OAKLAND, CA, 94605","-122.124405,37.75517","125011835","R","06","001","409900","4001" 13 | -------------------------------------------------------------------------------- /tiger/tiger_12addresses_to_geocode.csv: -------------------------------------------------------------------------------- 1 | 1,10709 COTTER ST,Oakland,,94605 2 | 2,4728 SCOTIA AVE,Oakland,CA,94605 3 | 3,10834 GOLF LINKS RD,Oakland,CA,94605 4 | 4,4627 GRASS VALLEY RD,Oakland,CA,94605 5 | 5,380 ELYSIAN FIELDS DR,Oakland,CA,94605 6 | 6,111 DONNA WAY,Oakland,CA,94605 7 | 7,4855 SCOTIA AVE,Oakland,CA,94605 8 | 8,248 ELYSIAN FIELDS DR,Oakland,CA,94605 9 | 9,10520 PEBBLE BEACH DR,Oakland,CA,94605 10 | 10,271 ELYSIAN FIELDS DR,Oakland,CA,94605 11 | 11,4840 SHETLAND AVE,Oakland,CA,94605 12 | 12,24 KEY CT,Oakland,CA,94605 13 | --------------------------------------------------------------------------------