├── .DS_Store
├── .Rhistory
├── .gitignore
├── LICENSE
├── README.md
├── address_data
├── .DS_Store
├── formatted
│ ├── oak_liq_census_format.csv
│ ├── oak_liq_gearth_format.csv
│ ├── oak_liq_gearth_format.kdx
│ ├── oak_liq_gfusion_format.csv
│ ├── oak_liq_w_ids.csv
│ ├── oak_liq_w_ids_types_headers.csv
│ ├── oak_liq_w_ids_types_headers_copy.csv
│ └── oak_liq_w_ids_types_headers_geocoded.csv
├── oak_liquor_stores.csv
├── sample
│ ├── data_copy.csv
│ ├── geocoded_output.csv
│ ├── oak_liq_stores_raw.csv
│ ├── sample_10_addresses.csv
│ ├── sample_10_addresses_geocoded.csv
│ ├── sample_2500_addresses.csv
│ ├── sample_2500_addresses_geocoded.csv
│ ├── sample_5k_addresses.csv
│ ├── sample_5k_addresses_geocoded.csv
│ └── sample_cal_parcels_all_v2.csv
└── trulia_avgprice_bystate_2017.csv
├── draft
├── rgeodocoding2.Rmd
└── rgeodocoding2.html
├── output
├── address_data_geocoded2.csv
├── address_data_geocoded_esri.csv
├── address_data_geocoded_google.csv
├── geocoded_addresses_out.csv
├── geocoded_addresses_single_out.csv
├── tracts2010.dbf
├── tracts2010.prj
├── tracts2010.shp
└── tracts2010.shx
├── rgeocoding-slides.Rmd
├── rgeocoding-slides.html
├── rgeocoding.Rmd
├── rgeocoding.html
├── screenshots
├── .DS_Store
├── addresses1.png
├── census_api_key_apply.png
├── census_geo.png
├── dual_address_match.png
├── esri_wgs_token.png
├── fcc_api.png
├── fips_code.png
├── geocode_details1.png
├── geocoding_details1.png
├── geocoding_details2.png
├── ggmap_geocode_help.png
├── ggmap_plot1.png
├── gmap_barrows.png
├── google_limits.png
├── output_compare.png
├── popdens.png
├── ref_data_quality.png
├── social_explorer.png
├── ydn_boss_placefinder.png
├── ydn_create_application.png
├── ydn_keys.png
├── ydn_landing.png
├── ydn_signup.png
└── ydn_usage_limits.png
├── scripts
├── .DS_Store
├── .Rapp.history
├── draft
│ └── tiger_geocoding.R
├── esri_wgs_geocoding.R
├── fcc_latlon2fips.R
├── google_geocoding_ggmap.R
├── google_geocoding_ggmap_v2.R
├── oakland_liquor_stores.csv
└── older_scripts
│ ├── census_geocoding_batch.R
│ ├── census_geocoding_batch_v2.R
│ ├── census_geocoding_single_address.R
│ ├── esri_wgs_geocoding.R
│ ├── geocode_it.R
│ ├── getFipsForPoints.R
│ ├── ggmap_geocoding_examples.R
│ ├── ggmap_google.R
│ ├── google_batch_big.R
│ ├── google_geocode_in_limits.R
│ ├── spatial_analysis_examples.R
│ ├── tiger_geocoding.R
│ ├── tiger_geocoding_batch.R
│ ├── tiger_one_at_a_time.R
│ ├── tigris_acs_census.R
│ └── yahoo_geocoding.R
├── shapefiles
├── .DS_Store
├── AlamedaCommunityCollegeDistricts
│ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf
│ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.prj
│ ├── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp
│ └── geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx
└── AlamedaCountySchools
│ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf
│ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.prj
│ ├── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp
│ └── geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx
└── tiger
├── test_out.txt
├── test_out2.txt
└── tiger_12addresses_to_geocode.csv
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/.DS_Store
--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
1 | head(address_data)
2 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1)
3 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T)
4 | # Take a look - what do you think?
5 | head(address_data2)
6 | head(address_data)
7 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1)
8 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T)
9 | # Take a look - what do you think?
10 | head(address_data2)
11 | tracts2010 <- merge(tracts2010, pop2, by="GEOID10")
12 | quantColors <- colorQuantile("Reds", tracts2010$pct_under18, n=5)
13 | point_map <- leaflet() %>%
14 | addTiles() %>%
15 | addPolygons(data=tracts2010,
16 | color="white",
17 | weight=1,
18 | opacity=0.5,
19 | fillColor= ~quantColors(pct_under18),
20 | fillOpacity = 0.75,
21 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>%
22 | addMarkers(data=address_data, lat=~glat, lng=~glon,
23 | popup=(paste0(address_data$name, "",
24 | address_data$full_address)
25 | )
26 | )
27 | point_map
28 | library(crosstalk)
29 | address_data2$sview <- paste0("http://maps.googleapis.com/maps/api/streetview?size=250x190&location=",address_data2$glat,",",address_data2$glon,"&sensor=false&fov=110")
30 | address_data2$popup_content <- paste("Name:", address_data2$name,"
",
31 | "Address: ", address_data2$full_address, "
",
32 | "Percent Under 18: ", address_data2$pct_under18, "
",
33 | "
"
34 | )
35 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
36 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_under18, n=5)
37 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
38 | addPolygons(data=tracts2010,
39 | color="white",
40 | weight=1,
41 | opacity=0.5,
42 | fillColor=~quantColors(pct_under18),
43 | fillOpacity = 0.65,
44 | popup = paste0(tracts2010$pct_under18, "% under 18"),
45 | group="Percent Under 18") %>%
46 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
47 | addLayersControl(
48 | overlayGroups = c("Liquor Stores","Percent Under 18"),
49 | options = layersControlOptions(collapsed = FALSE)
50 | )
51 | library(ggmap)
52 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding")
53 | #mykey <- "AIzaSyxxxxxxxxxxxxxxxxxxxxxxxxxOQyOFWrTw"
54 | mykey <- "AIzaSyDf-SZG8O4hj1c06VQ-k6hkBrOQyOFWrTw"
55 | register_google(key=mykey)
56 | # File of addresses
57 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F)
58 | # Take a look
59 | head(address_data)
60 | # Full address format: "2625 Dana St, Berkeley, CA 94704"
61 | address_data$full_address <- paste0(address_data$street, ", " ,
62 | address_data$city, ", " ,
63 | address_data$state, " ",
64 | address_data$zip)
65 | # Take a look
66 | head(address_data, 3)
67 | head(google_geocoded)
68 | source("./scripts/fcc_latlon2fips.R")
69 | # test one coordinate pair
70 | latlon2fips(latitude=37.852562, longitude=-122.273634)
71 | View(pop_acs5_2016)
72 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F)
73 | writeOGR(tracts2010, "tracts2010.shp")
74 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F)
75 | writeOGR(tracts2010, "tracts2010.shp", driver="ESRI Shapefile")
76 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F)
77 | writeOGR(tracts2010, layer="tracts2010", driver="ESRI Shapefile")
78 | write.csv(address_data2, file="address_data_geocoded2.csv", row.names=F)
79 | writeOGR(tracts2010, dsn=".", layer="tracts2010", driver="ESRI Shapefile")
80 | address_data2 < read.csv("address_data_geocoded2.csv", stringsAsFactors = F)
81 | address_data2 < read.csv("address_data_geocoded2.csv", stringsAsFactors = F)
82 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F)
83 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F)
84 | tracts2010 <- readOGR(dsn=".",layer="tracts2010")
85 | address_data2 <- read.csv("address_data_geocoded2.csv", stringsAsFactors = F)
86 | tracts2010 <- readOGR(dsn=".",layer="tracts2010")
87 | address_data2$sview <- paste0("http://maps.googleapis.com/maps/api/streetview?size=250x190&location=",address_data2$glat,",",address_data2$glon,"&sensor=false&fov=110")
88 | address_data2$popup_content <- paste("Name:", address_data2$name,"
",
89 | "Address: ", address_data2$full_address, "
",
90 | "Percent Under 18: ", address_data2$pct_under18, "
",
91 | "
"
92 | )
93 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
94 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_under18, n=5)
95 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
96 | addPolygons(data=tracts2010,
97 | color="white",
98 | weight=1,
99 | opacity=0.5,
100 | fillColor=~quantColors(pct_under18),
101 | fillOpacity = 0.65,
102 | popup = paste0(tracts2010$pct_under18, "% under 18"),
103 | group="Percent Under 18") %>%
104 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
105 | addLayersControl(
106 | overlayGroups = c("Liquor Stores","Percent Under 18"),
107 | options = layersControlOptions(collapsed = FALSE)
108 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
109 | addPolygons(data=tracts2010,
110 | color="white",
111 | weight=1,
112 | opacity=0.5,
113 | fillColor=~quantColors(pct_under18),
114 | fillOpacity = 0.65,
115 | popup = paste0(tracts2010$pct_under18, "% under 18"),
116 | group="Percent Under 18") %>%
117 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
118 | addLayersControl(
119 | overlayGroups = c("Liquor Stores","Percent Under 18"),
120 | options = layersControlOptions(collapsed = FALSE)
121 | )
122 | View(tracts2010)
123 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
124 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
125 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
126 | addPolygons(data=tracts2010,
127 | color="white",
128 | weight=1,
129 | opacity=0.5,
130 | fillColor=~quantColors(pct_n18),
131 | fillOpacity = 0.65,
132 | popup = paste0(tracts2010$pct_n18, "% under 18"),
133 | group="Percent Under 18") %>%
134 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
135 | addLayersControl(
136 | overlayGroups = c("Liquor Stores","Percent Under 18"),
137 | options = layersControlOptions(collapsed = FALSE)
138 | )
139 | bscols( widths = c(9,3),
140 | map,
141 | list(
142 | filter_select("name", "Store:", shared_df, ~name),
143 | filter_select("pct_under18", "Percent under 18", shared_df, ~pct_under18)
144 | )
145 | )
146 | getwd()
147 | write.csv(google_geocoded,file="output/address_data_geocoded_google.csv",
148 | row.names=FALSE)
149 | write.csv(esri_geocoded,file="output/address_data_geocoded_esri.csv",
150 | row.names=FALSE)
151 | write.csv(address_data2, file="output/address_data_geocoded2.csv", row.names=F)
152 | writeOGR(tracts2010, dsn="./output", layer="tracts2010", driver="ESRI Shapefile")
153 | library(crosstalk)
154 | library(tidyverse)
155 | library(leaflet)
156 | library(rgdal)
157 | library(sp)
158 | #library(DT)
159 | address_data2 <- read.csv("output/address_data_geocoded2.csv" )
160 | =tracts2010 <- readOGR(dsn="./output",layer="tracts2010")
161 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
162 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
163 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
164 | addPolygons(data=tracts2010,
165 | color="white",
166 | weight=1,
167 | opacity=0.5,
168 | fillColor=~quantColors(pct_n18),
169 | fillOpacity = 0.65,
170 | popup = paste0(tracts2010$pct_n18, "% under 18"),
171 | group="Percent Under 18") %>%
172 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
173 | addLayersControl(
174 | overlayGroups = c("Liquor Stores","Percent Under 18"),
175 | options = layersControlOptions(collapsed = FALSE)
176 | )
177 | library(crosstalk)
178 | library(tidyverse)
179 | library(leaflet)
180 | library(rgdal)
181 | library(sp)
182 | #library(DT)
183 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F)
184 | =tracts2010 <- readOGR(dsn="./output",layer="tracts2010")
185 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding")
186 | library(crosstalk)
187 | library(tidyverse)
188 | library(leaflet)
189 | library(rgdal)
190 | library(sp)
191 | #library(DT)
192 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F)
193 | =tracts2010 <- readOGR(dsn="output",layer="tracts2010")
194 | library(crosstalk)
195 | library(tidyverse)
196 | library(leaflet)
197 | library(rgdal)
198 | library(sp)
199 | #library(DT)
200 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F)
201 | tracts2010 <- readOGR(dsn="output",layer="tracts2010")
202 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
203 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
204 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
205 | addPolygons(data=tracts2010,
206 | color="white",
207 | weight=1,
208 | opacity=0.5,
209 | fillColor=~quantColors(pct_n18),
210 | fillOpacity = 0.65,
211 | popup = paste0(tracts2010$pct_n18, "% under 18"),
212 | group="Percent Under 18") %>%
213 | addMarkers(group="Liquor Stores", popup=~popup_content) %>%
214 | addLayersControl(
215 | overlayGroups = c("Liquor Stores","Percent Under 18"),
216 | options = layersControlOptions(collapsed = FALSE)
217 | )
218 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
219 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
220 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
221 | addPolygons(data=tracts2010,
222 | color="white",
223 | weight=1,
224 | opacity=0.5,
225 | fillColor=~quantColors(pct_n18),
226 | fillOpacity = 0.65,
227 | popup = paste0(tracts2010$pct_n18, "% under 18"),
228 | group="Percent Under 18") %>%
229 | addMarkers(group="Liquor Stores") %>%
230 | addLayersControl(
231 | overlayGroups = c("Liquor Stores","Percent Under 18"),
232 | options = layersControlOptions(collapsed = FALSE)
233 | )
234 | bscols( widths = c(9,3),
235 | map,
236 | list(
237 | filter_select("name", "Store:", shared_df, ~name),
238 | # Create a filter input
239 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.5, width=250)
240 | )
241 | )
242 | bscols( widths = c(9,3),
243 | map,
244 | list(
245 | filter_select("name", "Store:", shared_df, ~name),
246 | # Create a filter input
247 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.15, width=250)
248 | )
249 | )
250 | bscols( widths = c(9,3),
251 | map,
252 | list(
253 | filter_select("name", "Store:", shared_df, ~name),
254 | # Create a filter input
255 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250)
256 | )
257 | )
258 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
259 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
260 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
261 | addPolygons(data=tracts2010,
262 | color="white",
263 | weight=1,
264 | opacity=0.5,
265 | fillColor=~quantColors(pct_n18),
266 | fillOpacity = 0.65,
267 | popup = paste0(tracts2010$pct_n18, "% under 18"),
268 | group="Percent Under 18") %>%
269 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>%
270 | addLayersControl(
271 | overlayGroups = c("Liquor Stores","Percent Under 18"),
272 | options = layersControlOptions(collapsed = FALSE)
273 | )
274 | bscols( widths = c(9,3),
275 | map,
276 | list(
277 | filter_select("name", "Store:", shared_df, ~name),
278 | # Create a filter input
279 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250)
280 | )
281 | )
282 | setwd("~/Documents/Dlab/workshops/2018/RGeocoding")
283 | library(ggmap)
284 | mykey <- "AIzaSyDf-SZG8O4hj1c06VQ-k6hkBrOQyOFWrTw"
285 | register_google(key=mykey)
286 | geocode("San Francisco, CA", key=mykey)
287 | geocode
288 | ?geocode
289 | geocode("San Francisco, CA")
290 | geocode("San Francisco, CA", "more")
291 | x<-geocode("San Francisco, CA", "more")
292 | View(x)
293 | x<-geocode("Barrows Hall, Berkeley", "more")
294 | View(x)
295 | x<-geocode("2465 Dana St, Berkeley", "more")
296 | x<-geocode("2465 Dana St, Berkeley", "all")
297 | revgeocode(c(-122.4194,37.77493), output="more")
298 | # File of addresses
299 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F)
300 | # Take a look
301 | head(address_data)
302 | address_data$full_address <- paste0(address_data$street, ", " ,
303 | address_data$city, ", " ,
304 | address_data$state, " ",
305 | address_data$zip)
306 | # File of addresses
307 | address_data <- read.csv("address_data/oak_liquor_stores.csv", stringsAsFactors = F)
308 | # Take a look
309 | head(address_data)
310 | View(address_data)
311 | address_data$full_address <- paste0(address_data$street, ", " ,
312 | address_data$city, ", " ,
313 | address_data$state, " ",
314 | address_data$zip)
315 | View(address_data)
316 | google_geocoded <- geocode(address_data$full_address, output = "more",
317 | source = "google", key=mykey)
318 | View(google_geocoded)
319 | address_data$glat <- google_geocoded$lat
320 | address_data$glon <- google_geocoded$glon
321 | address_data$glon <- google_geocoded$glon
322 | View(address_data)
323 | head(address_data)
324 | address_data$glon <- google_geocoded$lon
325 | library(leaflet)
326 | point_map <- leaflet(address_data) %>%
327 | addTiles() %>%
328 | addMarkers(lat=~glat, lng=~glon,
329 | popup=(paste0(address_data$name, "",
330 | address_data$full_address)
331 | )
332 | )
333 | point_map
334 | esri_token<-"rfUfTu_yJczZESOq6S50x-kcrzFr9oBvpqhRpp70Pfu_P9uNjMgNtVXGq0iH6miRPGL6yoQMg2IlNROHzErtjCeHphbAfkETqS_Ksd5loXT1BGmU0U1wI5KtKxvfVxjaxWG3AIre2Ngf_NQ9XlDB5w.."
335 | my_esri_token<-"rfUfTu_yJczZESOq6S50x-kcrzFr9oBvpqhRpp70Pfu_P9uNjMgNtVXGq0iH6miRPGL6yoQMg2IlNROHzErtjCeHphbAfkETqS_Ksd5loXT1BGmU0U1wI5KtKxvfVxjaxWG3AIre2Ngf_NQ9XlDB5w.."
336 | source("./scripts/esri_wgs_geocoding.R")
337 | geocode_one("2625 Dana St, Berkeley, CA, 94704", my_esri_token,
338 | postal = TRUE)
339 | esri_geocoded <- geocode_many(address_data$id, address_data$street,
340 | address_data$city, address_data$state,
341 | as.character(address_data$zip), my_esri_token)
342 | View(esri_geocoded)
343 | address_data <- merge(address_data, esri_geocoded[c("ID","lon","lat")],
344 | by.x="id",by.y = "ID", all.x = T)
345 | # Take a look
346 | head(address_data, 3)
347 | View(address_data)
348 | point_map <- leaflet() %>%
349 | addTiles() %>%
350 | addMarkers(lat=address_data$glat, lng=address_data$glon,
351 | popup=(paste0(address_data$name, "",
352 | address_data$full_address))
353 | ) %>%
354 | addCircleMarkers(lat=address_data$lat, lng=address_data$lon,
355 | color="black",fillColor="red",
356 | popup=(paste0(address_data$name, "",
357 | address_data$full_address))
358 | )
359 | point_map
360 | source("./scripts/fcc_latlon2fips.R")
361 | latlon2fips(latitude=37.852562, longitude=-122.273634)
362 | x<- latlon2fips(latitude=37.852562, longitude=-122.273634)
363 | x
364 | substr(x,0,11)
365 | x<- latlon2fips(37.852562,-122.273634)
366 | x
367 | address_data$fips<- mapply(latlon2fips, address_data$glat,
368 | address_data$glon)
369 | head(address_data, 3)
370 | View(address_data)
371 | library(sp)
372 | library(tigris)
373 | options(tigris_class = "sp") # options are sp or sf
374 | options(tigris_use_cache = F) # set to true to save locally
375 | tracts2010 <- tracts(state = '06', county= '001', cb = F, year=2010)
376 | plot(tracts2010)
377 | tracts2010cb <- tracts(state = '06', county= '001', cb = T, year=2010)
378 | plot(tracts2010cb)
379 | plot(tracts2010)
380 | plot(tracts2010)
381 | address_data_sp<- address_data #make copy
382 | coordinates(address_data_sp) <-c("glon", "glat")
383 | proj4string(address_data_sp) <- CRS(proj4string(tracts2010))
384 | proj4string(address_data_sp) <- CRS(proj4string(tracts2010))
385 | points(address_data_sp, col="red")
386 | fips2010 <-over(address_data_sp, tracts2010)
387 | View(fips2010)
388 | address_data$GEOID10 <- fips2010$GEOID10
389 | View(address_data)
390 | library(tidycensus)
391 | library(tigris)
392 | library(tidycensus)
393 | my_census_api_key <- "f2d6f4f743545d3a42a67412b05935dc7712c432"
394 | census_api_key(my_census_api_key)
395 | my_states<- c("06") # CA
396 | my_counties <- c("001") # Alameda County
397 | cenvar_table <-load_variables(year=2016, dataset = "acs5", cache=T)
398 | View(cenvar_table)
399 | pop_total <- "B01001_001E" # Total population
400 | pop_under18 <- "B09001_001E" # POPULATION UNDER 18 YEARS BY AGE
401 | pop_acs5_2016 <-get_acs(geography = "tract",
402 | variables = c(pop_total,pop_under18),
403 | year=2016, survey="acs5",
404 | state = my_states, county = my_counties,
405 | geometry = F)
406 | View(pop_acs5_2016)
407 | library(tidyr)
408 | library(dplyr)
409 | library(tidyr)
410 | library(dplyr)
411 | # Select the columns of interest
412 | # and put `totpop` and `under18` in their own columns
413 | pop2 <- pop_acs5_2016 %>%
414 | select("GEOID","variable","estimate") %>%
415 | spread(key=variable, value=estimate)
416 | # Rename columns
417 | colnames(pop2)<-c("GEOID10","totpop","under18")
418 | head(pop2)
419 | pop2$pct_under18 <- round((pop2$under18 / pop2$totpop) * 100, 1)
420 | head(pop2)
421 | address_data2 <- merge(address_data, pop2, by="GEOID10", all.x=T)
422 | View(address_data2)
423 | View(tracts2010@data)
424 | tracts2010 <- merge(tracts2010, pop2, by="GEOID10")
425 | View(tracts2010@data)
426 | quantColors <- colorQuantile("Reds", tracts2010$pct_under18, n=5)
427 | point_map <- leaflet() %>%
428 | addTiles() %>%
429 | addPolygons(data=tracts2010,
430 | color="white",
431 | weight=1,
432 | opacity=0.5,
433 | fillColor= ~quantColors(pct_under18),
434 | fillOpacity = 0.75,
435 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>%
436 | addMarkers(data=address_data, lat=~glat, lng=~glon,
437 | popup=(paste0(address_data$name, "",
438 | address_data$full_address)
439 | )
440 | )
441 | point_map
442 | library(htmlwidgets)
443 | saveWidget(point_map, file="pointmap.html")
444 | # Chunk 1
445 | library(crosstalk)
446 | library(tidyverse)
447 | library(leaflet)
448 | library(rgdal)
449 | library(sp)
450 | #library(DT)
451 | address_data2 <- read.csv("output/address_data_geocoded2.csv", stringsAsFactors = F)
452 | tracts2010 <- readOGR(dsn="output",layer="tracts2010")
453 | # Chunk 2
454 | shared_df <- SharedData$new(address_data2, ~name, group = "Choose Store")
455 | quantColors <- colorQuantile("YlOrRd", tracts2010$pct_n18, n=5)
456 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
457 | addPolygons(data=tracts2010,
458 | color="white",
459 | weight=1,
460 | opacity=0.5,
461 | fillColor=~quantColors(pct_n18),
462 | fillOpacity = 0.65,
463 | popup = paste0(tracts2010$pct_n18, "% under 18"),
464 | group="Percent Under 18") %>%
465 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>%
466 | addLayersControl(
467 | overlayGroups = c("Liquor Stores","Percent Under 18"),
468 | options = layersControlOptions(collapsed = FALSE)
469 | )
470 | # Chunk 3
471 | bscols( widths = c(9,3),
472 | map,
473 | list(
474 | filter_select("name", "Store:", shared_df, ~name),
475 | # Create a filter input
476 | filter_slider("pct_under18", "Percent under 18", shared_df, column=~pct_under18, step=0.1, width=250)
477 | )
478 | )
479 | map <- leaflet(shared_df, width = "100%", height = 600) %>% addTiles() %>%
480 | addPolygons(data=tracts2010,
481 | color="white",
482 | weight=1,
483 | opacity=0.5,
484 | fillColor=~quantColors(pct_n18),
485 | fillOpacity = 0.65,
486 | popup = paste0(tracts2010$pct_n18, "% under 18"),
487 | group="Percent Under 18") %>%
488 | addMarkers(group="Liquor Stores", popup=shared_df$pct_under18) %>%
489 | addLayersControl(
490 | overlayGroups = c("Liquor Stores","Percent Under 18"),
491 | options = layersControlOptions(collapsed = FALSE)
492 | ) %>% hideGroup("Liquor Stores")
493 | point_map
494 | point_map <- leaflet() %>%
495 | addTiles() %>%
496 | addPolygons(data=tracts2010,
497 | color="white",
498 | weight=1,
499 | opacity=0.5,
500 | fillColor= ~quantColors(pct_under18),
501 | fillOpacity = 0.75,
502 | popup = paste0("Percent under 18: ", tracts2010$pct_under18, "%")) %>%
503 | addMarkers(data=address_data, lat=~glat, lng=~glon,
504 | popup=(paste0(address_data$name, "",
505 | address_data$full_address)
506 | )
507 | ) %>% hideGroup("Liquor Stores")
508 | point_map
509 | ?tmap
510 | library(tmap)
511 | ?addPolygons
512 | ?layersControlOptions
513 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | keys/*
2 | *.zip
3 | draft/*
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2015-2016 D-Lab UC Berkeley
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RGeocoding
2 |
3 | ## About
4 | These files show how to geocode in R using three online services:
5 |
6 | - Google Geocoding API
7 | - ESRI World Geocoding Service
8 | - US Census Geocoder
9 |
10 |
11 |
--------------------------------------------------------------------------------
/address_data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/address_data/.DS_Store
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_census_format.csv:
--------------------------------------------------------------------------------
1 | 1,2101 8th Ave,Oakland,CA,94606
2 | 2,1615 Macarthur Blvd,Oakland,CA,94602
3 | 3,394 12th St,Oakland,CA,94607
4 | 4,1500 23th Ave,Oakland,CA,94606
5 | 5,6193 Ridgemont Dr,Oakland,CA,94619
6 | 6,525 Embarcadero W, Oakland,CA,94607
7 | 7,5403 Foothill Blvd,Oakland,CA,94601
8 | 8,1200 78th Ave,Oakland,CA,94621
9 | 9,828 Franklin St,Oakland,CA,94607
10 | 10,5913 International Blvd,Oakland,CA,94621
11 | 11,3210 Harrison St,Oakland,CA,94611
12 | 12,1460 7th St,Oakland,CA,94607
13 | 13,1333 Peralta St,Oakland,CA,94607
14 | 14,3710 Telegraph Ave,Oakland,CA,94609
15 | 15,3293 Lakeshore Ave,Oakland,CA,94610
16 | 16,1647 8th St,Oakland,CA,94607
17 | 17,3849 Martin Luther King Jr Way,Oakland,CA,94609
18 | 18,3900 Grand Ave,Oakland,CA,94610
19 | 19,7305 Edgewater Dr #D,Oakland,CA,94621
20 | 20,350 E 18th St,Oakland,CA,94606
21 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_gearth_format.csv:
--------------------------------------------------------------------------------
1 | ID,Store,Street,City,State,Zip
2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606
3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602
4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607
5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606
6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619
7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607
8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601
9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621
10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607
11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621
12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611
13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607
14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607
15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609
16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610
17 | 16,Happy Time,1647 8th St,Oakland,CA,94607
18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609
19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610
20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621
21 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606
22 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_gearth_format.kdx:
--------------------------------------------------------------------------------
1 | Layout {
2 | FileType : "delimited"
3 | Delimiter : ","
4 | Street : "2"
5 | City : "3"
6 | State : "4"
7 | Zip : "5"
8 | SkipRows : "1"
9 | TextCodec : "ISO-8859-1"
10 | FieldDefinitions {
11 | 0 {
12 | Name : "ID"
13 | Type : "int"
14 | }
15 | 1 {
16 | Name : "Store"
17 | Type : "string"
18 | }
19 | 2 {
20 | Name : "Street"
21 | Type : "string"
22 | }
23 | 3 {
24 | Name : "City"
25 | Type : "string"
26 | }
27 | 4 {
28 | Name : "State"
29 | Type : "string"
30 | }
31 | 5 {
32 | Name : "Zip"
33 | Type : "string"
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_gfusion_format.csv:
--------------------------------------------------------------------------------
1 | ID,Store,Address
2 | 1,Wah Fay Liquors,2101 8th Ave Oakland CA 94606
3 | 2,Vision Liquor,1615 Macarthur Blvd Oakland CA 94602
4 | 3,Souza's Liquors,394 12th St Oakland CA 94607
5 | 4,Tk Liquors,1500 23th Ave Oakland CA 94606
6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr Oakland CA 94619
7 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_w_ids.csv:
--------------------------------------------------------------------------------
1 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606
2 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602
3 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607
4 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606
5 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619
6 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607
7 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601
8 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621
9 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607
10 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621
11 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611
12 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607
13 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607
14 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609
15 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610
16 | 16,Happy Time,1647 8th St,Oakland,CA,94607
17 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609
18 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610
19 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621
20 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606
21 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_w_ids_types_headers.csv:
--------------------------------------------------------------------------------
1 | id,name,street,city,state,zip,type
2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p
3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p
4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p
5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p
6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p
7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c
8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p
9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m
10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p
11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p
12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m
13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m
14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p
15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m
16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p
17 | 16,Happy Time,1647 8th St,Oakland,CA,94607,p
18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m
19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p
20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621,p
21 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606,p
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_w_ids_types_headers_copy.csv:
--------------------------------------------------------------------------------
1 | "id","name","street","city","state","zip","type","address"
2 |
--------------------------------------------------------------------------------
/address_data/formatted/oak_liq_w_ids_types_headers_geocoded.csv:
--------------------------------------------------------------------------------
1 | "id","name","street","city","state","zip","type","address","lon","lat","address.1"
2 | 1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave,Oakland,CA,94606",-122.2448899,37.7983669,"2101 8th ave, oakland, ca 94606, usa"
3 | 2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd,Oakland,CA,94602",-122.223667,37.800329,"1615 macarthur blvd, oakland, ca 94602, usa"
4 | 3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St,Oakland,CA,94607",-122.2703368,37.8026337,"394 12th st, oakland, ca 94607, usa"
5 | 4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave,Oakland,CA,94606",-122.2351333,37.7842433,"1500 23rd ave, oakland, ca 94606, usa"
6 | 5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr,Oakland,CA,94619",-122.167191,37.784339,"6193 ridgemont dr, oakland, ca 94619, usa"
7 | 6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland,CA,94607",-122.2790176,37.7959797,"525 embarcadero west, oakland, ca 94607, usa"
8 | 7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd,Oakland,CA,94601",-122.1984536,37.772621,"5403 foothill blvd, oakland, ca 94601, usa"
9 | 8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th Ave,Oakland,CA,94621",-122.186272,37.755498,"1200 78th ave, oakland, ca 94621, usa"
10 | 9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin St,Oakland,CA,94607",-122.2719881,37.8002274,"828 franklin st, oakland, ca 94607, usa"
11 | 10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd,Oakland,CA,94621",-122.1993192,37.7649979,"5913 international blvd, oakland, ca 94621, usa"
12 | 11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St,Oakland,CA,94611",-122.2557939,37.8180419,"3210 harrison st, oakland, ca 94611, usa"
13 | 12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St,Oakland,CA,94607",-122.2951698,37.8056645,"1460 7th st, oakland, ca 94607, usa"
14 | 13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St,Oakland,CA,94607",-122.2941054,37.8112027,"1333 peralta st, oakland, ca 94607, usa"
15 | 14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave,Oakland,CA,94609",-122.2650554,37.8256134,"3710 telegraph ave, oakland, ca 94609, usa"
16 | 15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave,Oakland,CA,94610",-122.244289,37.810986,"3293 lakeshore ave, oakland, ca 94610, usa"
17 | 16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St,Oakland,CA,94607",-122.298867,37.807129,"1647 8th st, oakland, ca 94607, usa"
18 | 17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way,Oakland,CA,94609",-122.2691424,37.8281099,"3849 m.l.k. jr way, oakland, ca 94609, usa"
19 | 18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave,Oakland,CA,94610",-122.24452,37.8185089,"3900 grand ave, oakland, ca 94610, usa"
20 | 19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr,Oakland,CA,94621",-122.2063558,37.744643,"7305 edgewater dr, oakland, ca 94621, usa"
21 | 20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St,Oakland,CA,94606",-122.2510741,37.7994208,"350 e 18th st, oakland, ca 94606, usa"
22 |
--------------------------------------------------------------------------------
/address_data/oak_liquor_stores.csv:
--------------------------------------------------------------------------------
1 | id,name,street,city,state,zip,type
2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p
3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p
4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p
5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p
6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p
7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c
8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p
9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m
10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p
11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p
12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m
13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m
14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p
15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m
16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p
17 | 16,Happy Time,1647 8th St,Oakland,CA,94607,p
18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m
19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p
20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621,p
21 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606,p
--------------------------------------------------------------------------------
/address_data/sample/data_copy.csv:
--------------------------------------------------------------------------------
1 | "id","name","street","city","state","zip","type","address"
2 |
--------------------------------------------------------------------------------
/address_data/sample/geocoded_output.csv:
--------------------------------------------------------------------------------
1 | "id","name","street","city","state","zip","type","address","lon","lat","address.1"
2 | 1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave,Oakland,CA,94606",-122.2448899,37.7983669,"2101 8th ave, oakland, ca 94606, usa"
3 | 2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd,Oakland,CA,94602",-122.223667,37.800329,"1615 macarthur blvd, oakland, ca 94602, usa"
4 | 3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St,Oakland,CA,94607",-122.2703368,37.8026337,"394 12th st, oakland, ca 94607, usa"
5 | 4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave,Oakland,CA,94606",-122.2351333,37.7842433,"1500 23rd ave, oakland, ca 94606, usa"
6 | 5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr,Oakland,CA,94619",-122.167191,37.784339,"6193 ridgemont dr, oakland, ca 94619, usa"
7 | 6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland,CA,94607",-122.2790176,37.7959797,"525 embarcadero west, oakland, ca 94607, usa"
8 | 7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd,Oakland,CA,94601",-122.1984536,37.772621,"5403 foothill blvd, oakland, ca 94601, usa"
9 | 8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th Ave,Oakland,CA,94621",-122.186272,37.755498,"1200 78th ave, oakland, ca 94621, usa"
10 | 9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin St,Oakland,CA,94607",-122.2719881,37.8002274,"828 franklin st, oakland, ca 94607, usa"
11 | 10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd,Oakland,CA,94621",-122.1993192,37.7649979,"5913 international blvd, oakland, ca 94621, usa"
12 | 11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St,Oakland,CA,94611",-122.2557939,37.8180419,"3210 harrison st, oakland, ca 94611, usa"
13 | 12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St,Oakland,CA,94607",-122.2951698,37.8056645,"1460 7th st, oakland, ca 94607, usa"
14 | 13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St,Oakland,CA,94607",-122.2941054,37.8112027,"1333 peralta st, oakland, ca 94607, usa"
15 | 14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave,Oakland,CA,94609",-122.2650554,37.8256134,"3710 telegraph ave, oakland, ca 94609, usa"
16 | 15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave,Oakland,CA,94610",-122.244289,37.810986,"3293 lakeshore ave, oakland, ca 94610, usa"
17 | 16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St,Oakland,CA,94607",-122.298867,37.807129,"1647 8th st, oakland, ca 94607, usa"
18 | 17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way,Oakland,CA,94609",-122.2691424,37.8281099,"3849 m.l.k. jr way, oakland, ca 94609, usa"
19 | 18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave,Oakland,CA,94610",-122.24452,37.8185089,"3900 grand ave, oakland, ca 94610, usa"
20 | 19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr,Oakland,CA,94621",-122.2063558,37.744643,"7305 edgewater dr, oakland, ca 94621, usa"
21 | 20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St,Oakland,CA,94606",-122.2510741,37.7994208,"350 e 18th st, oakland, ca 94606, usa"
22 |
--------------------------------------------------------------------------------
/address_data/sample/oak_liq_stores_raw.csv:
--------------------------------------------------------------------------------
1 | Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606
2 | Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602
3 | Souza's Liquors,394 12th,Oakland,CA,94607
4 | Tk Liquors,1500 23th Ave,Oakland,CA,94606
5 | Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA
6 | Bev Mo,525 Embarcadero W, Oakland,CA,94607
7 | Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601
8 | Saleen Market,1200 78th Ave,Oakland,94621
9 | Park Liquors,828 Franklin St,Oakland,CA,94607
10 | Los Camellos,5913 International Blvd,Oakland,CA,94621
11 |
--------------------------------------------------------------------------------
/address_data/sample/sample_10_addresses.csv:
--------------------------------------------------------------------------------
1 | "apn","pstreet_addr","pcity","pstate","pzip","plon","plat","pminx","pminy","pmaxx","pmaxy","planduse","pfull_address","parcel_dir"
2 | "029345130","26362 LAWTON AVE","Loma Linda","CA",92354,-117.229239498,34.0449167153,-117.229323238,34.0448060271,-117.229155764,34.0450273813,"532","26362 LAWTON AVE,Loma Linda,CA,92354",6071
3 | "23504800370000","9147 PERSHING AVE","Orangevale","CA",95662,-121.213901675,38.6708152346,-121.21401323,38.6706055214,-121.213792183,38.6710265909,NA,"9147 PERSHING AVE,Orangevale,CA,95662",6067
4 | "232052005","1153 SIERRA VISTA WAY","Lafayette","CA",94549,-122.114609607,37.8982677233,-122.114886371,37.8978975253,-122.114412289,37.898760217,NA,"1153 SIERRA VISTA WAY,Lafayette,CA,94549",6013
5 | "8589026023","5033 Baldwin Ave","Temple City","CA",91780,-118.049846129,34.0945402847,-118.04997662,34.0944167133,-118.049716371,34.0946635985,"0100","5033 Baldwin Ave,Temple City,CA,91780",6037
6 | "200291193","460 9TH ST","Mc Farland","CA",93250,-119.237737665,35.6789471257,-119.23794007,35.6788773989,-119.237535267,35.6790168561,"0101","460 9TH ST,Mc Farland,CA,93250",6029
7 | "107618146","7017 NEWTON PL","Alta Loma","CA",91701,-117.592751185,34.1268426075,-117.592902871,34.1267837389,-117.592610044,34.1269016316,"510","7017 NEWTON PL,Alta Loma,CA,91701",6071
8 | "8468015024","1600 W Cameron Ave","West Covina","CA",91790,-117.943032896,34.0685424244,-117.943538578,34.0681056101,-117.942527964,34.0689799637,"1800","1600 W Cameron Ave,West Covina,CA,91790",6037
9 | "2203112937","500 RANCHEROS DR","San Marcos","CA",92069,-117.153226611,33.1412541249,-117.155567866,33.1393828965,-117.150832985,33.1430624795,NA,"500 RANCHEROS DR,San Marcos,CA,92069",6073
10 | "110-120-009-000","3110 ASPEN GROVE RD","Truckee","CA",96161,-120.118140589,39.277759796,-120.118190768,39.27773372,-120.11808974,39.2777866452,"04","3110 ASPEN GROVE RD,Truckee,CA,96161",6061
11 |
--------------------------------------------------------------------------------
/address_data/sample/sample_10_addresses_geocoded.csv:
--------------------------------------------------------------------------------
1 | "id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side","state_fips","county_fips","tract_fips","block_fips","lon","lat"
2 | "2203112937","500 RANCHEROS DR, San Marcos, CA, 92069","Match","Exact","500 RANCHEROS DR, SAN MARCOS, CA, 92069","-117.15157,33.140068",195719910,"L",NA,NA,NA,NA,-117.15157,33.140068
3 | "200291193","460 9TH ST, Mc Farland, CA, 93250","Match","Exact","460 9TH ST, MC FARLAND, CA, 93250","-119.23804,35.678898",109003113,"L",NA,NA,NA,NA,-119.23804,35.678898
4 | "110-120-009-000","3110 ASPEN GROVE RD, Truckee, CA, 96161","Match","Exact","3110 ASPEN GROVE RD, TRUCKEE, CA, 96161","-120.11933,39.278297",636420421,"L",NA,NA,NA,NA,-120.11933,39.278297
5 | "232052005","1153 SIERRA VISTA WAY, Lafayette, CA, 94549","Match","Exact","1153 SIERRA VISTA WAY, LAFAYETTE, CA, 94549","-122.11476,37.897873",192046673,"L",NA,NA,NA,NA,-122.11476,37.897873
6 | "23504800370000","9147 PERSHING AVE, Orangevale, CA, 95662","Match","Exact","9147 PERSHING AVE, ORANGEVALE, CA, 95662","-121.21412,38.67051",133356827,"L",NA,NA,NA,NA,-121.21412,38.67051
7 | "107618146","7017 NEWTON PL, Alta Loma, CA, 91701","Match","Exact","7017 NEWTON PL, ALTA LOMA, CA, 91701","-117.59298,34.12686",144985211,"L",NA,NA,NA,NA,-117.59298,34.12686
8 | "029345130","26362 LAWTON AVE, Loma Linda, CA, 92354","Match","Exact","26362 LAWTON AVE, LOMA LINDA, CA, 92354","-117.22901,34.04474",145230114,"L",NA,NA,NA,NA,-117.22901,34.04474
9 | "8589026023","5033 Baldwin Ave, Temple City, CA, 91780","Match","Exact","5033 BALDWIN AVE, TEMPLE CITY, CA, 91780","-118.049385,34.093925",142744738,"L",NA,NA,NA,NA,-118.049385,34.093925
10 | "8468015024","1600 W Cameron Ave, West Covina, CA, 91790","Match","Exact","1600 W CAMERON AVE, WEST COVINA, CA, 91790","-117.94267,34.06895",241245486,"L",NA,NA,NA,NA,-117.94267,34.06895
11 |
--------------------------------------------------------------------------------
/address_data/trulia_avgprice_bystate_2017.csv:
--------------------------------------------------------------------------------
1 | state,avg_listingPrice_week_aug23_2017
2 | Hawaii,905687
3 | District Of Columbia,773286
4 | California,697539
5 | Massachusetts,602210
6 | New York,565227
7 | Colorado,538477
8 | Utah,440946
9 | Connecticut,435585
10 | Oregon,416718
11 | Florida,406803
12 | Rhode Island,405450
13 | Washington,378565
14 | New Jersey,372916
15 | Maryland,369454
16 | Idaho,349000
17 | Virginia,341015
18 | Nevada,331971
19 | Arizona,322398
20 | Texas,320067
21 | Montana,314959
22 | New Hampshire,310914
23 | Vermont,306034
24 | Delaware,303971
25 | Georgia,296535
26 | Wyoming,291855
27 | South Carolina,291636
28 | Minnesota,290514
29 | Illinois,277163
30 | North Carolina,276389
31 | Maine,275717
32 | Tennessee,268692
33 | Alaska,267404
34 | New Mexico,254798
35 | South Dakota,238163
36 | Louisiana,232610
37 | Nebraska,230000
38 | North Dakota,226863
39 | Pennsylvania,224090
40 | Wisconsin,223480
41 | Kentucky,213848
42 | Alabama,212733
43 | Michigan,212694
44 | Missouri,204506
45 | Oklahoma,201091
46 | Mississippi,195390
47 | Arkansas,191446
48 | Indiana,190843
49 | Ohio,190371
50 | Kansas,187649
51 | Iowa,185087
52 | West Virginia,174865
--------------------------------------------------------------------------------
/draft/rgeodocoding2.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "RGeocoding"
3 | author: "patty"
4 | date: "December 4, 2015"
5 | output: slidy_presentation
6 | ---
7 |
8 | ## Geocoding in R
9 |
10 | Getting Started: Download the zipfile for this tutorial from
11 | https://github.com/dlab-geo/RGeocoding/archive/master.zip
12 |
13 |
14 |
15 | ## Overview
16 |
17 | - What is Geocoding
18 | - A simple example in Google Maps
19 | - Why Geocode
20 | - Geocoding in Detail
21 | - How to Geocode in R
22 | - with GGMAP
23 | - with Yahoo Placefinder
24 | - with TIGER
25 | - Now what
26 |
27 | ## What is Geocoding
28 |
29 | Determine the geographic coordinates of a named place, street address, or zip code.
30 |
31 | - city, building,
32 | - street address, intersection,
33 | - mountain, landmark,
34 | - crime or other event location,
35 | - zip code, etc.
36 |
37 | ## Try It!
38 |
39 | 
40 |
41 | maps.google.com
42 |
43 | ## Geographic Coordinates
44 |
45 |
46 |
47 | ||
48 | ------------------------|-----------------------------------|-
49 | **Latitude**|+/- 90 degrees|*how far north or south of equator*
50 | **Longitude**|+/- 180 degrees|*how far E/W of prime meridian*
51 |
52 | **Decimal Degrees (DD)**
53 | 37.870145, -122.25952
54 |
55 | **Degrees, minutes, seconds (DMS)**
56 | 37° 52' 12"N, 122° 15' 36" W
57 |
58 |
59 | ## Why?
60 |
61 | - Display locations on a map
62 | - Link locations to other data
63 | - Spatial analysis
64 | - Calculate distance, direction, area, etc.
65 | - Identify patterns & relationships:
66 | - clusters, outliers, neighbors
67 |
68 | ## Address Geocoding
69 |
70 | ## Where do Addresses come from
71 |
72 | - Extract from text documents
73 | - File or Database
74 | - Web Scraping
75 |
76 | ## Process
77 |
78 | 
79 |
80 | ## Evaluation
81 |
82 | The reference database is extremely important
83 |
84 | 
85 |
86 | ## Geocoder Output Comparison
87 |
88 | 
89 |
90 | ## Considerations
91 |
92 | - Geographic scope
93 | - Time period
94 | - Output Quality
95 | - Number of addresses
96 | - Data Privacy/security
97 | - Cost
98 |
99 | ## Local Geocoding
100 | - ArcGIS
101 |
102 | ## Remote Geocoding
103 | ArcGIS
104 | Google
105 | Yahoo
106 | OpenStreetMaps
107 | Data Science Toolkit (DSTK)
108 | *and many others*
109 |
110 | ## Geocoding in R
111 |
112 | Access an online Geocoder using an API
113 | *Application Programming Interface*
114 |
115 | In R via a package or script.
116 |
117 | ## Geocoding in R with
118 |
119 | - GGMAP
120 | - Google
121 | - DSTK
122 | - RYDN & Yahoo
123 | - TIGER
124 |
125 | ## Geocoding with GGMAP
126 | - Created by David Kahle and Hadley Wickham, ggplot2 developer
127 | - Provides functionality for fetching online map data from Google and other services
128 | and overlaying other geodata using ggplot
129 | - Includes functions for Geocoding functionality using:
130 | - the Data Science Toolkit (DSTK) geocoding service
131 | - default, unlimited usage
132 | - Google's Geocoding service
133 | - limited to 2500 addresses per day
134 | - other limits may also apply!
135 |
136 | ## GGMAP
137 | - the Data Science Toolkit (DSTK) geocoding service
138 | - default, unlimited usage
139 | - solid, not great
140 | - older data, limited geographic coverage
141 | - sometimes unavailable
142 |
143 | - Google's Geocoding service
144 | - fantastic accuracy, worldwide coverage, up to date
145 | - limited to 2500 addresses per day
146 |
147 |
148 | ## Geocoding with GGMAP
149 |
150 |
151 | > library(ggmap)
152 | > geocode("Barrows Hall, Berkeley, CA", source="google")
153 |
154 | lon lat
155 | 1 -122.258 37.87006
156 |
157 |
158 | Go ahead and stick that in maps.google.com
159 | - must be in *lat,lon* format!
160 |
161 | Then try Geocoding
162 |
163 | - an address
164 | - a zipcode
165 |
166 | ## ?geocode
167 |
168 |
169 |
170 |
171 | ## Try these changes
172 |
173 | - output="latlon" or "latlona" or "more" or "all"
174 |
175 | ## Output differences
176 |
177 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="latlon")
178 |
179 | lon lat
180 | 1 -122.258 37.87006
181 |
182 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="latlona")
183 |
184 | lon lat address
185 | 1 -122.258 37.87006 barrows hall, berkeley, ca 94720, usa
186 |
187 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="more")
188 |
189 | lon lat type loctype address north south
190 | 1 -122.258 37.87006 premise rooftop barrows hall, berkeley, ca 94720, usa 37.87147 37.86877
191 | east west premise locality administrative_area_level_2
192 | 1 -122.2566 -122.2593 Barrows Hall Berkeley Alameda County
193 | administrative_area_level_1 country postal_code
194 | 1 California United States 94720
195 |
196 | > geocode("Barrows Hall, Berkeley, CA", source="google", output="all")
197 |
198 |
199 | ## Accuracy
200 |
201 | one <- geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")
202 | two <- geocode("sather gate, berkeley, ca", source="google", output="more", messaging=TRUE)
203 | three <- geocode("berkeley, ca", source="google", output="more") # city-only query, so three exists before it is inspected
204 | one$loctype
205 | two$loctype
206 | three$loctype
207 |
208 |
209 | ## Append geocoded info to source data
210 |
211 | Create a data frame with three addresses
212 |
213 |
214 |
215 | > df <- data.frame(
216 | address = c("1517 Shattuck Ave, Berkeley, CA 94709",
217 | "Barrows Hall, Berkeley, CA",
218 | "2332 Haste St, Berkeley, CA 94704"),
219 | stringsAsFactors = FALSE
220 | )
221 |
222 | > df
223 | address
224 | 1 1517 Shattuck Ave, Berkeley, CA 94709
225 | 2 Barrows Hall, Berkeley, CA
226 | 3 2332 Haste St, Berkeley, CA 94704
227 |
228 |
229 | ## Geocode the three Addresses
230 |
231 |
232 | > df2 <- geocode(df$address,source="google", output="more")
233 | # just keep lat, lon, type, and loctype
234 | > df2 <- df2[,c(1:4)]
235 |
236 | > df2
237 | lon lat type loctype
238 | 1 -122.2689 37.87959 street_address rooftop
239 | 2 -122.2580 37.87006 premise rooftop
240 | 3 -122.2615 37.86537 street_address rooftop
241 |
242 |
243 | ## Join output to input
244 |
245 | > df3 <- data.frame(df,df2)
246 |
247 | > df3
248 | address lon lat type loctype
249 | 1 1517 Shattuck Ave, Berkeley, CA 94709 -122.2689 37.87959 street_address rooftop
250 | 2 Barrows Hall, Berkeley, CA -122.2580 37.87006 premise rooftop
251 | 3 2332 Haste St, Berkeley, CA 94704 -122.2615 37.86537 street_address rooftop
252 | >
253 |
254 |
255 |
256 | ## Map it
257 |
258 |
259 | > map <- get_map(location=c(lon=mean(df3$lon), lat=mean(df3$lat)), zoom=14)
260 | > ggmap(map) +
261 | geom_point(aes(x = lon, y = lat), data=df3, size = 6, col="red" )
262 |
263 |
264 | Try different (or no) zoom levels!
265 |
266 |
267 | ## Geocode a file of addresses
268 |
269 |
270 | # get the input data (keep text columns as character; FALSE is spelled out because F can be reassigned)
271 | data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=FALSE)
272 | head(data)
273 |
274 | id name street city state zip type
275 | 1 1 Wah Fay Liquors 2101 8th Ave Oakland CA 94606 p
276 | 2 2 Vision Liquor 1615 Macarthur Blvd Oakland CA 94602 p
277 | 3 3 Souza's Liquors 394 12th St Oakland CA 94607 p
278 | 4 4 Tk Liquors 1500 23th Ave Oakland CA 94606 p
279 | 5 5 Quadriga Wines Inc 6193 Ridgemont Dr Oakland CA 94619 p
280 | 6 6 Bev Mo 525 Embarcadero W Oakland CA 94607 c
281 |
282 |
283 |
284 | ## We need one column with address (not multiple)
285 |
286 | data$address <- with(data,paste(street,city,state,zip, sep=" "))
287 | head(data)
288 |
289 | id name street city state zip type address
290 | 1 1 Wah Fay Liquors 2101 8th Ave Oakland CA 94606 p 2101 8th Ave Oakland CA 94606
291 | 2 2 Vision Liquor 1615 Macarthur Blvd Oakland CA 94602 p 1615 Macarthur Blvd Oakland CA 94602
292 | 3 3 Souza's Liquors 394 12th St Oakland CA 94607 p 394 12th St Oakland CA 94607
293 | 4 4 Tk Liquors 1500 23th Ave Oakland CA 94606 p 1500 23th Ave Oakland CA 94606
294 | 5 5 Quadriga Wines Inc 6193 Ridgemont Dr Oakland CA 94619 p 6193 Ridgemont Dr Oakland CA 94619
295 | 6 6 Bev Mo 525 Embarcadero W Oakland CA 94607 c 525 Embarcadero W Oakland CA 94607
296 |
297 |
298 |
299 | ## Irregularity is a Problem
300 |
301 |
302 |
303 | > data[19,8]
304 | [1] "7305 Edgewater Dr #D Oakland CA 94621"
305 |
306 | > geocode(data[19,8], source="google", output="latlona")
307 | lon lat address
308 | 1 -81.44055 28.62331 7305 edgewater dr, lockhart, fl 32810, usa
309 |
310 | data[19,8]<-"7305 Edgewater Dr Oakland CA 94621" ## Why do we need to do this??
311 |
312 |
313 | ## Geocode!
314 |
315 | geocoded_output <- geocode(data$address, output = "latlona", source = "google")
316 | geocoded_output <- data.frame(data, geocoded_output)
317 |
318 | ## Review
319 | head(geocoded_output)
320 |
321 | ## save output
322 | write.csv(geocoded_output,file="geocoded_output.csv", row.names=FALSE)
323 |
324 |
325 | ## Know Your limits
326 |
327 | #### Scaling up to more than 2500 records?
328 |
329 | geocodeQueryCheck() #how am I doing?
330 |
331 |
332 | ## Working With Limits
333 |
334 | maxrecs <- geocodeQueryCheck() # assumes this returns the number of queries still available - confirm in ?geocodeQueryCheck
335 |
336 | data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=FALSE) # read data
337 | data$address <- with(data,paste(street,city,state,zip, sep=" ")) #add single column address
338 | if (!"geocoded" %in% names(data)) {
339 | # do this once - test the column names; exists("data$geocoded") would look for a variable with that literal name
340 | data$geocoded <- 0
341 | }
342 | not_geocoded <- subset(data,geocoded == 0)
343 | not_geocoded <- head(not_geocoded, maxrecs) # keep at most maxrecs rows (indexing [,maxrecs] would pick a column)
344 | nrow(not_geocoded)
345 | not_geocoded$address[not_geocoded$address == "7305 Edgewater Dr #D Oakland CA 94621"] <- "7305 Edgewater Dr Oakland CA 94621" ## Why do we need to do this??
346 |
347 | geocoded_output <- geocode(not_geocoded$address, output = "latlona", source = "google")
348 | not_geocoded$geocoded <- 1 # flag the rows that were just sent to the geocoder
349 | geocoded_output <- data.frame(not_geocoded, geocoded_output) # bind results to the rows that were geocoded, not the full input
350 | #save output
351 | write.csv(geocoded_output,file="geocoded_output.csv", row.names=FALSE)
352 |
353 |
354 |
355 | ## More Google Limits
356 |
357 | https://developers.google.com/maps/documentation/geocoding/usage-limits
358 |
359 |
360 |
361 | ## Yahoo Placefinder - non-commercial version!
362 |
363 | - Similar quality to Google
364 | - Limited to 2000 geocodes per day
365 | - **But** fewer usage restrictions
366 |
367 | - Available via *RYDN* Package
368 | - YDN = Yahoo Developers Network
369 |
370 | ## First!
371 | Apply for an account on YDN
372 |
373 | **Add slides for account**
374 |
375 |
376 | ## Next - Install RYDN
377 | # devtools::install_github("trestletech/rydn")
378 | library(rydn)
379 |
380 | ## Key in!
381 | # read in your keys from YDN (don't share)
382 | source("keys/ydn_keys.R")
383 | #mykey="dj0.......................00Zg--"
384 | #mysecret ="00....................8"
385 |
386 | ## Testing
387 | myloc <- find_place("Barrows Hall, Berkeley,ca",key=mykey,secret=mysecret)
388 | myloc #see what was returned
389 |
390 | *To interpret the response, see: https://developer.yahoo.com/boss/geo/docs/supported_responses.html*
391 |
392 | ## work with subset of the returned info
393 | myloc_sub <- myloc[1 ,c("quality", "latitude", "longitude", "radius")] #subset: first returned row, four columns
394 |
395 | #convert strings to numerics (on the subset we keep working with)
396 | myloc_sub$longitude <- as.numeric(myloc_sub$longitude)
397 | myloc_sub$latitude <- as.numeric(myloc_sub$latitude)
398 |
399 | ## Now geocode!
400 |
401 |
402 | ## What's median income around my liquor stores
403 |
404 |
405 |
406 | ## References
407 | - https://cran.r-project.org/web/packages/ggmap/index.html
408 | - https://journal.r-project.org/archive/2013-1/kahle-wickham.pdf
409 | - https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/ggmap/ggmapCheatsheet.pdf
410 |
411 |
412 |
413 |
--------------------------------------------------------------------------------
/output/address_data_geocoded2.csv:
--------------------------------------------------------------------------------
1 | "GEOID10","id","name","street","city","state","zip","type","full_address","glon","glat","lon","lat","fips","CTIDFP00","totpop","under18","pct_under18"
2 | "06001401000",17,"Gallager Market","3849 Martin Luther King Jr Way","Oakland","CA",94609,"m","3849 Martin Luther King Jr Way, Oakland, CA 94609",-122.2691521,37.8281185,-122.268896055,37.828055535,"060014010004018","06001401000",6193,820,13.2
3 | "06001401100",14,"Vernon Market","3710 Telegraph Ave","Oakland","CA",94609,"m","3710 Telegraph Ave, Oakland, CA 94609",-122.2650564,37.8255986,-122.265324385,37.8257232560001,"060014011003009","06001401100",4138,264,6.4
4 | "06001401800",13,"Sav-Mor Liquor","1333 Peralta St","Oakland","CA",94607,"p","1333 Peralta St, Oakland, CA 94607",-122.2942244,37.8113527,-122.294328517,37.8109490200001,"060014018001005","06001401800",1866,425,22.8
5 | "06001401800",16,"Happy Time","1647 8th St","Oakland","CA",94607,"p","1647 8th St, Oakland, CA 94607",-122.298875,37.8071804,-122.298818746,37.8072919190001,"060014018002012","06001401800",1866,425,22.8
6 | "06001402200",12,"Seventh Street Food & Liquor","1460 7th St","Oakland","CA",94607,"m","1460 7th St, Oakland, CA 94607",-122.2951819,37.8056587,-122.295257249,37.805456087,"060014022002015","06001402200",2406,501,20.8
7 | "06001403000",3,"Souza's Liquors","394 12th St","Oakland","CA",94607,"p","394 12th St, Oakland, CA 94607",-122.2705233,37.8026203,-122.270542917,37.80238021,"060014030001005","06001403000",3167,308,9.7
8 | "06001403000",9,"Park Liquors","828 Franklin St","Oakland","CA",94607,"p","828 Franklin St, Oakland, CA 94607",-122.2719881,37.8002274,-122.272205314,37.8001763230001,"060014030001017","06001403000",3167,308,9.7
9 | "06001403502",11,"Vernon Market","3210 Harrison St","Oakland","CA",94611,"m","3210 Harrison St, Oakland, CA 94611",-122.2557954,37.8180398,-122.25600678,37.818050858,"060014035022001","06001403500",2081,118,5.7
10 | "06001403800",15,"Buckingham Wine & Spirits","3293 Lakeshore Ave","Oakland","CA",94610,"p","3293 Lakeshore Ave, Oakland, CA 94610",-122.2443398,37.8110554,-122.244180914,37.8107465640001,"060014038003008","06001403800",3323,377,11.3
11 | "06001403800",18,"Grand Piedmont Liquors","3900 Grand Ave","Oakland","CA",94610,"p","3900 Grand Ave, Oakland, CA 94610",-122.2444949,37.8185137,-122.244728744,37.8186177630001,"060014038004003","06001403800",3323,377,11.3
12 | "06001404900",2,"Vision Liquor","1615 Macarthur Blvd","Oakland","CA",94602,"p","1615 Macarthur Blvd, Oakland, CA 94602",-122.223667,37.800329,-122.223598292,37.8005377910001,"060014049003026","06001404900",3995,830,20.8
13 | "06001405301",20,"Carriage Trade Liquors"," 350 E 18th St","Oakland","CA",94606,"p"," 350 E 18th St, Oakland, CA 94606",-122.2510981,37.7994349,-122.251216784,37.7992969090001,"060014053012004","06001405300",2899,318,11
14 | "06001405500",1,"Wah Fay Liquors","2101 8th Ave","Oakland","CA",94606,"p","2101 8th Ave, Oakland, CA 94606",-122.2448776,37.79836,-122.244758177,37.7982741370001,"060014055003004","06001405500",4104,683,16.6
15 | "06001406201",4,"Tk Liquors","1500 23th Ave","Oakland","CA",94606,"p","1500 23th Ave, Oakland, CA 94606",-122.2349511,37.7842636,-122.235061143,37.7842299920001,"060014062014000","06001406201",4296,1111,25.9
16 | "06001407500",7,"Fairfax Liquor","5403 Foothill Blvd","Oakland","CA",94601,"p","5403 Foothill Blvd, Oakland, CA 94601",-122.1983483,37.7724053,-122.198434586,37.7725488080001,"060014075003004","06001407500",4201,1321,31.4
17 | "06001408100",5,"Quadriga Wines Inc","6193 Ridgemont Dr","Oakland","CA",94619,"p","6193 Ridgemont Dr, Oakland, CA 94619",-122.1671766,37.7843368,-122.167020084,37.78445829,"060014081002000","06001408100",6266,765,12.2
18 | "06001408800",10,"Los Camellos","5913 International Blvd","Oakland","CA",94621,"p","5913 International Blvd, Oakland, CA 94621",-122.1994052,37.7648862,-122.199439298,37.764960681,"060014088004003","06001408800",6348,2090,32.9
19 | "06001408900",8,"Saleen Market","1200 78th Ave","Oakland","CA",94621,"m","1200 78th Ave, Oakland, CA 94621",-122.1863008,37.7555499,-122.186416836,37.7556614330001,"060014089002026","06001408900",3105,901,29
20 | "06001409000",19,"J J Buckley Fine Wines","7305 Edgewater Dr #D","Oakland","CA",94621,"p","7305 Edgewater Dr #D, Oakland, CA 94621",-122.2064413,37.744684,-122.20642817,37.745344269,"060014090002013","06001409000",3752,1080,28.8
21 | "06001983200",6,"Bev Mo","525 Embarcadero W"," Oakland","CA",94607,"c","525 Embarcadero W, Oakland, CA 94607",-122.2791286,37.7959002,-122.278601227,37.79605572,"060019832001030","06001403200",572,38,6.6
22 |
--------------------------------------------------------------------------------
/output/address_data_geocoded_esri.csv:
--------------------------------------------------------------------------------
1 | "ID","lon","lat","score","locName","status","matchAddr","side","addressType"
2 | 2,-122.223598292,37.8005377910001,100,"World","M","1615 MacArthur Blvd, Oakland, California, 94602","L","PointAddress"
3 | 4,-122.235061143,37.7842299920001,100,"World","M","1500 23rd Ave, Oakland, California, 94606","R","StreetAddress"
4 | 1,-122.244758177,37.7982741370001,100,"World","M","2101 8th Ave, Oakland, California, 94606","L","PointAddress"
5 | 3,-122.270542917,37.80238021,100,"World","M","394 12th St, Oakland, California, 94607","R","PointAddress"
6 | 5,-122.167020084,37.78445829,100,"World","M","6193 Ridgemont Dr, Oakland, California, 94619","L","PointAddress"
7 | 8,-122.186416836,37.7556614330001,100,"World","M","1200 78th Ave, Oakland, California, 94621","R","PointAddress"
8 | 7,-122.198434586,37.7725488080001,100,"World","M","5403 Foothill Blvd, Oakland, California, 94601","L","StreetAddress"
9 | 6,-122.278601227,37.79605572,100,"World","M","525 Embarcadero W, Oakland, California, 94607","L","StreetAddress"
10 | 10,-122.199439298,37.764960681,100,"World","M","5913 International Blvd, Oakland, California, 94621","L","StreetAddress"
11 | 9,-122.272205314,37.8001763230001,100,"World","M","828 Franklin St, Oakland, California, 94607","R","PointAddress"
12 | 11,-122.25600678,37.818050858,100,"World","M","3210 Harrison St, Oakland, California, 94611","R","PointAddress"
13 | 12,-122.295257249,37.805456087,100,"World","M","1460 7th St, Oakland, California, 94607","R","StreetAddress"
14 | 14,-122.265324385,37.8257232560001,100,"World","M","3710 Telegraph Ave, Oakland, California, 94609","R","StreetAddress"
15 | 13,-122.294328517,37.8109490200001,100,"World","M","1333 Peralta St, Oakland, California, 94607","L","StreetAddress"
16 | 15,-122.244180914,37.8107465640001,100,"World","M","3293 Lakeshore Ave, Oakland, California, 94610","L","PointAddress"
17 | 16,-122.298818746,37.8072919190001,100,"World","M","1647 8th St, Oakland, California, 94607","L","PointAddress"
18 | 20,-122.251216784,37.7992969090001,100,"World","M","350 E 18th St, Oakland, California, 94606","R","PointAddress"
19 | 18,-122.244728744,37.8186177630001,100,"World","M","3900 Grand Ave, Oakland, California, 94610","R","PointAddress"
20 | 17,-122.268896055,37.828055535,100,"World","M","3849 Martin Luther King Jr Way, Oakland, California, 94609","L","PointAddress"
21 | 19,-122.20642817,37.745344269,100,"World","M","7305 Edgewater Dr, Oakland, California, 94621","L","StreetAddress"
22 |
--------------------------------------------------------------------------------
/output/address_data_geocoded_google.csv:
--------------------------------------------------------------------------------
1 | "lon","lat","type","loctype","address","north","south","east","west","street_number","route","locality","administrative_area_level_2","administrative_area_level_1","country","postal_code","postal_code_suffix","neighborhood","subpremise"
2 | -122.2448776,37.79836,"premise","rooftop","2101 8th ave, oakland, ca 94606, usa",37.7997158302915,37.7970178697085,-122.243540919708,-122.246238880292,"2101","8th Avenue","Oakland","Alameda County","California","United States","94606","2007",NA,NA
3 | -122.223667,37.800329,"street_address","rooftop","1615 macarthur blvd, oakland, ca 94602, usa",37.8016779802915,37.7989800197085,-122.222318019708,-122.225015980292,"1615","MacArthur Boulevard","Oakland","Alameda County","California","United States","94602","1606","Glenview",NA
4 | -122.2705233,37.8026203,"street_address","rooftop","394 12th st, oakland, ca 94607, usa",37.8039692802915,37.8012713197085,-122.269174319708,-122.271872280292,"394","12th Street","Oakland","Alameda County","California","United States","94607","4249","Downtown Oakland",NA
5 | -122.2349511,37.7842636,"street_address","rooftop","1500 23rd ave, oakland, ca 94606, usa",37.7856125802915,37.7829146197085,-122.233602119708,-122.236300080291,"1500","23rd Avenue","Oakland","Alameda County","California","United States","94606","5035","Rancho San Antonio",NA
6 | -122.1671766,37.7843368,"premise","rooftop","6193 ridgemont dr, oakland, ca 94619, usa",37.7856903302915,37.7829923697085,-122.165849819708,-122.168547780292,"6193","Ridgemont Drive","Oakland","Alameda County","California","United States","94619","3724","Caballo Hills",NA
7 | -122.2791286,37.7959002,"street_address","rooftop","525 embarcadero west, oakland, ca 94607, usa",37.7972491802915,37.7945512197085,-122.277779619708,-122.280477580292,"525","Embarcadero West","Oakland","Alameda County","California","United States","94607","3565","Downtown Oakland",NA
8 | -122.1983483,37.7724053,"street_address","range_interpolated","5403 foothill blvd, oakland, ca 94601, usa",37.7737542802915,37.7710563197085,-122.196999319708,-122.199697280292,"5403","Foothill Boulevard","Oakland","Alameda County","California","United States","94601","5515","Fairfax",NA
9 | -122.1863008,37.7555499,"premise","rooftop","1200 78th ave, oakland, ca 94621, usa",37.7568967302915,37.7541987697085,-122.184959919709,-122.187657880291,"1200","78th Avenue","Oakland","Alameda County","California","United States","94621","2604","Fitchburg",NA
10 | -122.2719881,37.8002274,"street_address","rooftop","828 franklin st, oakland, ca 94607, usa",37.8015763802915,37.7988784197085,-122.270639119709,-122.273337080291,"828","Franklin Street","Oakland","Alameda County","California","United States","94607","4202","Chinatown",NA
11 | -122.1994052,37.7648862,"street_address","rooftop","5913 international blvd, oakland, ca 94621, usa",37.7662351802915,37.7635372197085,-122.198056219709,-122.200754180291,"5913","International Boulevard","Oakland","Alameda County","California","United States","94621","4202","East 14th Street Business District",NA
12 | -122.2557954,37.8180398,"premise","rooftop","3210 harrison st, oakland, ca 94611, usa",37.8193728302915,37.8166748697085,-122.254448769709,-122.257146730291,"3210","Harrison Street","Oakland","Alameda County","California","United States","94611","5527","Oakland Ave - Harrison St",NA
13 | -122.2951819,37.8056587,"premise","rooftop","1460 7th st, oakland, ca 94607, usa",37.8070176802915,37.8043197197085,-122.293811069708,-122.296509030292,"1460","7th Street","Oakland","Alameda County","California","United States","94607",NA,"Prescott",NA
14 | -122.2942244,37.8113527,"street_address","rooftop","1333 peralta st, oakland, ca 94607, usa",37.8127016802915,37.8100037197085,-122.292875419708,-122.295573380292,"1333","Peralta Street","Oakland","Alameda County","California","United States","94607","2015","Prescott",NA
15 | -122.2650564,37.8255986,"premise","rooftop","3710 telegraph ave, oakland, ca 94609, usa",37.8269623802915,37.8242644197085,-122.263706369709,-122.266404330291,"3710","Telegraph Avenue","Oakland","Alameda County","California","United States","94609",NA,"Mosswood",NA
16 | -122.2443398,37.8110554,"premise","rooftop","3293 lakeshore ave, oakland, ca 94610, usa",37.8123312802915,37.8096333197085,-122.242957869709,-122.245655830291,"3293","Lakeshore Avenue","Oakland","Alameda County","California","United States","94610","2719","Lakeshore",NA
17 | -122.298875,37.8071804,"premise","rooftop","1647 8th st, oakland, ca 94607, usa",37.8085135802915,37.8058156197085,-122.297543019708,-122.300240980292,"1647","8th Street","Oakland","Alameda County","California","United States","94607","1354","Prescott",NA
18 | -122.2691521,37.8281185,"premise","rooftop","3849 martin luther king jr way, oakland, ca 94609, usa",37.8294589302915,37.8267609697085,-122.267793469708,-122.270491430292,"3849","Martin Luther King Junior Way","Oakland","Alameda County","California","United States","94609","2313","Mosswood",NA
19 | -122.2444949,37.8185137,"premise","rooftop","3900 grand ave, oakland, ca 94610, usa",37.8198538302915,37.8171558697085,-122.243162819709,-122.245860780291,"3900","Grand Avenue","Oakland","Alameda County","California","United States","94610",NA,"Grand Lake",NA
20 | -122.2064413,37.744684,"subpremise","rooftop","7305 edgewater dr d, oakland, ca 94621, usa",37.7460985802915,37.7434006197085,-122.205147969709,-122.207845930291,"7305","Edgewater Drive","Oakland","Alameda County","California","United States","94621",NA,NA,"D"
21 | -122.2510981,37.7994349,"premise","rooftop","350 e 18th st, oakland, ca 94606, usa",37.8008464802915,37.7981485197085,-122.249700119709,-122.252398080291,"350","East 18th Street","Oakland","Alameda County","California","United States","94606","1814","Ivy Hill",NA
22 |
--------------------------------------------------------------------------------
/output/geocoded_addresses_out.csv:
--------------------------------------------------------------------------------
1 | "id","in_address","match_status","match_type","matched_address","lon_lat","tlid","street_side","state_fips","county_fips","tract_fips","block_fips","lon","lat"
2 | 3,"10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 GOLF LINKS RD, OAKLAND, CA, 94605","-122.126884,37.75383",125011765,"L",6,1,409900,NA,-122.126884,37.75383
3 | 2,"4728 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4728 SCOTIA AVE, OAKLAND, CA, 94605","-122.125046,37.75488",125011836,"R",6,1,409900,NA,-122.125046,37.75488
4 | 1,"10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 COTTER ST, OAKLAND, CA, 94605","-122.12373,37.755764",125011838,"R",6,1,409900,NA,-122.12373,37.755764
5 | 10,"271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.13135,37.759373",125011739,"R",6,1,409900,NA,-122.13135,37.759373
6 | 7,"4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 SCOTIA AVE, OAKLAND, CA, 94605","-122.12386,37.75519",125011839,"L",6,1,409900,NA,-122.12386,37.75519
7 | 6,"111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 DONNA WAY, OAKLAND, CA, 94605","-122.13217,37.760193",125011738,"L",6,1,409900,NA,-122.13217,37.760193
8 | 5,"380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.12819,37.761078",125011702,"L",6,1,409900,NA,-122.12819,37.761078
9 | 4,"4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 GRASS VALLEY RD, OAKLAND, CA, 94605","-122.12256,37.751083",617284248,"L",6,1,409900,NA,-122.12256,37.751083
10 | 9,"10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 PEBBLE BEACH DR, OAKLAND, CA, 94605","-122.12747,37.761383",125011703,"L",6,1,409900,NA,-122.12747,37.761383
11 | 8,"248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 ELYSIAN FIELDS DR, OAKLAND, CA, 94605","-122.1325,37.759678",125011736,"L",6,1,409900,NA,-122.1325,37.759678
12 | 11,"4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 SHETLAND AVE, OAKLAND, CA, 94605","-122.12222,37.75289",125011864,"R",6,1,409900,NA,-122.12222,37.75289
13 | 12,"24 KEY CT, Oakland, CA, 94605","Match","Exact","24 KEY CT, OAKLAND, CA, 94605","-122.124565,37.755417",125011835,"R",6,1,409900,NA,-122.124565,37.755417
14 |
--------------------------------------------------------------------------------
/output/geocoded_addresses_single_out.csv:
--------------------------------------------------------------------------------
1 | "id","street","city","state","zip","census_format","lat","lon","geoid"
2 | 2,"4728 SCOTIA AVE","Oakland","CA",94605,"street=4728 SCOTIA AVE&city=Oakland&state=CA&zip=94605",37.75488,-122.125046,"06001409900"
3 | 3,"10834 GOLF LINKS RD","Oakland","CA",94605,"street=10834 GOLF LINKS RD&city=Oakland&state=CA&zip=94605",37.75383,-122.126884,"06001409900"
4 | 4,"4627 GRASS VALLEY RD","Oakland","CA",94605,"street=4627 GRASS VALLEY RD&city=Oakland&state=CA&zip=94605",37.751083,-122.12256,"06001409900"
5 | 5,"380 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=380 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.761078,-122.12819,"06001409900"
6 | 6,"111 DONNA WAY","Oakland","CA",94605,"street=111 DONNA WAY&city=Oakland&state=CA&zip=94605",37.760193,-122.13217,"06001409900"
7 | 7,"4855 SCOTIA AVE","Oakland","CA",94605,"street=4855 SCOTIA AVE&city=Oakland&state=CA&zip=94605",37.75519,-122.12386,"06001409900"
8 | 8,"248 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=248 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.759678,-122.1325,"06001409900"
9 | 9,"10520 PEBBLE BEACH DR","Oakland","CA",94605,"street=10520 PEBBLE BEACH DR&city=Oakland&state=CA&zip=94605",37.761383,-122.12747,"06001409900"
10 | 10,"271 ELYSIAN FIELDS DR","Oakland","CA",94605,"street=271 ELYSIAN FIELDS DR&city=Oakland&state=CA&zip=94605",37.759373,-122.13135,"06001409900"
11 | 11,"4840 SHETLAND AVE","Oakland","CA",94605,"street=4840 SHETLAND AVE&city=Oakland&state=CA&zip=94605",37.75289,-122.12222,"06001409900"
12 | 12,"24 KEY CT","Oakland","CA",94605,"street=24 KEY CT&city=Oakland&state=CA&zip=94605",37.755417,-122.124565,"06001409900"
13 |
--------------------------------------------------------------------------------
/output/tracts2010.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.dbf
--------------------------------------------------------------------------------
/output/tracts2010.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
--------------------------------------------------------------------------------
/output/tracts2010.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.shp
--------------------------------------------------------------------------------
/output/tracts2010.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/output/tracts2010.shx
--------------------------------------------------------------------------------
/screenshots/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/.DS_Store
--------------------------------------------------------------------------------
/screenshots/addresses1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/addresses1.png
--------------------------------------------------------------------------------
/screenshots/census_api_key_apply.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/census_api_key_apply.png
--------------------------------------------------------------------------------
/screenshots/census_geo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/census_geo.png
--------------------------------------------------------------------------------
/screenshots/dual_address_match.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/dual_address_match.png
--------------------------------------------------------------------------------
/screenshots/esri_wgs_token.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/esri_wgs_token.png
--------------------------------------------------------------------------------
/screenshots/fcc_api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/fcc_api.png
--------------------------------------------------------------------------------
/screenshots/fips_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/fips_code.png
--------------------------------------------------------------------------------
/screenshots/geocode_details1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocode_details1.png
--------------------------------------------------------------------------------
/screenshots/geocoding_details1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocoding_details1.png
--------------------------------------------------------------------------------
/screenshots/geocoding_details2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/geocoding_details2.png
--------------------------------------------------------------------------------
/screenshots/ggmap_geocode_help.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ggmap_geocode_help.png
--------------------------------------------------------------------------------
/screenshots/ggmap_plot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ggmap_plot1.png
--------------------------------------------------------------------------------
/screenshots/gmap_barrows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/gmap_barrows.png
--------------------------------------------------------------------------------
/screenshots/google_limits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/google_limits.png
--------------------------------------------------------------------------------
/screenshots/output_compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/output_compare.png
--------------------------------------------------------------------------------
/screenshots/popdens.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/popdens.png
--------------------------------------------------------------------------------
/screenshots/ref_data_quality.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ref_data_quality.png
--------------------------------------------------------------------------------
/screenshots/social_explorer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/social_explorer.png
--------------------------------------------------------------------------------
/screenshots/ydn_boss_placefinder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_boss_placefinder.png
--------------------------------------------------------------------------------
/screenshots/ydn_create_application.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_create_application.png
--------------------------------------------------------------------------------
/screenshots/ydn_keys.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_keys.png
--------------------------------------------------------------------------------
/screenshots/ydn_landing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_landing.png
--------------------------------------------------------------------------------
/screenshots/ydn_signup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_signup.png
--------------------------------------------------------------------------------
/screenshots/ydn_usage_limits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/screenshots/ydn_usage_limits.png
--------------------------------------------------------------------------------
/scripts/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/scripts/.DS_Store
--------------------------------------------------------------------------------
/scripts/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/scripts/.Rapp.history
--------------------------------------------------------------------------------
/scripts/draft/tiger_geocoding.R:
--------------------------------------------------------------------------------
#
# Geocoding with Tiger Geocoding Service
#
# Draft script: uploads a CSV of addresses to the Census batch geocoder and
# reads the CSV response into a data frame.
#
# Run from the directory that contains the `tiger/` folder; the input path
# below is relative to the working directory. (The script no longer wipes the
# workspace or changes the working directory to a machine-specific path.)

# Load libraries
library(curl)
library(httr)

# CLI format for CURL
# format of geocoding request for Tiger Geocoder
# curl --form addressFile=@tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
### or this if saving to file as indicated by -o flag
# curl --form addressFile=@tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
## -o geocoded_addresses_with_fips.csv

tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

tiger_url_prefix <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

# The three strings below only document the equivalent curl command line;
# the httr request further down does not use them.
tiger_url_options <- "--form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010"

tiger_url_address_options <- paste0("--form addressFile=@", tiger_input_addressFile)

tiger_url <- paste(tiger_url_prefix, tiger_url_options, tiger_url_address_options)

# worked:
# curl --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010 http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
## curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
## --form addressFile=@tiger/tiger_12addresses_to_geocode.csv
## --form benchmark=Public_AR_Census2010
## --form vintage=Census2010_Census2010

## WORKED - thanks to: http://stackoverflow.com/questions/26611289/curl-post-statement-to-rcurl-or-httr
## add verbose() to see details of process
geocoded_addresses <- POST(
  tiger_url_prefix,
  encode = "multipart",
  body = list(
    addressFile = upload_file(tiger_input_addressFile),
    benchmark = "Public_AR_Census2010",
    vintage = "Census2010_Census2010"
  )
)

# Pull the response body out as a single text string
raw_csv <- content(geocoded_addresses, as = "text", encoding = "UTF-8")

# Keep a copy of the raw output on disk, as the original script did
writeLines(raw_csv, "test_out.txt")

# Parse the CSV text straight into a data frame (no round trip through a file)
mylocs <- read.csv(text = raw_csv, header = FALSE)
head(mylocs)

# Column V6 holds "lon,lat" in one field; split it into two numeric columns.
# Rows without a match have an empty V6 and end up as NA.
coord_parts <- strsplit(as.character(mylocs$V6), ",", fixed = TRUE)
mylocs$lon <- as.numeric(vapply(coord_parts, function(p) p[1], character(1)))
mylocs$lat <- as.numeric(vapply(coord_parts, function(p) p[2], character(1)))
60 |
61 |
--------------------------------------------------------------------------------
/scripts/esri_wgs_geocoding.R:
--------------------------------------------------------------------------------
1 |
2 | # This script provides an example of accessing the ESRI world geocoding service.
3 |
4 | # This code borrows HEAVILY (almost completely) and with thanks from
5 | # Claudia Engel: https://github.com/cengel/ArcGIS_geocoding
6 | # See her repo for more details and other examples
7 |
8 | ##################################
9 | ## Single Line Geocode Function ##
10 | ##################################
11 | # The function takes:
12 | # - one address at a time as one string (SingleLine)
13 | # - token - which you get from developers.arcgis.com
14 | # see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
15 | # - TRUE/FALSE - allow to return Postal codes if a full street address match cannot be found (default is TRUE)
16 | #
17 | # The function returns:
18 | # lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
19 | # score - The accuracy of the address match between 0 and 100.
20 | # locName - The component locator used to return a particular match result
21 | # status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
22 | # matchAddr - Complete address returned for the geocode request.
23 | # side - The side of the street where an address resides relative to the direction
24 | # of feature digitization
25 | # addressType - The match level for a geocode request. "PointAddress" is typically the
26 | # most spatially accurate match level. "StreetAddress" differs from PointAddress
27 | # because the house number is interpolated from a range of numbers. "StreetName" is similar,
28 | # but without the house number.
29 |
# Geocode one single-line address with the ESRI World Geocoding Service.
#
# Arguments:
#   address - the address as one string
#   token   - ESRI access token
#   postal  - if FALSE, the request is restricted to category "Address";
#             if TRUE (default) no category filter is sent
#
# Returns a one-row data frame with lon, lat, score, locName, status,
# matchAddr, side and addressType taken from the first returned location.
# Stops with an error if the HTTP request fails, the service returns an
# error payload, or no location is returned.
geocode_one <- function(address, token, postal = TRUE) {
  # ESRI geolocator (https so the token is not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # Build the SingleLine record with a real JSON encoder so that quotes,
  # ampersands, '#' etc. inside the address cannot corrupt the request.
  records <- list(records = list(list(
    attributes = list(OBJECTID = 1, SingleLine = as.character(address))
  )))

  query <- list(
    addresses = rjson::toJSON(records),
    token = token,
    f = "json"
  )
  if (!isTRUE(postal)) {
    query$category <- "Address"
  }

  # submit; httr percent-encodes every query value for us
  rawdata <- httr::GET(gserver, query = query)
  httr::stop_for_status(rawdata)

  # parse JSON and process result
  res <- httr::content(rawdata, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("ESRI geocoding service returned an error: ",
         res$error$message, call. = FALSE)
  }
  if (length(res$locations) == 0) {
    stop("ESRI geocoding service returned no locations.", call. = FALSE)
  }

  loc <- res$locations[[1]]
  data.frame(
    lon = as.numeric(loc$location$x),
    lat = as.numeric(loc$location$y),
    score = loc$score,
    locName = loc$attributes$Loc_name,
    status = loc$attributes$Status,
    matchAddr = loc$attributes$Match_addr,
    side = loc$attributes$Side,
    addressType = loc$attributes$Addr_type
  )
}
58 |
59 | #######################################
60 | ## Multi Line Batch Geocode Function ##
61 | #######################################
62 | # The function takes:
63 | # - ID variable to identify records, must be numeric and should be unique
64 | # - multiple addresses as vectors, separated into: Street, City, State, Zip
65 | # - token - which you get from developers.arcgis.com
66 | # see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
67 | #
68 | # It can take a maximum of 1000 addresses. If more, it returns an error.
69 | #
70 | # The function returns a data frame with the following fields:
71 | # ID - Result ID can be used to join the output fields in the response to the attributes
72 | # in the original address table.
73 | # lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
74 | # score - The accuracy of the address match between 0 and 100.
75 | # locName - The component locator used to return a particular match result
76 | # status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
77 | # matchAddr - Complete address returned for the geocode request.
78 | # side - The side of the street where an address resides relative to the direction
79 | # of feature digitization
80 | # addressType - The match level for a geocode request. "PointAddress" is typically the
81 | # most spatially accurate match level. "StreetAddress" differs from PointAddress
82 | # because the house number is interpolated from a range of numbers. "StreetName" is similar,
83 | # but without the house number.
84 |
# Batch geocode up to 1000 multi-field addresses with the ESRI World
# Geocoding Service.
#
# Arguments:
#   id     - numeric record identifiers (should be unique)
#   street, city, state, zip - address components, recycled against `id`
#   token  - ESRI access token
#
# Returns a data frame with one row per returned location and the columns
# ID, lon, lat, score, locName, status, matchAddr, side, addressType.
# Fields missing from a location are reported as NA. An empty data frame is
# returned when the service returns no locations.
# Stops if more than 1000 ids are supplied, if `id` is not numeric, if the
# HTTP request fails, or if the service answers with an error payload.
geocode_many <- function(id, street, city, state, zip, token) {
  # check if we have more than 1000, if so stop.
  if (length(id) > 1000) {
    message("length is: ", length(id))
    stop("Can only process up to 1000 addresses at a time.")
  }

  # check if id is numeric
  if (!is.numeric(id)) {
    stop("id variable needs to be numeric.")
  }

  # One record per address. Built element-wise (not via apply() on a data
  # frame) so OBJECTID stays numeric and the text fields are never padded.
  records <- unname(Map(
    function(oid, adr, cty, st, zp) {
      list(attributes = list(
        OBJECTID = oid, Address = adr, City = cty, State = st, Zip = zp
      ))
    },
    id,
    as.character(street),
    as.character(city),
    as.character(state),
    as.character(zip)
  ))
  adr_json <- rjson::toJSON(list(records = records))

  # Identify the geocoding web service URL (https protects the token)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # submit
  req <- httr::POST(
    url = gserver,
    body = list(addresses = adr_json, f = "json", token = token),
    encode = "form"
  )
  httr::stop_for_status(req)

  # process and parse
  res <- httr::content(req, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("ESRI geocoding service returned an error: ",
         res$error$message, call. = FALSE)
  }

  # NULL-safe accessors: a field absent from the response becomes NA
  # instead of breaking data.frame() with a zero-length column.
  num_or_na <- function(v) if (is.null(v)) NA_real_ else as.numeric(v)
  val_or_na <- function(v) if (is.null(v)) NA else v

  rows <- lapply(res$locations, function(loc) {
    data.frame(
      ID = val_or_na(loc$attributes$ResultID),
      lon = num_or_na(loc$location$x),
      lat = num_or_na(loc$location$y),
      score = val_or_na(loc$score),
      locName = val_or_na(loc$attributes$Loc_name),
      status = val_or_na(loc$attributes$Status),
      matchAddr = val_or_na(loc$attributes$Match_addr),
      side = val_or_na(loc$attributes$Side),
      addressType = val_or_na(loc$attributes$Addr_type)
    )
  })

  if (length(rows) == 0) {
    return(data.frame())
  }
  # bind once instead of growing a data frame inside a loop
  do.call(rbind, rows)
}
140 |
141 | #--------------------------------------------------------------------------------------
142 | # Some code to use the above functions
143 | #--------------------------------------------------------------------------------------
144 |
# Access token for the ESRI geocoding service: replace the placeholder
# text with your own token before calling the functions above.
myToken <- "enter your long ugly ESRI geocoding access token here"

# ---------------------------
# GEOCODE A SINGLE ADDRESS
# ---------------------------
# geocode_output <-geocode_one("1600 Pennsylvania Avenue NW, Washington, DC", myToken, postal = TRUE)

# -----------------------------
# GEOCODE A BATCH OF ADDRESSES
# -----------------------------

# A small example table: three California addresses, one per row.
adr_df <- data.frame(
  ID = seq_len(3),
  street = c(
    "450 Serra Mall",
    "1600 Amphitheatre Pkwy",
    "1355 Market Street Suite 900"
  ),
  city = c("Stanford", "Mountain View", "San Francisco"),
  state = rep("CA", 3),
  zip = c("94305", "94043", "94103")
)


# Batch geocode your dataframe of addresses with the following function
#adr_gc <- geocode_many(adr_df$ID, adr_df$street, adr_df$city, adr_df$state, adr_df$zip, myToken)

# join back with original data
#merge(adr_df, adr_gc, by = "ID", all.x = T)
--------------------------------------------------------------------------------
/scripts/fcc_latlon2fips.R:
--------------------------------------------------------------------------------
1 | # FCC's Census Block Conversions API
2 | # Get Block FIPS for lat/lon
3 | ## After: https://gist.githubusercontent.com/ramhiser/f09a71d96a4dec80994c/raw/d3e1d9fc1e7f38b2a402eee3237221fa9a47d1da/latlong2fips.r
4 |
# Look up the Census block FIPS code for one latitude/longitude pair using
# the FCC Census Block API.
#
# Arguments:
#   latitude, longitude - numeric coordinates (decimal degrees)
#
# Returns the block FIPS as a character string. Returns "" when either
# coordinate is NA or when the response contains no block FIPS, so the
# function always yields a length-one character value (safe for mapply).
latlon2fips <- function(latitude, longitude) {
  # nothing to look up for a missing coordinate; skip the web request
  if (isTRUE(is.na(latitude)) || isTRUE(is.na(longitude))) {
    return("")
  }

  url <- "https://geo.fcc.gov/api/census/block/find?latitude=%f&longitude=%f&showall=true&format=json"
  url <- sprintf(url, latitude, longitude)
  json <- RJSONIO::fromJSON(RCurl::getURL(url))

  # Block FIPS includes state, county, tract & blockgroup FIPS
  fips <- as.list(json$Block)[["FIPS"]]
  if (is.null(fips)) {
    return("")
  }
  as.character(fips)
}
--------------------------------------------------------------------------------
/scripts/google_geocoding_ggmap.R:
--------------------------------------------------------------------------------
1 | #
2 | # Using GGMAP to geocode with the Google Geocoding API
3 | # last updated 08/16/2018
4 | #
5 | # Notes:
6 | # 1. Read the docs at: https://developers.google.com/maps/documentation/geocoding/start
7 | # 2. As of July 2018 you may need to register for a new Google Geocoding API key
8 | # and associate it with a credit card.
9 | # The documentation indicates you get $200 of free Google API access per month.
10 | # That would be 40,000 free geocodes per month if that were the only thing you used it for.
11 | # So - use with care, protect your API keys so others don't use them.
12 | #
13 |
library(ggmap)

# Run this script from the repository root: the data path used below is
# relative to the working directory. (No machine-specific setwd().)

# API key: read it from the GOOGLE_MAPS_API_KEY environment variable so it
# never has to be saved in this file ...
mykey <- Sys.getenv("GOOGLE_MAPS_API_KEY")
# ... or uncomment the next line and paste your own key instead.
#mykey <- "AIzaSyxxxxxxxxxxxxxxxxxxxxxxxxxOQyOFWrTw"

register_google(key = mykey)

# Geocode a city
geocode("San Francisco, CA")

# Geocode a state
geocode("California")

# Geocode a landmark
# (R is case-sensitive: the function is geocode(), not Geocode())
geocode("Golden Gate Bridge")

# Reverse Geocode
revgeocode(c(-122.4194, 37.77493), output = "more")

# Geocode a data frame of addresses
address_data <- read.csv("address_data/formatted/oak_liq_w_ids_types_headers.csv")

# Full address format: 100 Boylston St, Boston, MA 01952
address_data$full_address <- paste0(
  address_data$street, ", ",
  address_data$city, ", ",
  address_data$state, " ",
  address_data$zip
)

#?geocode
# See the google documentation to interpret all of the results
geocoded_output <- geocode(address_data$full_address, output = "more", source = "google", key = mykey)
43 |
44 |
45 |
--------------------------------------------------------------------------------
/scripts/google_geocoding_ggmap_v2.R:
--------------------------------------------------------------------------------
1 | # NOTE YOU MUST HAVE CURRENT DEV VERSION OF GGMAP
2 | # Install updated version of ggmaps
3 | # if(!requireNamespace("devtools")) install.packages("devtools")
4 | # devtools::install_github("dkahle/ggmap", ref = "tidyup")
5 |
6 | library(ggmap)
7 | library(purrr)
# Paths below are relative to the current working directory; run this script
# from the folder that contains oakland_liquor_stores.csv.

# Replace with your google maps API key - don't share!!!
register_google("YOUR_GOOGLE_API_KEY_HERE")

# Test geocoding
geocode("san francisco, ca", output = "latlona")

# Read in sample data
# Oakland Liquor store subset
# oak_liquor_stores.csv
# Header looks like this:
# id name street city state zip type
sites <- read.csv(
  "oakland_liquor_stores.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  strip.white = TRUE  # some fields in the file carry stray leading blanks
)

head(sites)

# Create a single column version of the full address
# (street, city, state and zip separated by single spaces)
sites$addr <- paste(sites$street, sites$city, sites$state, sites$zip)
27 |
28 | ##################################################
29 | # Simple ggmap geocoding
30 | ##################################################
31 | # This is the easiest way but it bombs on bad addresses
32 | # and you lose all previous geocodes
33 | # Uncomment to use!
34 | ## UNCOMMENT BEGIN BELOW
35 | #geocoded_output_df <- geocode(sites$addr, output = "latlona")
36 | #
37 | #rename the columns
38 | #colnames(geocoded_output_df) <-c("lon","lat","google_address")
39 | #
40 | # Append columns
41 | #new_df <- cbind(sites,geocoded_output_df)
42 | ### UNCOMMENT END ABOVE
43 |
44 | ##################################################
45 | # ggmap geocoding - with error handling
46 | ##################################################
geocode_many <- function(id, addr) {
  # Geocode a set of addresses one at a time with google via the ggmap
  # package, keeping every result even when individual addresses fail.
  #
  # Arguments:
  #   id   - identifiers, one per address
  #   addr - single-line addresses to geocode
  #
  # Returns a data frame with one row per address and the columns
  # ID, lat, lon, google_address. If an address cannot be geocoded (the
  # geocoder returns NA, returns nothing, or raises an error) the row has
  # lat = NA, lon = NA and google_address = "not found". An error is turned
  # into a warning so the results gathered so far are not lost.

  # Preallocate one slot per address instead of growing a data frame
  rows <- vector("list", length(addr))

  for (i in seq_along(addr)) {
    print(addr[i])

    x <- tryCatch(
      geocode(addr[i], output = "latlona"),
      error = function(e) {
        warning("Geocoding failed for '", addr[i], "': ",
                conditionMessage(e), call. = FALSE)
        NULL
      }
    )

    found <- !is.null(x) && NROW(x) > 0 && !is.null(x$lat) && !is.na(x$lat[1])

    rows[[i]] <- data.frame(
      ID = id[i],
      lat = if (found) x$lat[1] else NA_real_,
      lon = if (found) x$lon[1] else NA_real_,
      google_address = if (found && !is.null(x$address)) {
        as.character(x$address[1])
      } else {
        "not found"
      },
      stringsAsFactors = FALSE
    )
  }

  if (length(rows) == 0) {
    # no input: return an empty frame that still has the expected columns
    return(data.frame(
      ID = id[0],
      lat = numeric(0),
      lon = numeric(0),
      google_address = character(0),
      stringsAsFactors = FALSE
    ))
  }

  do.call(rbind, rows)
}
79 |
80 | ########################################################################
81 | # Test function - assumes you have an id field with the column label id
82 | ########################################################################
83 | # test the function
# Try the function on the first 10 sites only
sites2 <- head(sites, 10)
the_geocodes <- geocode_many(sites2$id, sites2$addr)
# left join: keep every input row, matched on id / ID
sites2 <- merge(sites2, the_geocodes, by.x = "id", by.y = "ID", all.x = TRUE)
head(sites2)

#################################################################
# Geocode ALL DATA - you can geocode 2,000 addr per day for free
#################################################################
# geocode the data - **Assumes unique id for each row - in a column labeled id**
the_geocodes <- geocode_many(sites$id, sites$addr)

# merge geocoded output with input data
sites <- merge(sites, the_geocodes, by.x = "id", by.y = "ID", all.x = TRUE)

head(sites)
99 |
100 | #################################
101 | ## Add the FIPS code to each row
102 | ##################################
103 |
latlon2fips <- function(latitude, longitude) {
  # Return the 15 digit Census block identifier (GEOID) for one
  # latitude/longitude pair, looked up with the FCC Census Block API.
  #
  # Example: "060650422121006"
  #   06     = state            (digits 1-2)
  #   065    = county           (digits 3-5)
  #   042212 = census tract     (digits 6-11)
  #   1006   = block            (digits 12-15)
  #   1      = block group      (digit 12, the first digit of the block)
  #
  # Returns "" when either coordinate is NA or when the response holds no
  # block FIPS, so the result is always a length-one character value.

  # minor validity checking; scalar-safe test for use inside if()
  if (isTRUE(is.na(latitude)) || isTRUE(is.na(longitude))) {
    return("")
  }

  url <- "https://geo.fcc.gov/api/census/block/find?latitude=%f&longitude=%f&showall=true&format=json"
  url <- sprintf(url, latitude, longitude)
  json <- RJSONIO::fromJSON(RCurl::getURL(url))
  #print(json)

  # Block FIPS includes state, county, tract & blockgroup FIPS
  fips <- as.list(json$Block)[["FIPS"]]
  if (is.null(fips)) {
    return("")
  }
  as.character(fips)
}
125 | # test
# Sanity check: missing coordinates should give an empty string
latlon2fips(NA, NA)

#sites2$census_geoids <- mapply(latlon2fips,sites2$lat,sites2$lon)

# Census GEOIDS for each lat/lon pair from FCC (one web request per row)
sites$census_geoids <- mapply(latlon2fips, sites$lat, sites$lon)

head(sites)
# Write results to file
write.csv(sites, file = "geocoded_ouput_data.csv", row.names = FALSE)

#

#static map with ggmap
# use qmplot to make a scatterplot on a map
qmplot(lon, lat, data = sites, maptype = "toner-lite", color = I("red"))
142 |
143 |
--------------------------------------------------------------------------------
/scripts/oakland_liquor_stores.csv:
--------------------------------------------------------------------------------
1 | id,name,street,city,state,zip,type
2 | 1,Wah Fay Liquors,2101 8th Ave,Oakland,CA,94606,p
3 | 2,Vision Liquor,1615 Macarthur Blvd,Oakland,CA,94602,p
4 | 3,Souza's Liquors,394 12th St,Oakland,CA,94607,p
5 | 4,Tk Liquors,1500 23th Ave,Oakland,CA,94606,p
6 | 5,Quadriga Wines Inc,6193 Ridgemont Dr,Oakland,CA,94619,p
7 | 6,Bev Mo,525 Embarcadero W, Oakland,CA,94607,c
8 | 7,Fairfax Liquor,5403 Foothill Blvd,Oakland,CA,94601,p
9 | 8,Saleen Market,1200 78th Ave,Oakland,CA,94621,m
10 | 9,Park Liquors,828 Franklin St,Oakland,CA,94607,p
11 | 10,Los Camellos,5913 International Blvd,Oakland,CA,94621,p
12 | 11,Vernon Market,3210 Harrison St,Oakland,CA,94611,m
13 | 12,Seventh Street Food & Liquor,1460 7th St,Oakland,CA,94607,m
14 | 13,Sav-Mor Liquor,1333 Peralta St,Oakland,CA,94607,p
15 | 14,Vernon Market,3710 Telegraph Ave,Oakland,CA,94609,m
16 | 15,Buckingham Wine & Spirits,3293 Lakeshore Ave,Oakland,CA,94610,p
17 | 16,Happy Time,1647 8th St,Oakland,CA,94607,p
18 | 17,Gallager Market,3849 Martin Luther King Jr Way,Oakland,CA,94609,m
19 | 18,Grand Piedmont Liquors,3900 Grand Ave,Oakland,CA,94610,p
20 | 19,J J Buckley Fine Wines,7305 Edgewater Dr #D,Oakland,CA,94621,p
21 | 20,Carriage Trade Liquors, 350 E 18th St,Oakland,CA,94606,p
--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_batch.R:
--------------------------------------------------------------------------------
1 | #
2 | # Batch Geocoding with the US Census Geocoding Service
3 | #
4 | # pattyf@berkeley.edu, 05/2/2016
5 | #
6 | ## Documentation:
7 | # http://geocoding.geo.census.gov/
8 | # https://www.census.gov/geo/maps-data/data/geocoder.html
9 | # http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
10 |
11 | #Load libraries
12 | library(httr) # to submit geocoding request
13 | library(ggplot2) # to plot output
14 | library(ggmap) # to plot output
15 | library(leaflet) # for interactive plotting
16 | library(stringr) # to format strings
17 |
# Run this script from the directory that contains the `tiger/` folder; all
# paths below are relative to the working directory. (The workspace is no
# longer wiped and no machine-specific setwd() is issued.)

# our file of addresses that need to be geocoded
tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"

# the output file we will create
geocoded_output_file <- "geocoded_addresses_out.csv"

# The census geocoder can take as input a file of addresses to be geocoded.
# This file can contain up to 1000 addresses.
# The census geocoder does not want column names in the file to be geocoded,
# but we want to add them when we read the data into R to make sense of the data.
#
# For info on the correct format for submitting a file of addresses see:
# https://www.census.gov/geo/maps-data/data/geocoder.html
# Five columns - No headers, comma separated EVEN IF DATA NOT AVAILABLE
# Unique ID, house number and street name, city, state, zipcode
# Two valid examples:
#1, 1600 Pennsylvania Ave NW, Washington, DC,
#2, 1600 Pennsylvania Ave NW,,,20502

# Read in the addresses that we will geocode.
# The file has no header row, so header = FALSE is required: with the
# read.csv default (header = TRUE) the first address would be consumed as
# column names and the address count below would be one too low.
addresses_to_geocode <- read.csv(
  tiger_input_addressFile,
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = c("id", "street", "city", "state", "zip")
)

# Look at the data
head(addresses_to_geocode)

# How many addresses?
num_addresses <- nrow(addresses_to_geocode)

# Now that we have looked at the data we are ready to geocode.
# First, remove the address data object
rm(addresses_to_geocode)
56 |
57 |
get_geocoded_addresses <- function(file_of_addresses) {
  # Submit a file of addresses to the census batch geocoder and return the
  # geocoded rows.
  #
  # Arguments:
  #   file_of_addresses - path to a headerless CSV of addresses
  #
  # Returns a data frame with the geocoder's 12 output columns plus numeric
  # `lon` and `lat` columns.
  #
  # Side effects / globals: reads `processed_rows` and
  # `geocoded_output_file` from the calling environment. When
  # `processed_rows` is 0 the output file is created (with a header row);
  # otherwise the rows are appended to it.

  # Identify the URL to which we will submit the geocoding request
  tiger_url <- "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

  # Make the request to the Census geocoding service
  #
  # The important parameters here are benchmark and vintage.
  # You can read about these in: http://www2.census.gov/geo/pdfs/education/brochures/CensusGeocoder.pdf
  # Or at: http://geocoding.geo.census.gov/
  # The benchmark is the date the data were last updated and the vintage is the year of the census data product it links to.
  # below we are querying the street database with the most current data for linking to the 2014 ACS data
  geocoded_addresses <- POST(
    tiger_url,
    encode = "multipart",
    body = list(
      addressFile = upload_file(file_of_addresses),
      benchmark = "Public_AR_Current",
      vintage = "ACS2014_Current"
    )
  )
  # fail loudly on an HTTP error instead of parsing an error page as CSV
  stop_for_status(geocoded_addresses)

  # Relabel the output column names
  mycols <- c("id", "in_address", "match_status", "match_type", "matched_address", "lon_lat", "tlid", "street_side", "state_fips", "county_fips", "tract_fips", "block_fips")

  # Parse the CSV text of the response directly into a data frame
  mylocs <- read.csv(
    text = content(geocoded_addresses, as = "text", encoding = "UTF-8"),
    header = FALSE,
    col.names = mycols
  )

  # The longitude and latitude for the geocoded addresses are in one column
  # ("lon,lat"). Split them into two numeric columns; unmatched rows get NA.
  coord_parts <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
  mylocs$lon <- as.numeric(vapply(coord_parts, function(p) p[1], character(1)))
  mylocs$lat <- as.numeric(vapply(coord_parts, function(p) p[2], character(1)))

  # Finally, save the geocoded addresses to a file
  if (processed_rows == 0) {
    # If we only processed one file or the first of many, create a new file
    write.csv(mylocs, file = geocoded_output_file, row.names = FALSE)
  } else {
    # Append to the file. write.csv() ignores `append` (with a warning) and
    # would overwrite earlier batches, so write.table() is used instead,
    # with the same CSV conventions and no repeated header row.
    write.table(mylocs, file = geocoded_output_file, sep = ",",
                row.names = FALSE, col.names = FALSE,
                append = TRUE, qmethod = "double")
  }

  # Return the data frame of geocoded addresses
  return(mylocs)
}
118 |
# some counters to keep track of the number of addresses we need to process
# we can only batch geocode 1000 addresses at a time
read_rows <- 1000
processed_rows <- 0

# Now that we have our function to submit addresses to the Census geocoder we can proceed
if (num_addresses < 1000) {
  # If the number of addresses to geocode is less than 1000, just go ahead and geocode them
  geocoded_df <- get_geocoded_addresses(tiger_input_addressFile)

} else {
  # Process 1000 addresses at a time
  # By reading in up to 1000 addresses to geocode from our address file
  # saving them to a file, geocoding that file
  # then write the results to our master geocoded addresses file

  while (processed_rows < num_addresses) {
    # if we still have addresses to geocode
    # read in the next 1000 from the file of addresses to be geocoded.
    # header = FALSE: the file has no header row, so every line is data
    #   (the read.csv default would swallow one address per chunk).
    # colClasses = "character": keep ids and zip codes exactly as written
    #   (e.g. leading zeros are preserved).
    addresses_to_geocode <- read.csv(
      tiger_input_addressFile,
      header = FALSE,
      stringsAsFactors = FALSE,
      colClasses = "character",
      nrows = read_rows,
      skip = processed_rows
    )

    # create a temporary file to hold the up to 1000 addresses to geocode
    temp_infile <- tempfile()

    # Save the up to 1000 addresses to geocode to a file, WITHOUT a header
    # row. write.csv() ignores col.names = FALSE (with a warning), so
    # write.table() is used to really suppress the header.
    write.table(addresses_to_geocode, file = temp_infile, sep = ",",
                row.names = FALSE, col.names = FALSE, qmethod = "double")

    # geocode the batch of 1000 addresses in the temp file
    geocoded_df <- get_geocoded_addresses(temp_infile)

    # delete the temp file
    unlink(temp_infile)

    # increment the counters to see if there are more addresses to geocode
    processed_rows <- processed_rows + read_rows
  }
}
156 |
# We have now geocoded all of the addresses and saved them to a file.
# Let's read in the file of geocoded addresses
# and plot them on a map using ggmap
# read in geocoded addresses
geocoded_results <- read.csv(geocoded_output_file, stringsAsFactors = FALSE)
head(geocoded_results) # take a look at the results

# Centre the basemap on the mean coordinate. Unmatched addresses have NA
# coordinates, so na.rm = TRUE is needed or the centre itself becomes NA.
map_center <- c(
  lon = mean(geocoded_results$lon, na.rm = TRUE),
  lat = mean(geocoded_results$lat, na.rm = TRUE)
)
map <- get_map(location = map_center, zoom = 15)
ggmap(map) +
  geom_point(aes(x = lon, y = lat), size = 4, col = "red", data = geocoded_results)

# save the map image to a file
ggsave("mymap.png")

# Now create an interactive map with Leaflet
# Popups are rendered as HTML, so the label and the address are separated
# with a <br> tag.
map1 <- leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    data = geocoded_results,
    lng = ~lon,
    lat = ~lat,
    radius = 5,
    stroke = FALSE,
    popup = paste("Geocoded Address:<br>", geocoded_results$matched_address),
    color = "red",
    fillOpacity = 0.7
  )

map1 # view it

# You can save leaflet map to html file
# So that you can open and view it anytime
library(htmlwidgets)
saveWidget(map1, file = "map1.html", selfcontained = FALSE)
185 |
186 |
--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_batch_v2.R:
--------------------------------------------------------------------------------
1 | #
2 | # Batch Geocoding with the US Census Geocoding Service
3 | #
4 | # pattyf@berkeley.edu, 05/2/2016
5 | #
6 | ## Documentation:
7 | # http://geocoding.geo.census.gov/
8 | # https://www.census.gov/geo/maps-data/data/geocoder.html
9 | # http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
10 |
11 | # TESTING
12 | # Little test - 10 records
13 | # census_batch_geocode('address_data/sample/sample_10_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')
14 | # Medium test - 2.5K records
15 | # census_batch_geocode('address_data/sample/sample_2500_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')
16 | # Big test - 5K records
17 | # census_batch_geocode('address_data/sample/sample_5k_addresses.csv', id_col='apn', street_col='pstreet_addr', city_col='pcity', state_col='pstate',zip_col='pzip')
18 |
19 | # TODO
20 | #
21 | # When more than 1k records, the counting and subsetting is off by 1
22 | #
23 |
24 | #Load libraries
25 | library(httr) # to submit geocoding request
26 | library(ggplot2) # to plot output
27 | library(ggmap) # to plot output
28 | library(leaflet) # for interactive plotting
29 | library(stringr) # to format strings
30 | library(htmlwidgets)
31 |
# Run this script from the repository root: the sample paths shown in the
# TESTING notes above (address_data/sample/...) are relative to it.
# The workspace is deliberately not cleared and the working directory is not
# changed here, so sourcing this file has no side effects on the session
# beyond loading the libraries above.
37 |
get_geocoded_addresses <- function(file_of_addresses,
                                   benchmark = "Public_AR_Current",
                                   vintage = "ACS2014_Current") {
  # Submit one file of addresses to the US Census batch geocoder.
  #
  # Args:
  #   file_of_addresses: path to the CSV file that is uploaded as `addressFile`.
  #   benchmark: benchmark name sent with the request.
  #   vintage: vintage name sent with the request.
  #     See http://geocoding.geo.census.gov/ for the available values.
  #
  # Returns:
  #   On HTTP status 200: a data frame with the columns id, in_address,
  #   match_status, match_type, matched_address, lon_lat, tlid, street_side,
  #   state_fips, county_fips, tract_fips, block_fips, plus numeric lon and lat
  #   split out of lon_lat.
  #   On any other status: prints an error line and returns 0.

  # The URL to which we submit the geocoding request
  tiger_url <- "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"

  # Pass the caller's benchmark/vintage through instead of hard-coding them
  response <- POST(
    tiger_url,
    encode = "multipart",
    body = list(
      addressFile = upload_file(file_of_addresses),
      benchmark = benchmark,
      vintage = vintage
    )
  )

  if (response$status_code != 200) {
    print(paste("ERROR: problem with the census geocoding service, status code:", response$status_code))
    return(0)
  }

  # We got a success status code from the census api
  print("Successful return from census geocoder.")

  # Column labels for the headerless CSV the geocoder sends back
  mycols <- c("id", "in_address", "match_status", "match_type", "matched_address",
              "lon_lat", "tlid", "street_side", "state_fips", "county_fips",
              "tract_fips", "block_fips")

  # Parse the response body directly; no temp file round trip is needed
  raw_csv <- content(response, as = "text", encoding = "UTF-8")
  mylocs <- read.csv(text = raw_csv, header = FALSE, col.names = mycols)

  # lon_lat holds "lon,lat" in one field; unmatched rows yield NA for both
  coord_parts <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
  mylocs$lon <- as.numeric(vapply(coord_parts, function(p) p[1], character(1)))
  mylocs$lat <- as.numeric(vapply(coord_parts, function(p) p[2], character(1)))

  return(mylocs)
}
91 |
92 |
census_batch_geocode <- function(infile, id_col = 'id', street_col = 'street',
                                 city_col = 'city', state_col = 'state',
                                 zip_col = 'zip', header_row = TRUE) {
  # Geocode every address in a CSV file with the Census batch geocoder and
  # save the results next to the input as "<name>_geocoded.csv".
  #
  # Args:
  #   infile: path to the input CSV file.
  #   id_col, street_col, city_col, state_col, zip_col: names of the columns
  #     holding the unique id, street address, city, state and zip code.
  #   header_row: does the input file start with a header row? With FALSE the
  #     columns are named V1, V2, ... so the *_col arguments must use those.
  #
  # Returns:
  #   Invisibly, the final status message (or NULL when there was nothing to
  #   geocode). The geocoded records are written to the output file.
  #
  # The geocoder is fed at most `batch_size` addresses per request, written
  # without headers in the order: id, street, city, state, zip. For the
  # expected file format see:
  # https://www.census.gov/geo/maps-data/data/geocoder.html

  # This script submits at most 1000 addresses per file
  batch_size <- 1000

  in_addresses <- read.csv(infile, header = header_row, stringsAsFactors = FALSE)

  # Only rewrite a trailing ".csv"; never let the output clobber the input
  outfile <- sub("\\.csv$", "_geocoded.csv", infile, ignore.case = TRUE)
  if (identical(outfile, infile)) {
    outfile <- paste0(infile, "_geocoded.csv")
  }

  address_cols <- c(id_col, street_col, city_col, state_col, zip_col)
  if (!all(address_cols %in% colnames(in_addresses))) {
    stop('EXITING: The named address columns are not in input file.')
  }
  addresses_to_geocode <- in_addresses[, address_cols, drop = FALSE]

  # How many addresses?
  num_addresses <- nrow(addresses_to_geocode)
  print(paste0('About to geocode ', num_addresses, " addresses..."))

  if (num_addresses == 0) {
    print("Nothing to geocode: input file has no address rows.")
    return(invisible(NULL))
  }

  single_batch <- num_addresses <= batch_size
  if (single_batch) {
    print("Processing all records in one file submission.")
  }

  wrote_output <- FALSE   # has the output file been created yet?
  failed_batches <- 0     # batches the geocoder could not process

  # Non-overlapping 1-based chunks: 1..1000, 1001..2000, ...
  for (first_row in seq(1, num_addresses, by = batch_size)) {
    last_row <- min(first_row + batch_size - 1, num_addresses)
    if (!single_batch) {
      print(paste0("processing rows [", first_row, "] to [", last_row, "].. This could take 2 - 7 minutes."))
    }

    address_subset <- addresses_to_geocode[first_row:last_row, , drop = FALSE]

    # Write this chunk to a temporary headerless CSV, geocode it, and always
    # remove the temp file afterwards
    temp_infile <- tempfile(fileext = ".csv")
    write.table(address_subset, file = temp_infile, sep = ",", quote = FALSE,
                row.names = FALSE, col.names = FALSE)
    geocoded_df <- tryCatch(
      get_geocoded_addresses(temp_infile),
      finally = unlink(temp_infile)
    )

    # get_geocoded_addresses() returns 0 (not a data frame) on failure
    if (!is.data.frame(geocoded_df) || nrow(geocoded_df) == 0) {
      print("Problem! Unable to geocode.")
      failed_batches <- failed_batches + 1
      next
    }
    print(paste0("Number of records geocoded: ", nrow(geocoded_df)))

    if (!wrote_output) {
      # First successful batch: create the output file with a header row
      print(paste0('Saving geocoded data to: ', outfile))
      write.csv(geocoded_df, file = outfile, row.names = FALSE)
      wrote_output <- TRUE
    } else {
      # Later batches: append rows only, quoted the same way as write.csv
      print(paste0('Appending geocoded data to: ', outfile))
      write.table(geocoded_df, file = outfile, sep = ",", row.names = FALSE,
                  col.names = FALSE, append = TRUE, qmethod = "double")
    }
  }

  if (failed_batches > 0) {
    warning(failed_batches, " batch(es) could not be geocoded; output is incomplete.",
            call. = FALSE)
  }
  print(paste0("Done geocoding ", infile, "- check in address count == out address count!"))
}
207 | ###############################################################################
208 |
imap_census_geocodes <- function(infile, save_map = FALSE) {
  # Plot a file of geocoded addresses on an interactive Leaflet map.
  #
  # Args:
  #   infile: path to a CSV of geocoded addresses; the columns lon, lat and
  #     matched_address are used.
  #   save_map: when TRUE, also save the map to "map1.html" in the working
  #     directory so it can be opened later.
  #
  # Returns:
  #   The leaflet map widget.

  geocoded_results <- read.csv(infile, stringsAsFactors = FALSE)

  # One red circle per address. Popup content is rendered as HTML, so the
  # line break after the label has to be a <br> tag.
  map1 <- leaflet() %>%
    addTiles() %>%
    addCircleMarkers(
      data = geocoded_results,
      lng = ~lon,
      lat = ~lat,
      radius = 5,
      stroke = FALSE,
      popup = paste("Geocoded Address:<br>", geocoded_results$matched_address),
      color = "red",
      fillOpacity = 0.7
    )

  if (isTRUE(as.logical(save_map))) {
    htmlwidgets::saveWidget(map1, file = "map1.html", selfcontained = FALSE)
  }
  return(map1)
}
236 |
--------------------------------------------------------------------------------
/scripts/older_scripts/census_geocoding_single_address.R:
--------------------------------------------------------------------------------
1 | #
2 | # Single Address Geocoding with the US Census Geocoding Service
3 | #
4 | # pattyf@berkeley.edu, 05/2/2016
5 | #
6 | ## Documentation:
7 | # http://geocoding.geo.census.gov/
8 | # https://www.census.gov/geo/maps-data/data/geocoder.html
9 | # http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
10 |
11 | #clean environment
12 | rm(list=ls())
13 |
14 | #Load libraries
15 | library(RJSONIO)
16 | library(plyr)
17 |
18 | #set working directory
19 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
20 |
# Census single-address endpoint and the fixed query parameters appended to
# every request.
# Available benchmarks and vintages: http://geocoding.geo.census.gov/
# Their definitions:
# http://www2.census.gov/geo/pdfs/education/brochures/CensusGeocoder.pdf
census_prefix <- "http://geocoding.geo.census.gov/geocoder/geographies/address?"
census_suffix <- "&benchmark=Public_AR_Current&vintage=ACS2014_Current&format=json"

# Input file of addresses and the output file this script creates
census_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"
geocoded_output_file <- "geocoded_addresses_single_out.csv"

# Read the addresses, labelling the five columns as we go
addresses_to_geocode <- read.csv(
  census_input_addressFile,
  stringsAsFactors = FALSE,
  col.names = c('id','street','city','state','zip')
)

# Build the "street=...&city=...&state=...&zip=..." query fragment that the
# Census API GET call expects, one per address
addresses_to_geocode$census_format <- with(
  addresses_to_geocode,
  paste0("street=", street, "&city=", city, "&state=", state, "&zip=", zip)
)
44 |
census_geocode <- function(address) {
  # Geocode a single address with the Census geocoding API.
  #
  # Args:
  #   address: query-string fragment such as
  #     "street=...&city=...&state=...&zip=...".
  #
  # Returns:
  #   A one-row data frame with the columns lat, lon and geoid. All three are
  #   NA when the request failed, raised a warning, or matched nothing; geoid
  #   stays NA when the match carries no "Census Tracts" entry.
  #
  # Uses the script-level census_prefix and census_suffix to build the URL.

  # URL-encode the address and assemble the full request URL
  g_address <- paste0(census_prefix, URLencode(address), census_suffix)

  # Default answer, returned unchanged unless we get a usable match
  answer <- data.frame(lat = NA, lon = NA, geoid = NA)

  # HTTP requests can hang or fail, so trap errors and warnings here and keep
  # going; this matters when geocoding many addresses in a row.
  out <- tryCatch(
    {
      message("Trying to Geocode with Census API")
      fromJSON(g_address)
    },
    error = function(cond) {
      message(paste("Address URL does not seem to exist:", g_address))
      message("Here's the original error message:")
      message(conditionMessage(cond))
      NULL
    },
    warning = function(cond) {
      message(paste("Address URL caused a warning:", g_address))
      message("Here's the original warning message:")
      message(conditionMessage(cond))
      NULL
    },
    finally = {
      # Runs whether or not the request succeeded
      message(paste("Processed Address:", g_address))
    }
  )

  matches <- if (is.list(out)) out$result$addressMatches else NULL
  if (length(matches) > 0) {
    # Use the first (best) match
    best <- matches[[1]]
    answer$lon <- best$coordinates[['x']]
    answer$lat <- best$coordinates[['y']]

    # Only read the tract GEOID when the response actually contains one
    tracts <- best$geographies[["Census Tracts"]]
    if (length(tracts) > 0 && "GEOID" %in% names(tracts[[1]])) {
      answer$geoid <- tracts[[1]][["GEOID"]]
    }
  }
  return(answer)
}
108 |
## Geocode every formatted address; ldply() stacks the one-row results
## into a single data frame
geocoded.df <- ldply(addresses_to_geocode$census_format, census_geocode)

# Put the geocoding results alongside the input addresses
geocoded_addresses <- cbind(addresses_to_geocode, geocoded.df)

# take a look at our geocoded output
head(geocoded_addresses)

# Save geocoded addresses to a file
write.csv(geocoded_addresses, file = geocoded_output_file, row.names = FALSE)
123 |
--------------------------------------------------------------------------------
/scripts/older_scripts/esri_wgs_geocoding.R:
--------------------------------------------------------------------------------
1 |
2 | # This script provides an example of accessing the ESRI world geocoding service.
3 |
4 | # This code borrows HEAVILY (almost completely) and with thanks from
5 | # Claudia Engel: https://github.com/cengel/ArcGIS_geocoding
6 | # See her repo for more details and other examples
7 |
8 | ##################################
9 | ## Single Line Geocode Function ##
10 | ##################################
11 | # The function takes:
12 | # - one address at a time as one string (SingleLine)
13 | # - token - which you get from developers.arcgis.com
14 | # see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
15 | # - TRUE/FALSE - allow to return Postal codes if a full street address match cannot be found (default is TRUE)
16 | #
17 | # The function returns:
18 | # lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
19 | # score - The accuracy of the address match between 0 and 100.
20 | # locName - The component locator used to return a particular match result
21 | # status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
22 | # matchAddr - Complete address returned for the geocode request.
23 | # side - The side of the street where an address resides relative to the direction
24 | # of feature digitization
25 | # addressType - The match level for a geocode request. "PointAddress" is typically the
26 | # most spatially accurate match level. "StreetAddress" differs from PointAddress
27 | # because the house number is interpolated from a range of numbers. "StreetName" is similar,
28 | # but without the house number.
29 |
geocodeSL <- function(address, token, postal = TRUE) {
  # Geocode one single-line address with the ESRI World Geocoding Service.
  #
  # Args:
  #   address: the full address as one string.
  #   token: ESRI access token.
  #   postal: when FALSE, the request adds category=Address; when TRUE no
  #     category filter is sent.
  #
  # Returns:
  #   A one-row data frame with lon, lat, score, locName, status, matchAddr,
  #   side and addressType. Fields missing from the response become NA.
  #   Stops with an error on an HTTP failure, an error payload, or an empty
  #   result.

  # ESRI geolocator (https so the token is not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # Escape backslashes and double quotes so the address is a valid JSON string
  safe_address <- gsub('(["\\\\])', "\\\\\\1", address)
  addresses_json <- paste0(
    '{"records":[{"attributes":{"OBJECTID":1,"SingleLine":"',
    safe_address,
    '"}}]}'
  )

  # Let httr URL-encode each parameter separately, so characters such as
  # "&" or "#" inside the address cannot break the query string
  query <- list(addresses = addresses_json, token = token, f = "json")
  if (!isTRUE(as.logical(postal))) {
    query$category <- "Address"
  }

  # submit
  rawdata <- httr::GET(gserver, query = query)
  httr::stop_for_status(rawdata)

  # parse JSON and surface service-level errors
  res <- httr::content(rawdata, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("ESRI geocoding service returned an error: ", res$error$message,
         call. = FALSE)
  }
  if (length(res$locations) == 0) {
    stop("ESRI geocoding service returned no locations.", call. = FALSE)
  }

  loc <- res$locations[[1]]
  or_na <- function(x) if (is.null(x)) NA else x

  resdf <- data.frame(
    lon = as.numeric(or_na(loc$location$x)),
    lat = as.numeric(or_na(loc$location$y)),
    score = or_na(loc$score),
    locName = or_na(loc$attributes$Loc_name),
    status = or_na(loc$attributes$Status),
    matchAddr = or_na(loc$attributes$Match_addr),
    side = or_na(loc$attributes$Side),
    addressType = or_na(loc$attributes$Addr_type)
  )
  return(resdf)
}
58 |
59 | #######################################
60 | ## Multi Line Batch Geocode Function ##
61 | #######################################
62 | # The function takes:
63 | # - ID variable to identify records, must be numeric and should be unique
64 | # - multiple addresses as vectors, separated into: Street, City, State, Zip
65 | # - token - which you get from developers.arcgis.com
66 | # see: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-authenticate-a-request.htm
67 | #
68 | # It can take a maximum of 1000 addresses. If more, it returns an error.
69 | #
70 | # The function returns a data frame with the following fields:
71 | # ID - Result ID can be used to join the output fields in the response to the attributes
72 | # in the original address table.
73 | # lon, lat - The primary x/y coordinates of the address returned by the geocoding service in WGS84
74 | # score - The accuracy of the address match between 0 and 100.
75 | # locName - The component locator used to return a particular match result
76 | # status - Whether a batch geocode request results in a match (M), tie (T), or unmatch (U)
77 | # matchAddr - Complete address returned for the geocode request.
78 | # side - The side of the street where an address resides relative to the direction
79 | # of feature digitization
80 | # addressType - The match level for a geocode request. "PointAddress" is typically the
81 | # most spatially accurate match level. "StreetAddress" differs from PointAddress
82 | # because the house number is interpolated from a range of numbers. "StreetName" is similar,
83 | # but without the house number.
84 |
geocodeML_batch <- function(id, street, city, state, zip, token) {
  # Batch geocode up to 1000 multi-field addresses with the ESRI World
  # Geocoding Service.
  #
  # Args:
  #   id: numeric record identifiers, sent as OBJECTID.
  #   street, city, state, zip: address components; length-1 values are
  #     recycled across all records.
  #   token: ESRI access token.
  #
  # Returns:
  #   A data frame with one row per returned location and the columns ID,
  #   lon, lat, score, locName, status, matchAddr, side and addressType
  #   (missing fields become NA); an empty data frame when no locations are
  #   returned. Stops with an error for more than 1000 ids, a non-numeric
  #   id, an HTTP failure, or an error payload from the service.

  # check if we have more than 1000, if so stop.
  if (length(id) > 1000) {
    print(paste("length is: ", length(id)))
    stop("Can only process up to 1000 addresses at a time.")
  }

  # check if id is numeric
  if (!is.numeric(id)) {
    stop("id variable needs to be numeric.")
  }

  # Align the inputs; address parts are sent as strings, the id as a number
  adr_df <- data.frame(
    OBJECTID = id,
    Street = as.character(street),
    City = as.character(city),
    State = as.character(state),
    Zip = as.character(zip),
    stringsAsFactors = FALSE
  )

  # Build one {attributes: {...}} record per row. Going row by row keeps each
  # column's type, unlike apply(), which coerces through a character matrix.
  records <- lapply(seq_len(nrow(adr_df)), function(i) {
    list(attributes = list(
      OBJECTID = adr_df$OBJECTID[i],
      Street = adr_df$Street[i],
      City = adr_df$City[i],
      State = adr_df$State[i],
      Zip = adr_df$Zip[i]
    ))
  })
  adr_json <- rjson::toJSON(list(records = records))

  # Geocoding web service URL (https so the token is not sent in clear text)
  gserver <- "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/geocodeAddresses"

  # submit
  req <- httr::POST(
    url = gserver,
    body = list(addresses = adr_json, f = "json", token = token),
    encode = "form"
  )
  httr::stop_for_status(req)

  # parse, and surface service-level errors instead of returning nothing
  res <- httr::content(req, "parsed", "application/json")
  if (!is.null(res$error)) {
    stop("ESRI geocoding service returned an error: ", res$error$message,
         call. = FALSE)
  }

  or_na <- function(x) if (is.null(x)) NA else x
  rows <- lapply(res$locations, function(loc) {
    data.frame(
      ID = or_na(loc$attributes$ResultID),
      lon = as.numeric(or_na(loc$location$x)),
      lat = as.numeric(or_na(loc$location$y)),
      score = or_na(loc$score),
      locName = or_na(loc$attributes$Loc_name),
      status = or_na(loc$attributes$Status),
      matchAddr = or_na(loc$attributes$Match_addr),
      side = or_na(loc$attributes$Side),
      addressType = or_na(loc$attributes$Addr_type)
    )
  })

  if (length(rows) == 0) {
    return(data.frame())
  }
  # Bind once at the end rather than growing a data frame inside a loop
  resdfr <- do.call(rbind, rows)
  return(resdfr)
}
140 |
141 | #--------------------------------------------------------------------------------------
142 | # Some code to use the above functions
143 | #--------------------------------------------------------------------------------------
144 |
# Your ESRI access token goes here
myToken <- "enter your long ugly ESRI geocoding access token here"

# ---------------------------
# GEOCODE A SINGLE ADDRESS
# ---------------------------
geocode_output <- geocodeSL(
  "1600 Pennsylvania Avenue NW, Washington, DC",
  myToken,
  postal = TRUE
)

# -----------------------------
# GEOCODE A BATCH OF ADDRESSES
# -----------------------------

# A small made-up data frame of addresses to geocode
adr_df <- data.frame(
  ID = 1:3,
  street = c('450 Serra Mall', '1600 Amphitheatre Pkwy', '1355 Market Street Suite 900'),
  city = c('Stanford', 'Mountain View', 'San Francisco'),
  state = 'CA',
  zip = c('94305', '94043', '94103')
)

# Send the address columns to the batch geocoder
adr_gc <- with(
  adr_df,
  geocodeML_batch(ID, street, city, state, zip, myToken)
)

# Join the results back onto the input, keeping every input row
merge(adr_df, adr_gc, by = "ID", all.x = TRUE)
--------------------------------------------------------------------------------
/scripts/older_scripts/geocode_it.R:
--------------------------------------------------------------------------------
1 | library(plyr)
2 | library(ggmap)
3 | library(readxl)
4 | library(stringr)
5 |
6 | setwd("~/Documents/Dlab/consults/leora")
7 |
# Load the input spreadsheet and preview it
data <- read_excel("doh2.xlsx")
head(data)

# The geocoder wants one single-line address, so join the parts with commas
data$address_long <- paste(data$address, data$city, "CA", data$zip, sep = ",")
head(data)

# First pass: geocode every address with Google
geocoded_output <- geocode(data$address_long, output = "latlona", source = "google")

# Keep the warnings so problem addresses can be inspected
warnings_out <- warnings()
head(warnings_out)

# Attach the geocoder output to the input rows
geocoded_data <- data.frame(data, geocoded_output)
head(geocoded_data)

# Split into rows that did / did not get coordinates, then sanity-check counts
missing_coords <- is.na(geocoded_data$lat)
not_geocoded <- geocoded_data[missing_coords, ]
yes_geocoded <- geocoded_data[!missing_coords, ]
nrow(geocoded_data) == (nrow(not_geocoded) + nrow(yes_geocoded))

# Map the geocoded points, centred on their mean location, as a visual check
mymap <- get_map(
  location = c(lon = mean(yes_geocoded$lon), lat = mean(yes_geocoded$lat)),
  zoom = 4
)

ggmap(mymap) +
  geom_point(aes(x = lon, y = lat), data = yes_geocoded, size = 2, col = "red")

# Google limits free geocoding to 2500 addresses per day,
# so check how much of the quota is left
geocodeQueryCheck()

# The failures are out-of-state addresses: strip the ",CA," added above
not_geocoded$add2 <- gsub(",CA,", " ", not_geocoded$address_long, fixed = TRUE)

# Drop the empty geocoder columns before retrying
not_geocoded <- not_geocoded[, setdiff(names(not_geocoded), c('lat','lon','address.1'))]
str(not_geocoded)

# Second pass on the corrected addresses; check warnings() afterwards
geoout2 <- geocode(not_geocoded$add2, output = "latlona", source = "google")

geocoded2 <- data.frame(not_geocoded, geoout2)
head(geocoded2)

# After fixing any bad addresses flagged by warnings(), run the pass again
geoout2 <- geocode(not_geocoded$add2, output = "latlona", source = "google")

geocoded2 <- data.frame(not_geocoded, geoout2)
head(geocoded2)

nrow(geocoded_data) == (nrow(geocoded2) + nrow(yes_geocoded))

# Line the retry results up with yes_geocoded: drop the old address_long so
# add2 takes its place, then rename the columns to match
# before: "primkey" "address" "zip" "city" "address_long" "add2" "lon" "lat" "address.1"
geocoded3 <- geocoded2[, setdiff(names(geocoded2), c('address_long'))]
# after:  "primkey" "address" "zip" "city" "add2" "lon" "lat" "address.1"
newnames <- c("primkey", "address","zip","city","address_long","lon","lat","address.1")
names(geocoded3) <- newnames

# Stack both passes and confirm no input rows were lost
all_geocodes <- rbind(yes_geocoded, geocoded3)
nrow(all_geocodes) == nrow(data)

# Final, more descriptive column names
newnames2 <- c("primkey", "address","zip","city","geocoded_address","lon","lat","google_address")
names(all_geocodes) <- newnames2
85 |
86 |
87 |
88 | #---------------------------------------
89 | # Get FIPS code to link to census data
90 | #---------------------------------------
91 | library(RCurl)
92 | library(RJSONIO)
93 | # FCC's Census Block Conversions API
94 | # http://www.fcc.gov/developers/census-block-conversions-api
95 |
latlong2fips <- function(latitude, longitude) {
  # Look up the Census block FIPS code for one point with the FCC block API.
  # After: https://gist.github.com/ramhiser/f09a71d96a4dec80994c
  #
  # Args:
  #   latitude, longitude: a single numeric coordinate pair.
  #
  # Returns:
  #   The block FIPS code as a character string when the service reports
  #   status "OK"; "none" when an input is not numeric, the request fails,
  #   or the status is not "OK"; "NANA" when an input is NA.

  thecode <- "none"

  # Scalar checks, so use the short-circuit operator
  if (!is.numeric(latitude) || !is.numeric(longitude)) {
    return(thecode)
  }
  if (is.na(latitude) || is.na(longitude)) {
    return("NANA")
  }

  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
  url <- sprintf(url, latitude, longitude)
  print(url)

  # A failed request should not abort a whole mapply() run: warn and fall
  # back to "none" for this point
  json <- tryCatch(
    RJSONIO::fromJSON(RCurl::getURL(url)),
    error = function(cond) {
      warning("FCC block lookup failed for ", url, ": ", conditionMessage(cond),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(json)) {
    return(thecode)
  }

  # Example response:
  # {"Block":{"FIPS":"240054114062015"},"County":{"FIPS":"24005","name":"Baltimore"},"State":{"FIPS":"24","code":"MD","name":"Maryland"},"status":"OK","executionTime":"103"}
  if (isTRUE(json$status == "OK")) {
    thecode <- as.character(json$Block['FIPS']) # Census 2010 Block FIPS Code
  }
  return(thecode)
}
119 |
# Split records by whether they have coordinates, and sanity-check the counts
na_geocodes <- subset(all_geocodes, is.na(lat))
nrow(na_geocodes)
not_na_geocodes <- subset(all_geocodes, !is.na(lat))
nrow(not_na_geocodes)
nrow(not_na_geocodes) + nrow(na_geocodes) == nrow(all_geocodes)

# Look up a block FIPS code for every record that has coordinates.
# vapply() always returns a character vector, even for zero rows.
not_na_geocodes$fips <- vapply(
  seq_len(nrow(not_na_geocodes)),
  function(i) latlong2fips(not_na_geocodes$lat[i], not_na_geocodes$lon[i]),
  character(1)
)

# take a look
head(all_geocodes)

# Records without lat/lon get the placeholder code; rep() keeps this valid
# when there are no such records
na_geocodes$fips <- rep("none", nrow(na_geocodes))

# combine the two data frames
geocodes_w_fips <- rbind(not_na_geocodes, na_geocodes)

# FIPS CODES (15-digit block code)
# digits 1-2: state
# digits 3-5: county
# digits 6-11: tract
# digit 12: block group
# digits 12-15: block
geocodes_w_fips$fips_tract <- substr(geocodes_w_fips$fips, 1, 11)
geocodes_w_fips$fips_tract_only <- substr(geocodes_w_fips$fips, 6, 11)

# Save geocoded output to a file
# Need to quote output so leading zeros in fips codes preserved
write.table(geocodes_w_fips, file = "google_geocoded_output.csv",
            row.names = FALSE, quote = TRUE, sep = ",")
150 |
--------------------------------------------------------------------------------
/scripts/older_scripts/getFipsForPoints.R:
--------------------------------------------------------------------------------
1 | ## ############################################################################
2 | #
3 | # Joining Census FIPS codes to points via point in polygon overlay
4 | #
5 | # Author: Patty Frontiera pfrontiera@berkeley.edu
6 | # Date: 2014_04_15
7 | # Last updated: 2014_04_17
8 | #
9 | # Purpose:
10 | ## This script takes as input a point file and a file of Census Tiger data
11 | ## performs a spatial overlay of the two
12 | ## and adds the Census FIPS code to the point file attribute table
13 | ## Then writes the output to CSV
14 | ##
15 | # #ASSUMPTION: input point data has fields "latitude" and "longitude"
16 | ##
17 | ## This same practice can be done much faster in ArcGIS or PostGIS
18 | ## but this method is fine for < 1 million records
19 | ##
20 | ## This approach will work on an offline server provided
21 | ## the data, R code and R libraries reside on the server.
22 | ##
23 | ## THIS IS SAMPLE CODE - you will need to make changes!
24 | ## ############################################################################
25 |
26 | # clean start - remove any objects in the environment
27 | rm(list = ls())
28 |
29 | #LOAD LIBS
30 | require(rgdal)
31 | require(R.utils)
32 |
33 | # ########################################################################################################
34 | # USER SUPPLIED VALUES
35 | # ########################################################################################################
36 |
37 | working_dir <- "/Users/pattyf/geocoding/temp"
38 |
39 | point_file <- "/Users/pattyf/geocoding/test_address_points.csv"
40 | # You can download some sample point data from this url:
41 | ## https://gist.githubusercontent.com/pattyf/9091aca4d536e983beea/raw/65b4ca99a215b65cdd7c2406dfbac9749eb897f6/test_address_points
42 |
43 | point_file_delimiter <- "|" # I prefer this delimiter to a comma as address components often contain commas
44 |
45 | point_file_crs <- "4326" # These points use geographic coordinates with the WGS84 datum
46 | # WGS 84 - coordinate reference system (crs) used by most GPS / Google maps etc
47 | ## AKA - spatial reference system or map projection or coordinate system
48 | ## See spatialreference.org - http://spatialreference.org/ref/epsg/4326/
49 |
50 | # HEY: IMPORTANT
51 | #ASSUMPTION: input point data has fields "latitude" and "longitude"
52 |
53 | ## Census block data - must point to file on your computer
54 | #census_file <- '/Users/pattyf/Gisdata/Census/tabblock2010_06_pophu/tabblock2010_06_pophu.shp'
55 |
56 | ## Census blockgroup data - must point to file on your computer
57 |
58 | #census_file <- '/Users/pattyf/Gisdata/Census/tl_2014_06_tract/tl_2014_06_bg.shp'
59 |
60 | ## Census tract data - must point to file on your computer
61 | census_file <- '/Users/pattyf/Gisdata/Census/tl_2014_06_tract/tl_2014_06_tract.shp'
62 | # CA block-level census data were downloaded from the census website, url below:
63 | ## http://www2.census.gov/geo/tiger/TIGER2014/TABBLOCK/tl_2014_06_tabblock10.zip
64 | ## Could automate the download but adds unneeded complexity to this script
65 | ## THIS IS BIG FILE = 415MB or so
66 | ## You can download a smaller file by downloading larger census geographies
67 | ### eg tracts level data
68 | ## http://www2.census.gov/geo/tiger/TIGER2014/TRACT/tl_2014_06_tract.zip
69 | ### or block group level data
70 | ### http://www2.census.gov/geo/tiger/TIGER2014/BG/tl_2014_06_bg.zip
71 | ## However, if you intersect points with the block level data
72 | ## you get a FIPS CODE that includes the state, county, tract, blockgroup and block id
73 | ## You need to change this file if not doing CA
74 | ## or if you want to change the input remote census data file, eg to smaller file like tracts
75 | ## See http://www2.census.gov for details
76 | ## Note there are several vintages (year versions) for each census product. For
77 | ## tracts, block groups, and blocks these don't change between census - there are only improvements/corrections
78 | ## If you are interested in comparisons over time (eg 2000 - 2010 census) get the harmonized data from NHGIS
79 |
80 | #census_layer <- 'tabblock2010_06_pophu' # The layer is the name of the feature layer within the file
81 | # For shapefiles it is the same as the prefix of the shapefile
82 |
83 | #census_layer <- 'tl_2014_06_bg' #census blockgroup level data
84 |
85 | census_layer <- 'tl_2014_06_tract' #census tract data
86 |
87 | census_crs <- '4269' # US Census TIGER/Line data use geographic coordinates with the NAD83 datum
88 | # The EPSG code for which is 4269
89 | # See http://spatialreference.org/ref/epsg/4269/ for details.
90 |
91 | census_geograhpy_type = "tracts" # one of tracts, blocks, or blockgroups
92 |
93 | output_crs <- '4326' #WGS84
94 | ## USE '3310' for CA Teale Albers - See http://spatialreference.org/ref/epsg/3310/
95 | ## Used for CA state-wide data processing (metric calculations)
96 | ## If the output CRS does not match the census CRS we will
97 | ## transform the data before saving to new file as last step
98 |
99 | out_csv_file <-"point_data_withfips.csv" # The name of the output csv file
100 | # Will be written to working_dir if full path not specified
101 |
102 | out_shapefile_prefix <- "point_data_withfips"
103 | out_shapefile_directory <- "." # The period indicates the current working dir.
104 | # You can specify another directory as needed
105 |
106 | debug <- 1 # We are just testing this script if debug is 1. If running for real, set this to 0
107 | # When debug is 1 we only read in first 50 records from point file
108 | # ########################################################################################################
109 |
110 | # Load needed libraries
111 | library(sp)
112 | library(rgdal)
113 | library("R.utils") # for file utils, like zip and unzipping files
114 |
115 | # Set working directory for input and output where full path not given
116 | setwd(working_dir)
117 |
118 | # Read in point data
119 | ## In this example we have geocoded addresses - 355,054 addresses all in Alameda County (would prefer a state sample)
120 | ## Format of these address data points in input file:
121 | ## Inaddress|street_address|street_name|latitude|country_code|fips_county|country_name|country_code3|longitude|region|locality|street_number|confidence|
122 |
123 | if (debug == 1) {
124 |   # When debug is 1 only read in the first 50 records from the point file; both branches keep strings as character
125 |   point_data <- read.table(point_file, sep = point_file_delimiter, header = TRUE, stringsAsFactors = FALSE, nrows = 50)
126 | } else {
127 |   point_data <- read.table(point_file, sep = point_file_delimiter, header = TRUE, stringsAsFactors = FALSE)
128 | }
129 |
130 | # Convert data frame to a spatialpoints data frame object
131 | coordinates(point_data) =~ longitude+latitude
132 |
133 | # Specify the CRS of the input point data
134 | proj4string(point_data) = CRS(paste0("+init=epsg:",point_file_crs))
135 |
136 | # Read the census block data into R
137 | census_polys <- readOGR(census_file,census_layer)
138 |
139 | # Specify the CRS of the input census data
140 | proj4string(census_polys) = CRS(paste0("+init=epsg:",census_crs)) # define the projection
141 |
142 | # CRS of both layers must match!
143 | ## If they do not then the point data should be transformed
144 | ## as it is much easier operation on points than polygons
145 | if (point_file_crs != census_crs) {
146 | point_data <- spTransform(point_data,CRS(paste0("+init=epsg:",census_crs)))
147 | }
148 |
149 | #
150 | # Spatial Intersection
151 | ## Get fips code for each address point
152 | ## The block key from this dataset is col 5, which has the name BLOCKID10 (census 2010 block id)
153 | ptm <- proc.time() # Time this operation to get a sense of how it will scale to more points
154 | # Attach the census key of the polygon each point falls in.
155 | # Block-level files name that key BLOCKID10; tract and block group
156 | # files both name it GEOID.
157 | if (census_geograhpy_type == "blocks") {
158 |   point_data$fips_code <- over(point_data, census_polys)$BLOCKID10
159 | } else if (census_geograhpy_type == "tracts" ||
160 |            census_geograhpy_type == "blockgroups") {
161 |   point_data$fips_code <- over(point_data, census_polys)$GEOID
162 | }
163 |
164 | print(proc.time() - ptm)
165 |
166 | # ###############################################
167 | # Notes on output from testing
168 | # ###############################################
169 | ## It took 18 minutes to intersect ~350,000 address points
170 | ## with census block-level data
171 | ##
172 | ## user system elapsed
173 | ## 1049.953 18.078 1072.092
174 | ##
175 | # How long does this operation take in ArcGIS?
176 | ## on our geocoding server it took only
177 | ## 2 minutes using spatial intersect operation.
178 | # ###############################################
179 |
180 |
181 | # ###############################################
182 | # Transform the data before saving if needed
183 | # ###############################################
184 | if (output_crs != census_crs) {
185 | point_data <- spTransform(point_data,CRS(paste0("+init=epsg:",output_crs)))
186 | }
187 |
188 |
189 | # ###############################################
190 | # Save output to local files
191 | # ###############################################
192 | #
193 | ## as csv
194 | write.csv(point_data@data,out_csv_file,row.names=FALSE)
195 | #
196 | ## as shapefile - the layer (file prefix) is the value of out_shapefile_prefix, not the literal variable name
197 | ### note that field/col names longer than 8 characters will be truncated!
198 | writeOGR(point_data, out_shapefile_directory, out_shapefile_prefix, driver = "ESRI Shapefile", overwrite_layer = TRUE)
--------------------------------------------------------------------------------
/scripts/older_scripts/ggmap_geocoding_examples.R:
--------------------------------------------------------------------------------
1 | #
2 | # Geocoding with GGMAP & the Google Geocoding Service
3 | # examples
4 | #
5 |
6 | #--------------------------------------
7 | # libraries -
8 | # that may be needed for this tutorial
9 | # and how to install & load them:
10 | #--------------------------------------
11 | required.pkg <- c("htmlwidgets", "leaflet", "ggmap", "ggplot2","httr","acs","RJSONIO","RCurl","stringr","plyr", "rgeos","rgdal", "sp")
12 | pkgs.not.installed <- required.pkg[!vapply(required.pkg, requireNamespace, logical(1), quietly = TRUE)] # probe without attaching
13 | if (length(pkgs.not.installed) > 0) install.packages(pkgs.not.installed, dependencies = TRUE) # skip when nothing is missing
14 |
15 | # Load all the libraries at once.
16 | lapply(required.pkg, library, character.only = TRUE)
17 |
18 | #-------------------------------------------------------
19 | # Exploring Google Geocoder with ggmap package
20 | #-------------------------------------------------------
21 | library(ggplot2)
22 | library(ggmap)
23 |
24 | geocode("Barrows Hall, Berkeley, CA", source="google")
25 |
26 |
27 | geocode("Barrows Hall, Berkeley, CA", source="google", output="latlon")
28 | geocode("Barrows Hall, Berkeley, CA", source="google", output="latlona")
29 | geocode("Barrows Hall, Berkeley, CA", source="google", output="more")
30 | geocode("Barrows Hall, Berkeley, CA", source="google", output="all")
31 |
32 | df <- data.frame(
33 | address = c(
34 | "1517 Shattuck Ave, Berkeley, CA 94709",
35 | "Barrows Hall, Berkeley, CA",
36 | "2332 Haste St, Berkeley, CA 94704"
37 | ),
38 | stringsAsFactors = FALSE
39 | )
40 |
41 | df
42 |
43 | ## Geocode the three Addresses
44 |
45 | df2 <- geocode(df$address,source="google", output="more")
46 |
47 | # just keep lat, lon, type, and loctype
48 | df2 <- df2[,c(1:4)]
49 |
50 | #look at output
51 | df2
52 |
53 | df3 <- data.frame(df,df2)
54 |
55 | #look at output
56 | df3
57 |
58 | # Create a map of the geocoded output
59 | map <- get_map(location=c(lon=mean(df3$lon), lat=mean(df3$lat)), zoom=14)
60 | ggmap(map) +
61 | geom_point(aes(x = lon, y = lat), data=df3, size = 6, col="red" )
62 |
63 | ##--------------------------------
64 | ## Geocode a file of addresses
65 | ##--------------------------------
66 | # get the input data
67 | data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=F)
68 | head(data)
69 |
70 | ## We need one column with address (not multiple)
71 | data$address <- with(data,paste(street,city,state,zip, sep=","))
72 | head(data)
73 |
74 | ## Odd address formats can be a problem
75 | data[19,8]
76 | geocode(data[19,8], source="google", output="latlona")
77 |
78 | # Fix that address by removing the "#D"
79 | data[19,8]<-"7305 Edgewater Dr,Oakland,CA,94621"
80 | data[19,8]
81 | geocode(data[19,8], source="google", output="latlona")
82 |
83 | # Geocode a file of addresses - loaded into data frame
84 | geocoded_output <- geocode(data$address, output = "latlona",
85 | source = "google")
86 |
87 | # Add output to input data
88 | geocoded_output <- data.frame(data, geocoded_output)
89 |
90 | head(geocoded_output) # check it
91 |
92 | #Scaling up to more than 2500 records?
93 | # Google limits free geocoding to 2500 addresses per day
94 | geocodeQueryCheck() #how am I doing?
95 |
96 | #---------------------------------------
97 | # Get FIPS code to link to census data
98 | #---------------------------------------
99 | library(RCurl)
100 | library(RJSONIO)
101 | # FCC's Census Block Conversions API
102 | # http://www.fcc.gov/developers/census-block-conversions-api
103 |
104 | latlong2fips <- function(latitude, longitude) {
105 |   # FCC block API lookup for one lat/lon pair; returns the block FIPS code as a string, or NA_character_ (source: https://gist.github.com/ramhiser/f09a71d96a4dec80994c)
106 |   if (is.na(latitude) || is.na(longitude)) return(NA_character_) # no coordinates to look up, so skip the request
107 |   url <- sprintf("http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f", latitude, longitude)
108 |   print(url)
109 |   json <- RJSONIO::fromJSON(RCurl::getURL(url))
110 |   fips <- as.character(json$Block['FIPS']) # Census 2010 Block FIPS Code
111 |   if (length(fips) == 1) fips else NA_character_ # always length 1 so mapply() yields a character vector
112 | }
113 |
114 | geocoded_output$fips<- mapply(latlong2fips,geocoded_output$lat,geocoded_output$lon)
115 |
116 | # take a look
117 | head(geocoded_output)
118 |
119 | # Save geocoded output to a file
120 | write.csv(geocoded_output,file="google_geocoded_output.csv", row.names=FALSE)
121 |
122 |
--------------------------------------------------------------------------------
/scripts/older_scripts/ggmap_google.R:
--------------------------------------------------------------------------------
1 | # geocoding with ggmap
2 | ## http://zevross.com/blog/2014/03/19/geocoding-with-rs-ggmap-package/
3 | ## Subject to Google Terms of Use: http://developers.google.com/maps/terms
4 | ### Clear workspace
5 | rm(list=ls())
6 |
7 | library(ggmap)
8 |
9 | ### Set working directory
10 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
11 |
12 | #Try 1 address
13 | geocode(location="7305 Edgewater Dr #D Oakland CA 94621", source="google")
14 | geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google")
15 | geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="latlona")
16 | geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")
17 |
18 | one <- geocode(location="7305 Edgewater Dr Oakland CA 94621", source="google", output="more")
19 | two <- geocode("sather gate, berkeley, ca", source="google", output="more", messaging=TRUE)
20 | three <- geocode("1011 shattuck ave, berkeley ca", source="google", output="all")
21 |
22 | one$loctype
23 | two$loctype
24 |
25 |
26 | #try
27 | ?geocode
28 |
29 | # Geocoding script for large list of addresses
30 | # get the input data
31 | data <- read.csv(file="address_data/formatted/oak_liq_w_ids_types_headers.csv",stringsAsFactors=F)
32 | head(data)
33 | data$address <- with(data,paste(street,city,state,zip, sep=" "))
34 | head(data)
35 |
36 | # data[19,8]<-"7305 Edgewater Dr Oakland CA 94621" ## Why do we need to do this??
37 |
38 | mylocs <- geocode(data[,8], output = "more", source = "google")
39 | head(mylocs)
40 |
41 | # keep only the first four columns of the geocoder output
42 | mylocs_sub <- mylocs[,c(1:4)]
43 |
44 | #append geocode results back to input data
45 | geocoded_data <- data.frame(data,mylocs_sub)
46 |
47 | #write.csv(geocoded_data,file="geocoded_data.csv", row.names=FALSE)
48 |
49 | #lets plot it
50 | library(ggplot2)
51 | library(ggmap)
52 |
53 | map <- get_map(location=c(lon=mean(mylocs$lon), lat=mean(mylocs$lat)))
54 | ggmap(map) +
55 | geom_point(aes(x = lon, y = lat), data = mylocs, size = 6, col="red" )
56 |
57 |
58 | ## TRY - what's different?
59 | map <- get_map(location=c(lon=mean(mylocs$lon), lat=mean(mylocs$lat)), zoom=13)
60 | ggmap(map) +
61 | geom_point(aes(x = lon, y = lat), data = mylocs, size = 6, col="red" )
62 |
63 |
64 |
65 | #Scaling up to more than 2500 records
66 | geocodeQueryCheck() #how am I doing?
67 |
68 |
--------------------------------------------------------------------------------
/scripts/older_scripts/google_batch_big.R:
--------------------------------------------------------------------------------
1 | #
2 | # http://www.shanelynn.ie/massive-geocoding-with-r-and-google-maps/
3 | #
4 | # Geocoding script for large list of addresses.
5 | # Shane Lynn 10/10/2013
6 |
7 | #load up the ggmap library
8 | library(ggmap)
9 | # get the input data
10 | infile <- "input"
11 | data <- read.csv(paste0('./', infile, '.csv'))
12 |
13 | # get the address list, and append "Ireland" to the end to increase accuracy
14 | # (change or remove this if your address already include a country etc.)
15 | addresses = data$Address
16 | addresses = paste0(addresses, ", Ireland")
17 |
18 | #define a function that will process googles server responses for us.
19 | getGeoDetails <- function(address){
20 |   # Geocode one address with geocode(output='all') and flatten the reply into
21 |   # a one-row data frame: lat, long, accuracy, formatted_address,
22 |   # address_type and status. Everything but status stays NA unless status is "OK".
23 |   reply <- geocode(address, output='all', messaging=TRUE, override_limit=TRUE)
24 |
25 |   # While the reply reports the query limit as exceeded, wait an hour and ask again.
26 |   while(reply$status == "OVER_QUERY_LIMIT"){
27 |     print("OVER QUERY LIMIT - Pausing for 1 hour at:")
28 |     print(as.character(Sys.time()))
29 |     Sys.sleep(60*60)
30 |     reply <- geocode(address, output='all', messaging=TRUE, override_limit=TRUE)
31 |   }
32 |
33 |   # Start from an all-NA row so a failed lookup still yields a complete record.
34 |   details <- data.frame(lat=NA, long=NA, accuracy=NA, formatted_address=NA, address_type=NA, status=NA)
35 |   details$status <- reply$status
36 |
37 |   # Only an "OK" reply carries a result to unpack; the first result is used.
38 |   if (reply$status == "OK"){
39 |     best <- reply$results[[1]]
40 |     details$lat <- best$geometry$location$lat
41 |     details$long <- best$geometry$location$lng
42 |     if (length(best$types) > 0){
43 |       details$accuracy <- best$types[[1]]
44 |     }
45 |     details$address_type <- paste(best$types, collapse=',')
46 |     details$formatted_address <- best$formatted_address
47 |   }
48 |
49 |   details
50 | }
51 |
52 | # Initialise a data frame to hold the results (one row per geocoded address)
53 | geocoded <- data.frame()
54 | # Find out where to start in the address list (if the script was interrupted before):
55 | startindex <- 1
56 | # If a temp file exists, load it and resume at the first address it does not hold yet.
57 | tempfilename <- paste0(infile, '_temp_geocoded.rds')
58 | if (file.exists(tempfilename)){
59 |   print("Found temp file - resuming from index:")
60 |   geocoded <- readRDS(tempfilename)
61 |   startindex <- nrow(geocoded) + 1 # rows 1..nrow(geocoded) are done; restarting on the last one would duplicate it
62 |   print(startindex)
63 | }
64 |
65 | # Geocode address by address; setdiff() leaves nothing to iterate when the temp file already covers every address.
66 | for (ii in setdiff(seq_along(addresses), seq_len(startindex - 1))){
67 |   print(paste("Working on index", ii, "of", length(addresses)))
68 |   # Query the google geocoder - getGeoDetails() pauses here if we are over the limit.
69 |   result <- getGeoDetails(addresses[ii])
70 |   print(result$status)
71 |   result$index <- ii
72 |   # Append the answer to the results.
73 |   geocoded <- rbind(geocoded, result)
74 |   # Save temporary results as we go along so an interrupted run can resume.
75 |   saveRDS(geocoded, tempfilename)
76 | }
77 |
78 | #now we add the latitude and longitude to the main data
79 | data$lat <- geocoded$lat
80 | data$long <- geocoded$long
81 | data$accuracy <- geocoded$accuracy
82 |
83 | #finally write it all to the output files
84 | saveRDS(data, paste0("../data/", infile ,"_geocoded.rds"))
85 | write.table(data, file=paste0("../data/", infile ,"_geocoded.csv"), sep=",", row.names=FALSE)
--------------------------------------------------------------------------------
/scripts/older_scripts/google_geocode_in_limits.R:
--------------------------------------------------------------------------------
1 | #
2 | # Purpose: Use GGMAP to geocode with the source="google" option
3 | # within google daily query limits
4 | # Subject to Google Terms of Use: http://developers.google.com/maps/terms
5 | # You need to re-run the script when your number of needed geocodes exceeds the query limit
6 | # pattyf@berkeley.edu, 12/09/2015
7 | # created for dlab.berkeley.edu tutorial as example
8 | #
9 |
10 | address_file<- "address_data/formatted/oak_liq_w_ids_types_headers.csv"
11 | address_file_copy <-"address_data/formatted/oak_liq_w_ids_types_headers_copy.csv"
12 | address_file_geocoded <- "address_data/formatted/oak_liq_w_ids_types_headers_geocoded.csv"
13 | # DO ONCE - make a copy of the data with the addresses properly formatted
14 | if (!file.exists(address_file_copy)) {
15 | data <- read.csv(file=address_file,stringsAsFactors=F) # read data
16 | data$address <- with(data,paste(street,city,state,zip, sep=",")) #add single column address
17 | data[19,8]<-"7305 Edgewater Dr,Oakland,CA,94621" ## Specific to this data
18 |
19 | write.csv(data,file=address_file_copy, row.names=FALSE)
20 | }
21 |
22 | # Read in the copy of the data that still has to be geocoded
23 | data <- read.csv(file=address_file_copy, stringsAsFactors=FALSE)
24 | maxrecs <- as.numeric(ggmap::geocodeQueryCheck()) # ggmap:: because this script never attaches ggmap; presumably the remaining query allowance
25 | if (maxrecs > nrow(data)) {
26 |   maxrecs <- nrow(data)
27 | }
28 | maxrecs
29 |
30 | if (maxrecs > 0) {
31 |   # Create two subsets with base indexing (dplyr's slice()/n() were used here but dplyr is never loaded)
32 |   not_geocoded <- data[seq_len(maxrecs), , drop = FALSE]
33 |   geocode_later <- data[-seq_len(maxrecs), , drop = FALSE]
34 |
35 |   # Save to file what we will geocode later
36 |   write.csv(geocode_later, file=address_file_copy, row.names=FALSE)
37 |   rm(geocode_later)
38 |
39 |   geocoded_output <- ggmap::geocode(not_geocoded$address, output = "latlona", source = "google")
40 |
41 |   geocoded_output <- data.frame(not_geocoded, geocoded_output) # combine the input data with geocoded results
42 |
43 |   # Save output; append without a header row when the output file already exists
44 |   if (file.exists(address_file_geocoded)) {
45 |     write.table(geocoded_output, file=address_file_geocoded, col.names=FALSE, row.names=FALSE, sep=",", append=TRUE)
46 |   } else {
47 |     write.table(geocoded_output, file=address_file_geocoded, row.names=FALSE, sep=",")
48 |   }
49 | }
50 |
51 | #check file with geocoded data
52 | doh <- read.csv(file=address_file_geocoded,stringsAsFactors=F) # read data
53 | doh
54 |
--------------------------------------------------------------------------------
/scripts/older_scripts/spatial_analysis_examples.R:
--------------------------------------------------------------------------------
1 | #
2 | # Spatial Analysis of Addresses Geocoded with Census Geocoder
3 | #
4 | # pattyf@berkeley.edu, 5/3/2016
5 |
6 | #--------------------------------------
7 | # libraries -
8 | # that may be needed for this tutorial
9 | # and how to install & load them:
10 | #--------------------------------------
11 | required.pkg <- c("htmlwidgets", "leaflet", "ggmap", "ggplot2","httr","acs","RJSONIO","RCurl","stringr","plyr", "rgeos","rgdal", "sp")
12 | pkgs.not.installed <- required.pkg[!vapply(required.pkg, requireNamespace, logical(1), quietly = TRUE)] # probe without attaching
13 | if (length(pkgs.not.installed) > 0) install.packages(pkgs.not.installed, dependencies = TRUE) # skip when nothing is missing
14 |
15 | # Load all the libraries at once.
16 | lapply(required.pkg, library, character.only = TRUE)
17 |
18 |
19 | #------------------------------------------------------
20 | # Data Linkage Example:
21 | # Link geocoded addresses to census data
22 | #------------------------------------------------------
23 | #clean environment
24 | rm(list=ls())
25 |
26 | #set working directory
27 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
28 |
29 | # There are many online services for downloading census data.
30 | # You can use one of these to download census data and then read the data into R.
31 | # Or you can use a library like "acs" to make a request of the census online data service API from within R.
32 | # You can link the census data to your geocoded addresses by the FIPS code.
33 | # The FIPS code, also called GEOID, identifies the census geography to which the tabular data have been aggregated.
34 | # For example, the specific census tract or blockgroup.
35 | # This requires the geocoded addresses to have FIPS codes to link to the census data.
36 | # If you use the Census Geocoding Service you get the FIPS codes with your geocoded output.
37 | # If you use another service that does not give you the FIPS code, you can use the FCC census api or spatial overlay to get
38 | # the FIPS code for each geocoded address.
39 |
40 | # The code below uses the acs library and is a modification of the following blog post which has great examples and more details:
41 | # http://zevross.com/blog/2015/10/14/manipulating-and-mapping-us-census-data-in-r-using-the-acs-tigris-and-leaflet-packages-3
42 |
43 | # Use the ACS (American Community Survey) library to fetch census tract level data for our geocoded addresses.
44 | library(acs)
45 | library(stringr) # to format strings
46 |
47 | # You need a census api key to use this library. You can get it in a few minutes at:
48 | # http://api.census.gov/data/key_signup.html
49 |
50 | # I keep my key in a file that I source to read into R
51 | # My file has one line and looks like this (NOT MY REAL KEY):
52 | # my_census_api_key <- "f2666666666666666666666666666632"
53 | source("keys/census_api_key.R")
54 |
55 | # Activate the key
56 | api.key.install(key=my_census_api_key)
57 |
58 | # Identify the census geography of interest
59 | geo<-geo.make(state=c("CA"),county=c(1), tract="*")
60 |
61 | # Fetch the census data of interest
62 | income<-acs.fetch(endyear = 2014, span = 5, geography = geo, table.number = "B19001", col.names = "pretty")
63 |
64 | # Take a look at the specific ACS columns returned
65 | attr(income, "acs.colnames")
66 |
67 | # Convert the census data to a data frame, (1) keeping only the columns of interest and (2) creating the FIPS key
68 | income_df <- data.frame(paste0(str_pad(income@geography$state, 2, "left", pad="0"),
69 | str_pad(income@geography$county, 3, "left", pad="0"),
70 | str_pad(income@geography$tract, 6, "left", pad="0")),
71 | income@estimate[,c("B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): Total:" ,
72 | "B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): $200,000 or more")],
73 | stringsAsFactors = FALSE)
74 |
75 | # take a look at it
76 | head(income_df)
77 |
78 | # add row numbers to the data frame
79 | rownames(income_df)<-1:nrow(income_df)
80 |
81 | # relabel the columns
82 | names(income_df)<-c("GEOID", "total", "over_200")
83 |
84 | # create a new variable - percent of incomes that are over 200k in the census tract
85 | income_df$percent <- 100*(income_df$over_200/income_df$total)
86 |
87 | # take a look at it
88 | head(income_df)
89 |
90 | # Read in geocoded addresses
91 | geocoded_output_file <- "geocoded_addresses_out.csv"
92 | geocoded_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
93 |
94 | # take a look at them
95 | head(geocoded_results)
96 |
97 | # Create the Key on which we will join the geocoded addresses to the
98 | # Census data - this is the FIPS code, often called the GEOID
99 | geocoded_results$GEOID <- paste0(str_pad(geocoded_results$state_fips, 2, "left", pad="0"),
100 | str_pad(geocoded_results$county_fips, 3, "left", pad="0"),
101 | str_pad(geocoded_results$tract_fips, 6, "left", pad="0"))
102 |
103 | # take a look at them
104 | head(geocoded_results)
105 |
106 | # Now Join the census data to the geocoded addresses by the GEOID
107 | geocoded_results <- merge(geocoded_results,income_df, by="GEOID")
108 |
109 | # take a look at them
110 | head(geocoded_results)
111 |
112 | # Map the results with Leaflet for interactive mapping.
113 | # Click any address to see its census value; popup text is HTML, so lines are separated with <br>.
114 | leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
115 |   addCircleMarkers(data = geocoded_results, lng = ~lon,
116 |                    lat = ~lat, radius = 5, stroke = FALSE,
117 |                    popup = paste0("Geocoded Address:<br>", geocoded_results$matched_address,
118 |                                   "<br>Percent of Households above $200k: ", round(geocoded_results$percent, 2), "%"),
119 |                    color = "red",
120 |                    fillOpacity = 0.7)
121 |
122 |
123 | #----------------------------------------------------------------------------------------
124 | # Spatial Overlay #1
125 | # Question: What is the Community College District for each of our geocoded addresses?
126 | #----------------------------------------------------------------------------------------
127 | #clean environment
128 | rm(list=ls())
129 |
130 | #set working directory
131 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
132 |
133 | library(sp)
134 | library(rgdal)
135 | library(rgeos)
136 |
137 | # read in geocoded addresses
138 | geocoded_output_file <- "geocoded_addresses_out.csv"
139 | geocoded_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
140 | head(geocoded_results) # take a look at the results
141 |
142 | # what is the type of object
143 | class(geocoded_results)
144 |
145 | # let's make it spatialPointsDataFrame
146 | coordinates(geocoded_results) <- ~lon+lat
147 | class(geocoded_results)
148 |
149 | # plot the points
150 | plot(geocoded_results)
151 |
152 | # Get the Alameda Community College Districts data
153 | # Format: ESRI Shapefile
154 | # Source: https://data.acgov.org/Geospatial-Data/Community-College-Districts-within-Alameda-County/bdqp-je9q
155 |
156 | # Read downloaded shapefile into R
157 | alameda_ccds <- readOGR(dsn="./shapefiles/AlamedaCommunityCollegeDistricts", layer="geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c")
158 | class(alameda_ccds) # what is the data object type?
159 | plot(alameda_ccds) #plot the CCDs
160 | points(geocoded_results, col="red") # add the geocoded points to the plot
161 |
162 | head(alameda_ccds@data) #look at the attributes that describe each polygon
163 |
164 | # Let's use the sp over function to find out
165 | # the CCD of each of our addresses
166 | # over stands for spatial overlay
167 | address_ccd <-over(geocoded_results,alameda_ccds)
168 |
169 | # That didn't work
170 | # "over" requires both data sets to be spatial objects (they are)
171 | # with the same coordinate reference system (CRS)
172 | # What is the CRS of the CCDs?
173 | alameda_ccds@proj4string # or proj4string(alameda_ccds)
174 |
175 | # What is the CRS of our geocoded points?
176 | geocoded_results@proj4string # undefined
177 |
178 | # Let's set the CRS of our points to that of the CCDs
179 | # Why is that ok? the geocoded points are NAD83 CRS if Census geocoder was used,
180 | # WGS84 (same as the CCDs) if Google geocoder was used.
181 | # However in USA those are for the most part identical (may be a few meters off)
182 | proj4string(geocoded_results) <- CRS(proj4string(alameda_ccds))
183 |
184 | # make sure the CRSs are the same
185 | proj4string(alameda_ccds) == proj4string(geocoded_results)
186 |
187 | # Now try the overlay operation again:
188 | address_ccd <-over(geocoded_results,alameda_ccds)
189 | address_ccd # take a look at the output
190 |
191 | # Now we can join the CCD district name (dist_name) to our geocoded addresses
192 | # first, subset the overlay results
193 | ccd_df <- address_ccd[c('dist_name')]
194 |
195 | # Make sure the CCD dist_name is a character string not a factor
196 | str(ccd_df)
197 | ccd_df[] <- lapply(ccd_df, as.character)
198 | str(ccd_df)
199 |
200 | # Set NAs to a default value
201 | ccd_df[c("dist_name")][is.na(ccd_df[c('dist_name')])] <- "unknown"
202 | head(ccd_df) # take a look
203 |
204 | # Join the CCD data to our geocoded data
205 | geocoded_results <- cbind(geocoded_results, ccd_df)
206 |
207 | # view results
208 | head(geocoded_results)
209 |
210 | # Plot it - leaflet interactive mapping. coordinates() was applied above, so as.data.frame() is used to get lon/lat back as ordinary columns.
211 | map_points <- as.data.frame(geocoded_results)
212 | leaflet() %>% addTiles() %>%
213 |   setView(lng = mean(map_points$lon), lat = mean(map_points$lat), zoom = 16) %>%
214 |   addCircleMarkers(data = map_points, lng = ~lon,
215 |                    lat = ~lat, radius = 5, stroke = FALSE,
216 |                    popup = paste("Geocoded Address:<br>", map_points$matched_address,
217 |                                  "<br><br>Community College District:<br>", map_points$dist_name),
218 |                    color = "red", fillOpacity = 0.9)
219 |
220 | #
221 | # Question: How many addresses are in each CCD?
222 | #
223 | # create a cross-tab from our overlay (over) operation
224 | addressByCCD_df <- as.data.frame(table(address_ccd$dist_name))
225 |
226 | #look at it
227 | head(addressByCCD_df)
228 |
229 | #relabel the columns
230 | names(addressByCCD_df)[names(addressByCCD_df)=="Var1"] <- "ccd_name"
231 | names(addressByCCD_df)[names(addressByCCD_df)=="Freq"] <- "address_count"
232 |
233 | #look at it again
234 | addressByCCD_df
235 |
236 | #-----------------------------------------------------------------------
237 | # Spatial Overlay #2
238 | # Question: What addresses are within 1000 feet (about 305 meters) of a school?
239 | #-----------------------------------------------------------------------
240 | #clean environment
241 | rm(list=ls())
242 |
243 | #set working directory
244 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
245 |
246 | library(sp)
247 | library(rgdal)
248 | library(rgeos)
249 |
250 |
251 | # Read in geocoded addresses
252 | geocoded_output_file <- "geocoded_addresses_out.csv"
253 | geocoded_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
254 | head(geocoded_results) # take a look at the results
255 |
256 | # what is the type of object
257 | class(geocoded_results)
258 |
259 | #create a spatialPointsDataFrame object from our geocoded address locations
260 | coordinates(geocoded_results) <- ~lon+lat
261 | class(geocoded_results)
262 |
263 | #plot the points
264 | plot(geocoded_results)
265 |
266 | #what is the coordinate system of our data?
267 | geocoded_results@proj4string #undefined
268 |
269 | # Get the Alameda County Schools data
270 | # Format: ESRI Shapefile
271 | # Source: https://data.acgov.org/
272 | # Read downloaded shapefile into R
273 | alameda_schools <- readOGR(dsn="./shapefiles/AlamedaCountySchools", layer="geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca")
274 | # plot it
275 | plot(alameda_schools)
276 |
277 | # What class of data object is it?
278 | class(alameda_schools)
279 |
280 | # What is its CRS?
281 | alameda_schools@proj4string # or proj4string(alameda_schools)
282 |
283 | # Let's set the CRS of the geocoded points to that of the alameda schools
284 | # Why is that ok? the geocoded points are NAD83 CRS if Census geocoder was used,
285 | # WGS84 (same as the CCDs) if Google geocoder was used.
286 | # However in USA those are for the most part identical (may be a few meters off)
287 | proj4string(geocoded_results) <- CRS(proj4string(alameda_schools))
288 |
289 | # make sure they are the same
290 | proj4string(alameda_schools) == proj4string(geocoded_results)
291 |
292 | # now that both are in the same coordinate space let's transform them to a 2D projected CRS
293 | # Here we use UTM zone 10N, WGS84
294 | # http://spatialreference.org/ref/epsg/32610/
295 | geocoded_results_utm10 <- spTransform(geocoded_results, CRS("+init=epsg:32610"))
296 | alameda_schools_utm10 <- spTransform(alameda_schools, CRS("+init=epsg:32610"))
297 |
298 | # Let's assume we are investigating the addresses relative to schools and that
299 | # we want to see if any of these addresses are within 1000 feet of a school.
300 | # 1000 feet = 305 meters
301 |
302 | # Create a polygon from each point location that is the 1,000 foot buffer around the school
303 | # We submit the buffer distance in meters because meters are the units of the CRS.
304 | alschools_buf <-gBuffer(alameda_schools_utm10, byid=TRUE,width=305)
305 |
306 | #plot the buffers
307 | plot(alschools_buf)
308 |
309 | # add the geocoded address points
310 | points(geocoded_results_utm10, col="red")
311 |
312 | # "over" (overlay) operation to see what geocoded addresses are within school buffer zones
313 | in_school_zone <- over(geocoded_results_utm10,alschools_buf)
314 |
315 | #take a look at the output
316 | in_school_zone
317 |
318 | # create a new data frame that just has our column of interest - schoolname (site)
319 | in_buf <- in_school_zone[c('site')]
320 |
321 | # Make sure the schoolname (site) is a character string not a factor
322 | in_buf[] <- lapply(in_buf, as.character)
323 |
324 | # Replace NAs with a default value
325 | in_buf[c("site")][is.na(in_buf[c('site')])] <- "Not within school zone"
326 |
327 | # take a look at our data
328 | in_buf
329 |
330 | # Join it to our geocoded data
331 | geocoded_results <- cbind(geocoded_results, in_buf)
332 |
333 | # Take a look
334 | head(geocoded_results)
335 |
336 | #plot it using ggmaps - static map
337 | map <- get_map(location=c(lon=mean(geocoded_results$lon),lat=mean(geocoded_results$lat)), zoom=15)
338 | ggmap(map) +
339 | geom_point(aes(x = x, y = y), size = 4, col="black", data = alameda_schools@data) +
340 | geom_point(aes(x = lon, y = lat), size = 4, col="blue", data = geocoded_results[geocoded_results$site == 'Not within school zone',]) +
341 | geom_point(aes(x = lon, y = lat), size = 5, col="red", data = geocoded_results[!geocoded_results$site == 'Not within school zone',])
342 |
343 | # plot it - leaflet Interactive mapping
344 | # First subset the data to keep the code clearer
345 | geocoded_resultsInZone <- subset(geocoded_results, site != 'Not within school zone')
346 | geocoded_resultsOutsideZone <- subset(geocoded_results, site == 'Not within school zone')
347 |
348 | leaflet() %>% addTiles() %>%
349 | setView(lng = mean(geocoded_results$lon), lat = mean(geocoded_results$lat), zoom = 16) %>%
350 | addCircleMarkers(data = geocoded_resultsInZone, lng = ~lon,
351 | lat = ~lat, radius = 5, stroke=F,
352 | popup = paste0("Address:
", geocoded_resultsInZone$matched_address,
353 | "
In School Zone: ", geocoded_resultsInZone$site),
354 | color = "red",
355 | fillOpacity = 0.9) %>%
356 | addCircleMarkers(data = geocoded_resultsOutsideZone, lng = ~lon,
357 | lat = ~lat, radius = 5, stroke=F,
358 | popup = paste0("Address:
", geocoded_resultsOutsideZone$matched_address,
359 | "
In School Zone: ", geocoded_resultsOutsideZone$site),
360 | color = "blue",
361 | fillOpacity = 0.7) %>%
362 | addMarkers(data= alameda_schools, lng = ~x, lat =~y,
363 | popup = paste("School:
", alameda_schools$site),
364 | )
365 |
366 | #---------------------------------------------------------------------------
367 | # sessionInfo()
368 | #---------------------------------------------------------------------------
369 | #R version 3.2.2 (2015-08-14)
370 | #Platform: x86_64-apple-darwin13.4.0 (64-bit)
371 | #Running under: OS X 10.9.5 (Mavericks)
372 |
373 | #locale:
374 | # [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
375 |
376 | #attached base packages:
377 | # [1] stats graphics grDevices utils datasets methods base
378 |
379 | #other attached packages:
380 | # [1] htmlwidgets_0.5 rgeos_0.3-14 rgdal_1.0-4 sp_1.2-1 leaflet_1.0.0 ggmap_2.6.1 ggplot2_2.1.0 httr_1.1.0 acs_2.0
381 | #[10] XML_3.98-1.3 plyr_1.8.3 stringr_1.0.0
382 |
383 | #loaded via a namespace (and not attached):
384 | # [1] Rcpp_0.12.4 bitops_1.0-6 tools_3.2.2 digest_0.6.8 jsonlite_0.9.19 gtable_0.1.2 lattice_0.20-33
385 | #[8] png_0.1-7 mapproj_1.2-4 curl_0.9.6 yaml_2.1.13 proto_0.3-10 RgoogleMaps_1.2.0.7 maps_3.0.0-2
386 | #[15] grid_3.2.2 R6_2.1.2 jpeg_0.1-8 RJSONIO_1.3-0 reshape2_1.4.1 magrittr_1.5 scales_0.3.0
387 | #[22] htmltools_0.2.6 mime_0.4 geosphere_1.4-3 colorspace_1.2-6 labeling_0.3 stringi_1.0-1 RCurl_1.95-4.8
388 | #[29] munsell_0.4.2 rjson_0.2.15
--------------------------------------------------------------------------------
/scripts/older_scripts/tiger_geocoding.R:
--------------------------------------------------------------------------------
1 | #
2 | # Geocoding with Tiger Geocoding Service
3 | #
4 | # pattyf@berkeley.edu, 05/2/2016
5 | #
6 | # Important note:
7 | ## You can only geocode 1000 addresses at a time
8 | ## so need to add code to loop or subset your files
9 | ##
10 | ## Documentation:
11 | ##
12 |
13 | #clean environment
14 | rm(list=ls())
15 |
16 | #Load libraries
17 | library(httr)
18 |
19 | #set working directory
20 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
21 |
22 | # CLI format for CURL
23 | #format of geocoding request for Tiger Geocoder
24 | ##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010
25 | ##
26 | ##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
27 | ##--form addressFile=@tiger/tiger_12addresses_to_geocode.csv
28 | ##--form benchmark=Public_AR_Census2010
29 | ##--form vintage=Census2010_Census2010
30 | ##-o output_file.csv
31 |
32 | tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"
33 |
34 | # let's take a look at the addresses that we will geocode
35 | addresses_to_geocode <- read.csv(tiger_input_addressFile, stringsAsFactors = FALSE, col.names = c('id','street','city','state','zip'))
36 |
37 | tiger_url <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
38 |
39 | ## WORKED - thanks to: http://stackoverflow.com/questions/26611289/curl-post-statement-to-rcurl-or-httr
40 | ## add verbose() to see details of process
41 | ## Using httr I think
42 | geocoded_addresses <- POST(tiger_url, encode="multipart",
43 | body=list(addressFile=upload_file(tiger_input_addressFile),
44 | benchmark="Public_AR_Census2010",
45 | vintage="Census2010_Census2010"
46 | )
47 | )
48 |
49 | #write raw output to file
50 | capture.output(cat(content(geocoded_addresses)), file="test_out3.txt")
51 |
52 | #read output file in to a data frame (not sure how to do these two in one step)
53 | mylocs <- read.csv("test_out3.txt",header=FALSE)
54 | head(mylocs)
55 |
56 | #doh split the lat,long values into two separate columns
57 | mylocs$lon = unlist(lapply(mylocs$V6, function (x) strsplit(as.character(x), ",", fixed=TRUE)[[1]][1]))
58 | mylocs$lat = unlist(lapply(mylocs$V6, function (x) strsplit(as.character(x), ",", fixed=TRUE)[[1]][2]))
59 |
60 | mylocs$lon <- as.numeric(mylocs$lon)
61 | mylocs$lat <- as.numeric(mylocs$lat)
62 |
63 | #lets plot it
64 | library(ggplot2)
65 | library(ggmap)
66 |
67 | map <- get_map(location=c(lon=mean(mylocs$lon),lat=mean(mylocs$lat)), zoom=15)
68 | ggmap(map) +
69 | geom_point(aes(x = lon, y = lat), size = 4, col="red", data = mylocs)
70 |
71 |
--------------------------------------------------------------------------------
/scripts/older_scripts/tiger_geocoding_batch.R:
--------------------------------------------------------------------------------
1 | #
2 | # Batch Geocoding with the US Census Geocoding Service
3 | #
4 | # pattyf@berkeley.edu, 05/2/2016
5 | #
6 | # Important note:
7 | ## You can only geocode 1000 addresses at a time
8 | ## so need to add code to loop or subset your files
9 | ##
10 | ## Documentation:
11 | ##
12 | #Load libraries
13 | library(httr) # to submit geocoding request
14 | library(ggplot2) # to plot output
15 | library(ggmap) # to plot output
16 | library(leaflet) # for interactive plotting
17 | library(stringr)
18 |
19 | #clean environment
20 | rm(list=ls())
21 |
22 | #set working directory
23 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
24 |
25 | # our file of addresses that need to be geocoded
26 | tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"
27 |
28 | # the output file we will create
29 | geocoded_output_file <- "geocoded_addresses_out.csv"
30 |
31 | # CLI format for CURL
32 | #format of geocoding request for Tiger Geocoder
33 | ##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch --form addressFile=@tiger/tiger_12addresses_to_geocode.csv --form benchmark=Public_AR_Census2010 --form vintage=Census2010_Census2010
34 | ##
35 | ##curl http://geocoding.geo.census.gov/geocoder/geographies/addressbatch
36 | ##--form addressFile=@tiger/tiger_12addresses_to_geocode.csv
37 | ##--form benchmark=Public_AR_Census2010
38 | ##--form vintage=Census2010_Census2010
39 | ##-o output_file.csv
40 |
41 | # The census geocoder does not want column names in the file to be geocoded
42 | # but we want them to make sense of the data when we view it in R
43 | # For info on the correct format for submitting a file of addresses see:
44 | # https://www.census.gov/geo/maps-data/data/geocoder.html
45 | # Five columns - No headers, comma separated EVEN IF DATA NOT AVAILABLE
46 | # Unique ID, house number and street name, city, state, zipcode
47 | # Two valid examples:
48 | #1, 1600 Pennsylvania Ave NW, Washington, DC,
49 | #2, 1600 Pennsylvania Ave NW,,,20502
50 |
51 | ## Take a look at the addresses that we will geocode
52 | addresses_to_geocode <- read.csv(tiger_input_addressFile, stringsAsFactors = FALSE, col.names = c('id','street','city','state','zip'))
53 |
54 | #how many addresses?
55 | num_addresses <- nrow(addresses_to_geocode)
56 |
57 | #remove the address data object
58 | rm(addresses_to_geocode)
59 |
60 | # Batch geocode one file of addresses with the US Census geocoder.
61 | # file_of_addresses: path of the CSV file uploaded to the batch endpoint.
62 | # Returns a data frame of the parsed response plus numeric lon/lat columns.
63 | # Side effect: reads the globals skip_rows and geocoded_output_file and
64 | # writes the results to that file (created if skip_rows == 0, else appended).
65 | get_geocoded_addresses <- function(file_of_addresses) {
66 |   tiger_url <- "http://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
67 |   geocoded_addresses <- POST(tiger_url, encode = "multipart",
68 |                              body = list(addressFile = upload_file(file_of_addresses),
69 |                                          benchmark = "Public_AR_Census2010",
70 |                                          vintage = "Census2010_Census2010"))
71 |
72 |   # Output column names
73 |   mycols <- c("id", "in_address", "match_status", "match_type", "matched_address",
74 |               "lon_lat", "tlid", "street_side", "state_fips", "county_fips",
75 |               "tract_fips", "block_fips")
76 |
77 |   # Write the raw response to a temp file, then read it into a data frame
78 |   mytempfile <- tempfile()
79 |   # make sure the temp file is removed even if parsing fails
80 |   on.exit(unlink(mytempfile), add = TRUE)
81 |   capture.output(cat(content(geocoded_addresses)), file = mytempfile)
82 |   mylocs <- read.csv(mytempfile, header = FALSE, col.names = mycols)
83 |
84 |   # split the "lon,lat" values into two separate numeric columns
85 |   lon_lat_parts <- strsplit(as.character(mylocs$lon_lat), ",", fixed = TRUE)
86 |   mylocs$lon <- as.numeric(vapply(lon_lat_parts, function(p) p[1], character(1)))
87 |   mylocs$lat <- as.numeric(vapply(lon_lat_parts, function(p) p[2], character(1)))
88 |
89 |   # save geocoded addresses to a file
90 |   if (skip_rows == 0) {
91 |     # first batch: create the file and write the header row
92 |     write.csv(mylocs, file = geocoded_output_file, row.names = FALSE)
93 |   } else {
94 |     # later batches: write.csv() ignores append = TRUE and would overwrite
95 |     # the file, so append with write.table() and no header row
96 |     write.table(mylocs, file = geocoded_output_file, sep = ",",
97 |                 row.names = FALSE, col.names = FALSE, qmethod = "double", append = TRUE)
98 |   }
99 |   mylocs
100 | }
101 |
102 | # some counters to keep track of the number of addresses we need to process
103 | # we can only batch geocode 1000 addresses at a time
104 | skip_rows <- 0
105 | read_rows <- 1000
106 | processed_rows <- 0
107 | num_addresses <- nrow(read.csv(tiger_input_addressFile, header = FALSE)) # count every row: the input file has no header row
108 | if (num_addresses <= read_rows) {
109 |   # geocode them
110 |   my_results <- get_geocoded_addresses(tiger_input_addressFile)
111 | } else {
112 |   # process 1000 addresses at a time, accumulating the results of every batch
113 |   my_results <- NULL
114 |   while (processed_rows < num_addresses) {
115 |     addresses_to_geocode <- read.csv(tiger_input_addressFile, header = FALSE, colClasses = "character", stringsAsFactors = FALSE, nrows = read_rows, skip = skip_rows)
116 |     temp_infile <- tempfile()
117 |     # write.csv() ignores col.names = FALSE, so use write.table() to omit the header
118 |     write.table(addresses_to_geocode, file = temp_infile, sep = ",", row.names = FALSE, col.names = FALSE, qmethod = "double")
119 |     my_results <- rbind(my_results, get_geocoded_addresses(temp_infile))
120 |     unlink(temp_infile)
121 |     skip_rows <- skip_rows + read_rows
122 |     processed_rows <- processed_rows + nrow(addresses_to_geocode) # advance the counter so the loop terminates
123 |   }
124 | }
125 |
126 | # Use ggmap to plot geocoded addresses
127 | # as red dots on a google map image
128 | map <- get_map(location=c(lon=mean(my_results$lon),lat=mean(my_results$lat)), zoom=15)
129 | ggmap(map) +
130 | geom_point(aes(x = lon, y = lat), size = 4, col="red", data = my_results)
131 |
132 | #------------------------------------------------------
133 | # Data Linkage Example:
134 | # Link geocoded addresses to census data
135 | #------------------------------------------------------
136 | library(acs)
137 | source("keys/census_api_key.R")
138 | api.key.install(key=my_census_api_key)
139 |
140 | geo<-geo.make(state=c("CA"),county=c(1), tract="*")
141 |
142 | # !!!! important note -- the acs package may not support the newest ACS release;
143 | # the call below uses the five year span that ends in 2014
144 |
145 | income<-acs.fetch(endyear = 2014, span = 5, geography = geo, table.number = "B19001", col.names = "pretty")
146 | attr(income, "acs.colnames")
147 |
148 | # Build the tract GEOID (zero-padded state + county + tract) and keep the total
149 | # household count and the count of households with income of $200,000 or more
150 | income_cols <- c("B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): Total:",
151 |                  "B19001. Household Income in the Past 12 Months (in 2014 Inflation-Adjusted Dollars): $200,000 or more")
152 | income_df <- data.frame(paste0(str_pad(income@geography$state, 2, "left", pad = "0"),
153 |                                str_pad(income@geography$county, 3, "left", pad = "0"),
154 |                                str_pad(income@geography$tract, 6, "left", pad = "0")),
155 |                         income@estimate[, income_cols], stringsAsFactors = FALSE)
156 | rownames(income_df) <- seq_len(nrow(income_df)) # the old select(income_df, 1:3) needed dplyr (never loaded) and kept all 3 columns anyway
157 | names(income_df) <- c("GEOID", "total", "over_200")
158 | income_df$percent <- 100 * (income_df$over_200 / income_df$total)
159 |
160 | # read in geocoded addresses
161 | geocoded_output_file <- "geocoded_addresses_out.csv"
162 | my_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
163 |
164 | # Create the Key on which we will join the geocoded addresses to the
165 | # Census data - this is the FIPS code, often called the GEOID
166 | my_results$GEOID <- paste0(str_pad(my_results$state_fips, 2, "left", pad="0"),
167 | str_pad(my_results$county_fips, 3, "left", pad="0"),
168 | str_pad(my_results$tract_fips, 6, "left", pad="0"))
169 |
170 | # Now Join the census data to the geocoded addresses by the GEOID
171 | my_results2 <- merge(my_results,income_df, by="GEOID")
172 |
173 | # Map the results with Leaflet for Interactive mapping
174 | # This way we can click on any address and see the census data value.
175 | #popup = paste("Address:
", my_results2$matched_address,"
Percent Below Poverty Line:", my_results2$pctpov),
176 | leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
177 | addCircleMarkers(data = my_results2, lng = ~lon,
178 | lat = ~lat, radius = 5, stroke=F,
179 | popup = paste("Address:
", my_results2$matched_address,"
Percent of Households
above $200k:", my_results2$percent),
180 | color = "red",
181 | fillOpacity = 0.7)
182 |
183 |
184 | #-----------------------------------------------------------------------
185 | # Spatial Overlay #1
186 | # Question:
187 | #-----------------------------------------------------------------------
188 | library(sp)
189 | library(rgdal)
190 | library(rgeos)
191 |
192 | # read in geocoded addresses
193 | geocoded_output_file <- "geocoded_addresses_out.csv"
194 | my_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
195 | head(my_results) # take a look at the results
196 |
197 | #what is the type of object
198 | class(my_results)
199 |
200 | #let's make it spatial points data frame
201 | coordinates(my_results) <- ~lon+lat
202 | class(my_results)
203 | #plot the points
204 | plot(my_results)
205 |
206 | # Alameda Community College Districts
207 | # Format: ESRI Shapefile
208 | # Source: https://data.acgov.org/Geospatial-Data/Community-College-Districts-within-Alameda-County/bdqp-je9q
209 | alameda_ccds <- readOGR(dsn="./shapefiles/AlamedaCommunityCollegeDistricts", layer="geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c") # Read it into R.
210 | class(alameda_ccds) # what is the data object type?
211 | plot(alameda_ccds) #plot the CCDs
212 | points(my_results, col="red") # add the geocoded points to the plot
213 |
214 | head(alameda_ccds@data) #look at the attributes that describe each polygon
215 |
216 | # Let's use the sp over function to find out
217 | # the CCD of each of our addresses
218 | # over stands for spatial overlay
219 | address_ccd <-over(my_results,alameda_ccds)
220 |
221 | # over requires both data sets to be spatial objects (they are)
222 | # with the same coordinate reference system (CRS)
223 | # what is the CRS of the CCDs?
224 | alameda_ccds@proj4string # or proj4string(alameda_ccds)
225 |
226 | #what is the CRS of our geocoded points?
227 | my_results@proj4string # undefined
228 |
229 | #Let's set the CRS of our points to that of the CCDs
230 | # Why is that ok? the geocoded points are NAD83 CRS if Census geocoder was used,
231 | # WGS84 (same as the CCDs) if Google geocoder was used.
232 | # However in USA those are for the most part identical (may be a few meters off)
233 | proj4string(my_results) <- CRS(proj4string(alameda_ccds))
234 |
235 | #make sure they are the same
236 | proj4string(alameda_ccds) == proj4string(my_results)
237 |
238 | # Now try the overlay operation again:
239 | address_ccd <-over(my_results,alameda_ccds)
240 | address_ccd
241 |
242 | # Now we can join the ccd district name (dist_name) to our geocoded addresses
243 | # first, subset the overlay output to the dist_name column
244 | ccd_df <- address_ccd[c('dist_name')]
245 |
246 | #now make sure it is a character string not a factor
247 | str(ccd_df)
248 | ccd_df[] <- lapply(ccd_df, as.character)
249 | str(ccd_df)
250 | #now set NAs to a default value
251 | ccd_df[c("dist_name")][is.na(ccd_df[c('dist_name')])] <- "unknown"
252 | head(ccd_df) # take a look
253 | #join it to our geocoded data
254 | my_results <- cbind(my_results, ccd_df)
255 | #view results
256 | head(my_results)
257 |
258 | # Plot it - leaflet Interactive mapping
259 | leaflet() %>% addTiles() %>%
260 |   setView(lng = mean(my_results$lon), lat = mean(my_results$lat), zoom = 16) %>%
261 |   addCircleMarkers(data = my_results, lng = ~lon,
262 |                    lat = ~lat, radius = 5, stroke = FALSE, # popup reads my_results; my_results3 is not defined at this point
263 |                    popup = paste("Address:
", my_results$matched_address,"
264 |
Communit College District:
", my_results$dist_name),
265 |                    color = "red",
266 |                    fillOpacity = 0.9)
267 |
268 | #
269 | # Question: How many addresses are in each CCD?
270 | #
271 | # create a cross-tab from our overlay (over) operation
272 | addressByCCD_df <- as.data.frame(table(address_ccd$dist_name))
273 |
274 | #look at it
275 | head(addressByCCD_df)
276 |
277 | #relabel the columns
278 | names(addressByCCD_df)[names(addressByCCD_df)=="Var1"] <- "ccd_name"
279 | names(addressByCCD_df)[names(addressByCCD_df)=="Freq"] <- "address_count"
280 |
281 | #look at it again
282 | head(addressByCCD_df)
283 |
284 | #-----------------------------------------------------------------------
285 | # Spatial Overlay #2
286 | # Question: What addresses are within 1000 feet (305 meters) of a school?
287 | #-----------------------------------------------------------------------
288 |
289 | library(sp)
290 | library(rgdal)
291 | library(rgeos)
292 |
293 |
294 | # read in geocoded addresses
295 | geocoded_output_file <- "geocoded_addresses_out.csv"
296 | my_results <- read.csv(geocoded_output_file,stringsAsFactors = FALSE)
297 | head(my_results) # take a look at the results
298 |
299 | #what is the type of object
300 | class(my_results)
301 |
302 | #let's make it spatial points data frame
303 | coordinates(my_results) <- ~lon+lat
304 | class(my_results)
305 | #plot the points
306 | plot(my_results)
307 |
308 | # my_results was already converted to a spatial points data frame above;
309 | # calling coordinates<- a second time on a Spatial object raises an error,
310 | # so only confirm the class here
311 | class(my_results)
312 |
313 | #what is the coordinate system of our data?
314 | my_results@proj4string #undefined
315 |
316 | alameda_schools <- readOGR(dsn="./shapefiles/AlamedaCountySchools", layer="geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca") # Read it into R; the shapefile directory name has no spaces in this repository
317 | class(alameda_schools)
318 | alameda_schools@proj4string # or proj4string(alameda_schools)
319 |
320 | #let's set the CRS of the geocoded points to that of the alameda schools
321 | proj4string(my_results) <- CRS(proj4string(alameda_schools))
322 |
323 |
324 | #make sure they are the same
325 | proj4string(alameda_schools) == proj4string(my_results)
326 |
327 | #now that both are in the same coordinate space let's transform them to a planar projected CRS
328 | #http://spatialreference.org/ref/epsg/32610/
329 | my_results_utm10 <- spTransform(my_results, CRS("+init=epsg:32610"))
330 | alameda_schools_utm10 <- spTransform(alameda_schools, CRS("+init=epsg:32610"))
331 |
332 | # Let's assume
333 | # A sex offender cannot live within 1,000 feet of any school, childcare facility, or place where children gather.
334 | # 1000 feet = 305 meters
335 | alschools_buf <-gBuffer(alameda_schools_utm10, byid=TRUE,width=305)
336 | plot(alschools_buf)
337 | points(my_results_utm10, col="red")
338 |
339 | # overlay: attributes of the school buffer each address falls in (NA if none)
340 | in_school_zone <- over(my_results_utm10, alschools_buf)
341 | in_school_zone$site
342 | in_buf <- in_school_zone[c('site')] # column subset; [c('site'), ] would select a row named "site"
343 | in_buf[] <- lapply(in_buf, as.character)
344 | in_buf[c("site")][is.na(in_buf[c('site')])] <- "Not within school zone"
345 | # join it to our geocoded data
346 | my_results3 <- cbind(my_results, in_buf)
347 |
348 | #plot it using ggmaps - static map
349 | map <- get_map(location=c(lon=mean(my_results3$lon),lat=mean(my_results3$lat)), zoom=15)
350 | ggmap(map) +
351 | geom_point(aes(x = x, y = y), size = 4, col="black", data = alameda_schools@data) +
352 | geom_point(aes(x = lon, y = lat), size = 4, col="blue", data = my_results3[my_results3$site == 'Not within school zone',]) +
353 | geom_point(aes(x = lon, y = lat), size = 5, col="red", data = my_results3[!my_results3$site == 'Not within school zone',])
354 |
355 | #plot it - leaflet Interactive mapping
356 | leaflet() %>% addTiles() %>%
357 | setView(lng = mean(my_results3$lon), lat = mean(my_results3$lat), zoom = 16) %>%
358 | addCircleMarkers(data = my_results3, lng = ~lon,
359 | lat = ~lat, radius = 5, stroke=F,
360 | popup = paste("Address:
", my_results3$matched_address,"
In School Zone:", my_results3$site),
361 | color = "red",
362 | fillOpacity = 0.9) %>%
363 | addCircleMarkers(data = my_results3[my_results3$site == 'Not within school zone',], lng = ~lon,
364 | lat = ~lat, radius = 5, stroke=F,
365 | popup = paste("Address:
", my_results3$matched_address,"
In School Zone:", my_results3$site),
366 | color = "blue",
367 | fillOpacity = 0.7) %>%
368 | addMarkers(data= alameda_schools, lng= ~x, lat=~y,
369 | popup = paste("School:
", alameda_schools$site),
370 | )
371 |
--------------------------------------------------------------------------------
/scripts/older_scripts/tiger_one_at_a_time.R:
--------------------------------------------------------------------------------
1 | #library(httr)
2 | library(RJSONIO)
3 | gurl <- "http://geocoding.geo.census.gov/geocoder/geographies/address?street=912+Kingston+Ave&city=Piedmont&state=CA&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"
4 |
5 | bad_gurl <-"http://geocoding.geo.census.gov/geocoder/geographies/address?street=912+Kingston+Ave&city=donkey&state=CA&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"
6 |
7 | tiger_prefix <- "http://geocoding.geo.census.gov/geocoder/geographies/address?"
8 | tiger_suffix <- "&benchmark=Public_AR_Census2010&vintage=Census2010_Census2010&format=json"
9 |
10 | #g_out <- GET(gurl)
11 |
12 |
13 | g_out <- fromJSON(gurl)
14 | str(g_out)
15 |
16 | # take the first returned values in case > 1 matches
17 | lon <- g_out$result$addressMatches[[1]]$coordinates[['x']]
18 | lat <- g_out$result$addressMatches[[1]]$coordinates[['y']]
19 | matchedAddress <- g_out$result$addressMatches[[1]]$matchedAddress
20 | tractfips <- g_out$result$addressMatches[[1]]$geographies$`Census Tracts`[[1]]$GEOID
21 | blockfips <- g_out$result$addressMatches[[1]]$geographies$`Census Blocks`[[1]]$GEOID
22 |
23 | # another way
24 | g_out2 <- unlist(g_out)
25 | head(g_out2)
26 | g_out2['result.addressMatches.coordinates.x']
27 | #Now process a file of addresses:
28 | tiger_input_addressFile <- "tiger/tiger_12addresses_to_geocode.csv"
29 | # let's take a look at the addresses that we will geocode
30 | addresses_to_geocode <- read.csv(tiger_input_addressFile, stringsAsFactors = FALSE, col.names = c('id','street','city','state','zip'))
31 |
32 | addresses_to_geocode
33 |
34 | addresses_to_geocode$tiger_format <- paste0(
35 | "street=",addresses_to_geocode$street,
36 | "&city=",addresses_to_geocode$city,
37 | "&state=",addresses_to_geocode$state,
38 | "&zip=",addresses_to_geocode$zip
39 | )
40 |
41 | # geocode a file of addresses - one at a time
42 | # Geocode a single address with the Census geocoder.
43 | # address: query string such as "street=...&city=...&state=...&zip=..."
44 | # Prints the request URL and, when there is a match, the first matched address.
45 | # Returns the first matched address, or NULL when nothing matched or the
46 | # request failed (a warning is raised in that case).
47 | tgeocode <- function(address){
48 |   g_address <- paste0(tiger_prefix, URLencode(address), tiger_suffix)
49 |   print(g_address)
50 |   # without an error handler tryCatch() does nothing, and one failed request
51 |   # would abort the whole batch - so warn and carry on instead
52 |   g_out <- tryCatch(fromJSON(g_address), error = function(e) {
53 |     warning("geocoding failed for ", g_address, ": ", conditionMessage(e), call. = FALSE)
54 |     NULL
55 |   })
56 |   if (length(g_out$result$addressMatches) > 0) print(g_out$result$addressMatches[[1]]$matchedAddress)
57 | }
58 |
59 | ## apply the geocoding function to the CSV file
60 | library(plyr)
61 | ldply(addresses_to_geocode$tiger_format,function(x) tgeocode(x))
62 | #address <- c("The White House, Washington, DC","The Capitol, Washington, DC")
63 | #locations <- ldply(address, function(x) geoCode(x))
64 | #names(locations) <- c("lat","lon","location_type", "formatted")
65 | #head(locations)
66 |
67 |
--------------------------------------------------------------------------------
/scripts/older_scripts/tigris_acs_census.R:
--------------------------------------------------------------------------------
1 | #
2 | # census data with
3 | # tigris and acs packages
4 | # after
5 | # http://rstudio-pubs-static.s3.amazonaws.com/90665_de25062951e540e7b732f21de53001f0.html
6 | # https://github.com/walkerke/tigris
7 | # http://zevross.com/blog/2015/10/14/manipulating-and-mapping-us-census-data-in-r-using-the-acs-tigris-and-leaflet-packages-3/
8 |
9 | # download US Census TIGER Data, eg
10 | # http://www2.census.gov/geo/tiger
11 | # Then go to: http://www.census.gov/geo/maps-data/data/tiger-line.html (read how do i choose...)
12 | # Use the web interface to download
13 | #
14 |
15 | library(tigris)
16 | library(sp)
17 | library(acs)     # api.key.install(), acs.fetch() and geo.make() used below
18 | library(ggplot2) # fortify(), ggplot() and geom_polygon() used further down
19 | library(ggmap)   # get_map() and ggmap() used further down
20 |
21 | # Census tract polygons for Alameda County, CA
22 | cenpolys <- tracts(state = 'CA', county = c('Alameda'))
23 | plot(cenpolys) # the tracts were previously referred to as `dfw`, which is never defined
24 |
25 | source("keys/census_api_key.R")
26 | api.key.install(my_census_api_key)
27 | income_data <- acs.fetch(endyear = 2012,
28 |                          geography = geo.make(state = "CA", county = c("Alameda"), tract = "*"),
29 |                          variable = "B19013_001")
30 |
31 | str(income_data@geography) # see how state and county are formatted
32 | # need to create the key on which to join spatial and demo data;
33 | # the literal "0" / "00" padding only suits a 1-digit state and 1-digit county code
34 | income_df <- data.frame(paste0("0", as.character(income_data@geography$state),
35 |                                "00", as.character(income_data@geography$county),
36 |                                income_data@geography$tract),
37 |                         income_data@estimate)
38 |
39 | colnames(income_df) <- c("GEOID", "hhincome")
40 | censusT <- geo_join(cenpolys, income_df, "GEOID", "GEOID")
41 |
42 | library(classInt)
43 | library(RColorBrewer)
44 | myclass <- classIntervals(censusT$hhincome, 9, style = "jenks")
45 | colpal <- findColours(myclass, brewer.pal(5, "OrRd"))
46 | plot(censusT, border="grey", col=colpal)
47 |
48 | plotData <- fortify(censusT, data=censusT@data, region="GEOID")
49 | head(plotData) # take a look at the result of the fortify command
50 |
51 | ggplot() + geom_polygon(data=plotData, aes(x=long, y=lat, group=group))
52 | # Map it.
53 | map <- get_map("Berkeley", zoom=10)
54 | ggmap(map) + geom_polygon(data=plotData, aes(x=long, y=lat, group=group))
55 |
56 | # Add transparency to better see the reference basemap.
57 | ggmap(map) +
58 | geom_polygon(data=plotData, aes(x=long, y=lat, group=group), alpha=0.5)
59 |
60 | #Now, join the census data to the geo data frame.
61 | plotData <- merge(plotData,censusT@data, by.x="id", by.y="GEOID")
62 | head(plotData) # now you can see the attribute data re-joined to the geographic data
63 |
64 | #map it - color regions by census variable
65 | ggmap(map) +
66 | geom_polygon(data = plotData, aes(x = long, y = lat, group = group,
67 | fill = hhincome), color = "black", size = 0.25) +
68 | coord_map()
69 |
70 | # get rid of tracts with NA
71 | #censusT <- censusT[!is.na(censusT$hhincome),]
72 | # Too dark - try this
73 | library(scales) #for pretty_breaks
74 | myplot <- ggmap(map) +
75 | geom_polygon(data = plotData, aes(x = long, y = lat, group = group,
76 | fill = hhincome)) +
77 | coord_map() +
78 | scale_fill_distiller(palette = "Greens",
79 | breaks = pretty_breaks(n = 8)) +
80 | guides(fill = guide_legend(reverse = TRUE))
81 |
82 | myplot
83 |
84 | p2 <- subset(plotData,ALAND > 0)
85 | censusT <-p2
86 | ggmap(map) +
87 | geom_polygon(data = p2, aes(x = long, y = lat, group = group,
88 | fill = hhincome), color = "black", size = 0.25) +
89 | coord_map()
90 |
91 | map <- get_map(location=c(lon=mean(geocoded_output$lon), lat=mean(geocoded_output$lat)), zoom=12)
92 | myplot <- ggmap(map) +
93 | geom_polygon(data = p2, aes(x = long, y = lat, group = group,
94 | fill = hhincome), alpha=0.75) +
95 | geom_point(aes(x = lon, y = lat), data=geocoded_output, size = 6, col="red" ) +
96 | coord_map() +
97 | scale_fill_distiller(palette = "Greens",
98 | breaks = pretty_breaks(n = 8)) +
99 | guides(fill = guide_legend(reverse = TRUE))
100 |
101 | myplot
102 |
103 | map <- get_map(location=c(lon=mean(geocoded_output$lon), lat=mean(geocoded_output$lat)), zoom=12)
104 | ggmap(map) +
105 | geom_point(aes(x = lon, y = lat), data=geocoded_output, size = 6, col="red" )
106 |
--------------------------------------------------------------------------------
/scripts/older_scripts/yahoo_geocoding.R:
--------------------------------------------------------------------------------
1 | # Geocoding with Yahoo Placefinder
2 | ## pattyf@berkeley.edu, 12/8/2015
3 | #
4 | # 2000 addresses per day limit!
5 | #
6 | # Review the following blog post by Zev Ross
7 | # http://zevross.com/blog/2015/05/19/scrape-website-data-with-the-new-r-package-rvest/
8 | #
9 | # You need to first apply for an account on https://developer.yahoo.com
10 | #
11 |
12 | #set working directory
13 | setwd("~/Documents/Dlab/dlab_workshops/rgeocoding")
14 |
15 | # If you haven't already installed rydn get it now using devtools
16 | # devtools::install_github("trestletech/rydn")
17 | library(rydn)
18 |
19 | # read in your YDN keys - keep them in a separate file
20 | source("keys/ydn_keys.R")
21 | # That file looks like this:
22 | #mykey="dj0.......................00Zg--"
23 | #mysecret ="00....................8"
24 |
25 | #test it
26 | myloc <- find_place("Barrows Hall, Berkeley,ca",key=mykey,secret=mysecret)
27 | myloc #see what was returned
28 |
29 | # TO interpret response see: https://developer.yahoo.com/boss/geo/docs/supported_responses.html
30 |
31 | #convert strings to numerics
32 | myloc$longitude <- as.numeric(myloc$longitude)
33 | myloc$latitude <- as.numeric(myloc$latitude)
34 |
35 | # work with subset of the returned info
36 | myloc_sub <- myloc[1 ,c("quality", "latitude", "longitude", "radius")] #subset
37 |
38 | #lets plot it
39 | library(ggplot2)
40 | library(ggmap)
41 |
42 | map <- get_map(location=c(lon=as.numeric(myloc_sub$longitude),lat=as.numeric(myloc_sub$latitude)), zoom=17)
43 | ggmap(map) +
44 | geom_point(aes(x = longitude, y = latitude), size = 6, col="red", data = myloc)
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/shapefiles/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/.DS_Store
--------------------------------------------------------------------------------
/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.dbf
--------------------------------------------------------------------------------
/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]]
--------------------------------------------------------------------------------
/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shp
--------------------------------------------------------------------------------
/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCommunityCollegeDistricts/geo_export_ffa93779-e8e7-4680-a57c-75b25ae5830c.shx
--------------------------------------------------------------------------------
/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.dbf
--------------------------------------------------------------------------------
/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]]
--------------------------------------------------------------------------------
/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shp
--------------------------------------------------------------------------------
/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/Geocoding-in-R/40a0369f3b29a5874394ffafd793edc7012144ea/shapefiles/AlamedaCountySchools/geo_export_c08c26d7-65c8-4b7f-8675-fac05e9b6dca.shx
--------------------------------------------------------------------------------
/tiger/test_out.txt:
--------------------------------------------------------------------------------
1 | "3","10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 Golf Links Rd, OAKLAND, CA, 94605","-122.12688,37.753845","125011765","L","06","001","409900","4003"
2 | "2","4728 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4728 Scotia Ave, OAKLAND, CA, 94605","-122.125015,37.75487","125011836","R","06","001","409900","4004"
3 | "1","10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 Cotter St, OAKLAND, CA, 94605","-122.12374,37.755764","125011838","R","06","001","409900","4001"
4 | "10","271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13144,37.75938","125011739","R","06","001","409900","3019"
5 | "7","4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 Scotia Ave, OAKLAND, CA, 94605","-122.12387,37.7552","125011839","L","06","001","409900","4001"
6 | "6","111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 Donna Way, OAKLAND, CA, 94605","-122.13216,37.760204","125011738","L","06","001","409900","3005"
7 | "5","380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 Elysian Fields Dr, OAKLAND, CA, 94605","-122.1282,37.76107","125011702","L","06","001","409900","3005"
8 | "4","4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 Grass Valley Rd, OAKLAND, CA, 94605","-122.12255,37.75109","617284248","L","06","001","409900","4010"
9 | "9","10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 Pebble Beach Dr, OAKLAND, CA, 94605","-122.12747,37.76139","125011703","L","06","001","409900","3015"
10 | "8","248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13248,37.759678","125011736","L","06","001","409900","3010"
11 | "11","4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 Shetland Ave, OAKLAND, CA, 94605","-122.1222,37.752895","125011864","R","06","001","409900","4010"
12 | "12","24 KEY CT, Oakland, CA, 94605","Match","Exact","24 Key Ct, OAKLAND, CA, 94605","-122.124405,37.75517","125011835","R","06","001","409900","4001"
13 |
--------------------------------------------------------------------------------
/tiger/test_out2.txt:
--------------------------------------------------------------------------------
1 | "3","10834 GOLF LINKS RD, Oakland, CA, 94605","Match","Exact","10834 Golf Links Rd, OAKLAND, CA, 94605","-122.12688,37.753845","125011765","L","06","001","409900","4003"
2 | "2","4728 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4728 Scotia Ave, OAKLAND, CA, 94605","-122.125015,37.75487","125011836","R","06","001","409900","4004"
3 | "1","10709 COTTER ST, Oakland, , 94605","Match","Exact","10709 Cotter St, OAKLAND, CA, 94605","-122.12374,37.755764","125011838","R","06","001","409900","4001"
4 | "10","271 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","271 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13144,37.75938","125011739","R","06","001","409900","3019"
5 | "7","4855 SCOTIA AVE, Oakland, CA, 94605","Match","Exact","4855 Scotia Ave, OAKLAND, CA, 94605","-122.12387,37.7552","125011839","L","06","001","409900","4001"
6 | "6","111 DONNA WAY, Oakland, CA, 94605","Match","Exact","111 Donna Way, OAKLAND, CA, 94605","-122.13216,37.760204","125011738","L","06","001","409900","3005"
7 | "5","380 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","380 Elysian Fields Dr, OAKLAND, CA, 94605","-122.1282,37.76107","125011702","L","06","001","409900","3005"
8 | "4","4627 GRASS VALLEY RD, Oakland, CA, 94605","Match","Exact","4627 Grass Valley Rd, OAKLAND, CA, 94605","-122.12255,37.75109","617284248","L","06","001","409900","4010"
9 | "9","10520 PEBBLE BEACH DR, Oakland, CA, 94605","Match","Exact","10520 Pebble Beach Dr, OAKLAND, CA, 94605","-122.12747,37.76139","125011703","L","06","001","409900","3015"
10 | "8","248 ELYSIAN FIELDS DR, Oakland, CA, 94605","Match","Exact","248 Elysian Fields Dr, OAKLAND, CA, 94605","-122.13248,37.759678","125011736","L","06","001","409900","3010"
11 | "11","4840 SHETLAND AVE, Oakland, CA, 94605","Match","Exact","4840 Shetland Ave, OAKLAND, CA, 94605","-122.1222,37.752895","125011864","R","06","001","409900","4010"
12 | "12","24 KEY CT, Oakland, CA, 94605","Match","Exact","24 Key Ct, OAKLAND, CA, 94605","-122.124405,37.75517","125011835","R","06","001","409900","4001"
13 |
--------------------------------------------------------------------------------
/tiger/tiger_12addresses_to_geocode.csv:
--------------------------------------------------------------------------------
1 | 1,10709 COTTER ST,Oakland,,94605
2 | 2,4728 SCOTIA AVE,Oakland,CA,94605
3 | 3,10834 GOLF LINKS RD,Oakland,CA,94605
4 | 4,4627 GRASS VALLEY RD,Oakland,CA,94605
5 | 5,380 ELYSIAN FIELDS DR,Oakland,CA,94605
6 | 6,111 DONNA WAY,Oakland,CA,94605
7 | 7,4855 SCOTIA AVE,Oakland,CA,94605
8 | 8,248 ELYSIAN FIELDS DR,Oakland,CA,94605
9 | 9,10520 PEBBLE BEACH DR,Oakland,CA,94605
10 | 10,271 ELYSIAN FIELDS DR,Oakland,CA,94605
11 | 11,4840 SHETLAND AVE,Oakland,CA,94605
12 | 12,24 KEY CT,Oakland,CA,94605
13 |
--------------------------------------------------------------------------------