├── Shapefiles ├── ntas.dbf ├── ntas.shp ├── ntas.shx ├── boroughs.dbf ├── boroughs.shp ├── boroughs.shx ├── ntas.prj └── boroughs.prj ├── Data Dictionaries ├── lion_metadata.pdf ├── nybb_metadata.pdf ├── nynta2010_metadata.pdf └── Data_Dictionary_OpenRestaurants.xlsx ├── R Ladies Tutorial - Feb 2022 Meetup.pptx ├── README.md ├── R Ladies Geospatial Tutorial Extras.Rmd ├── R Ladies Geospatial Tutorial_md.Rmd └── .Rhistory /Shapefiles/ntas.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/ntas.dbf -------------------------------------------------------------------------------- /Shapefiles/ntas.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/ntas.shp -------------------------------------------------------------------------------- /Shapefiles/ntas.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/ntas.shx -------------------------------------------------------------------------------- /Shapefiles/boroughs.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/boroughs.dbf -------------------------------------------------------------------------------- /Shapefiles/boroughs.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/boroughs.shp -------------------------------------------------------------------------------- /Shapefiles/boroughs.shx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Shapefiles/boroughs.shx -------------------------------------------------------------------------------- /Data Dictionaries/lion_metadata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Data Dictionaries/lion_metadata.pdf -------------------------------------------------------------------------------- /Data Dictionaries/nybb_metadata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Data Dictionaries/nybb_metadata.pdf -------------------------------------------------------------------------------- /Data Dictionaries/nynta2010_metadata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Data Dictionaries/nynta2010_metadata.pdf -------------------------------------------------------------------------------- /R Ladies Tutorial - Feb 2022 Meetup.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/R Ladies Tutorial - Feb 2022 Meetup.pptx -------------------------------------------------------------------------------- /Data Dictionaries/Data_Dictionary_OpenRestaurants.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtr13/r-ladies-mapping-tutorial/main/Data Dictionaries/Data_Dictionary_OpenRestaurants.xlsx -------------------------------------------------------------------------------- /Shapefiles/ntas.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 
0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /Shapefiles/boroughs.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # r-ladies-mapping-tutorial 2 | 3 | This is for the R-Ladies NYC workshop on Thursday, February 24th https://www.meetup.com/rladies-newyork/events/284081788/. 4 | 5 | # Pre Workshop Setup Instructions 6 | 7 | Internet access is preferred for downloading the data, but it will also be available in the repo in the "Shapefiles" folder. 8 | 9 | Please install the following packages from CRAN using `install.packages("package_name")`: 10 | 11 | `install.packages("rgdal")` 12 | `install.packages("ggplot2")` 13 | `install.packages("sf")` 14 | `install.packages("data.table")` 15 | `install.packages("dplyr")` 16 | `install.packages("RColorBrewer")` 17 | `install.packages("stringr")` 18 | `install.packages("ggthemes")` 19 | `install.packages("magrittr")` 20 | `install.packages("classInt")` 21 | `install.packages("tmap")` 22 | 23 | 24 | 25 | By the end of this presentation, you will be able to import, process, and plot geospatial data in static and dynamic maps. 26 | There are two markdown files: 27 | - main presentation - this will be used during the event 28 | - supplementary content - this contains some other useful spatial tools and custom mapping. 
This notebook CANNOT be run on its own 29 | 30 | 31 | Supplementary packages: 32 | 33 | `install.packages("extrafont")` 34 | `loadfonts(device = "win")` 35 | `install.packages("scales")` 36 | `install.packages("ggsn")` 37 | -------------------------------------------------------------------------------- /R Ladies Geospatial Tutorial Extras.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Ladies Geospatial Tutorial Extras" 3 | output: html_document 4 | --- 5 | 6 | 7 | ```{r} 8 | library(scales) ## commas in legend 9 | library(extrafont) ##fonts for map 10 | loadfonts(device = "win") ### fonts for map 11 | library(ggsn) ##scalebar 12 | library(rgdal) ###used for projections and importing data 13 | library(ggplot2) ##creating the map 14 | library(sf) ## data is stored as sf dataframes 15 | library(data.table) 16 | library(dplyr) ##data transformations 17 | library(stringr) 18 | ``` 19 | 20 | ```{r} 21 | #### destination for all unzipped folders 22 | zip_dl = "C:/Users/argun/Documents/Shapefiles/NYC/Testing" 23 | 24 | 25 | ##create a temporary folder to unzip (only do this once, not for every file) 26 | temp_folder= paste0(zip_dl, "/temp") 27 | 28 | 29 | ###final folder for all shapefiles 30 | ###create new folder (only do this once, not for every file) 31 | new_dir <- paste0(zip_dl, "/shps") 32 | 33 | ``` 34 | 35 | ### Working With Files from a Geodatabase 36 | 37 | ### We're Going to Work with LION, which is a representation of NYC's Street Network and Other Boundaries 38 | 39 | ### Reading in Files from a Geodatabase 40 | 41 | A geodatabase contains multiple files, so you need to know which file you're reading in. 
In this case, we only want NYC's road network 42 | 43 | ```{r} 44 | 45 | ##read lion from gdb 46 | download.file("https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyclion_21d.zip", 47 | destfile = paste0(zip_dl, "/nyclion_21d.zip"), 48 | mode = "wb") 49 | 50 | unzip(paste0(zip_dl, "/nyclion_21d.zip"), 51 | exdir=new_dir) 52 | 53 | 54 | gdb <- paste0(new_dir, "/lion/lion.gdb") ## path to the unzipped geodatabase, derived from new_dir so only zip_dl needs editing 55 | 56 | ### let's check out what's in the file geodatabase 57 | ogrListLayers(gdb) ### print the names of the layers stored in the geodatabase 58 | 59 | ``` 60 | 61 | ### Based on the metadata, we know LION is the road network 62 | 63 | ```{r} 64 | 65 | lion <- st_read(dsn=gdb,layer="lion") ## read in data 66 | lion_cast <- st_cast(lion, "MULTILINESTRING") 67 | lion_condensed <- lion_cast %>% 68 | ### remove railroads, water edge, census block boundary, paper street 69 | ### district boundary, alley, and ferry route 70 | filter(!FeatureTyp %in% c("1", "2", "3", "5", "7", "8", "A", "F" )) %>% 71 | filter(!RB_Layer %in% c("R", "S", "F")) ## remove roadbed, suppressed, and fake segments 72 | lion_condensed <- st_transform(lion_condensed, 2263) 73 | ``` 74 | ### Checking LION Geometry before st_cast command above 75 | ```{r} 76 | st_geometry_type(lion, by_geometry = FALSE) 77 | ``` 78 | ### LION Geometry after st_cast 79 | ### If you want to perform spatial operations on a sf file, the geometry type needs to be consistent 80 | ```{r} 81 | st_geometry_type(lion_cast, by_geometry = FALSE) 82 | ``` 83 | ```{r} 84 | rm(lion) 85 | rm(lion_cast) 86 | ``` 87 | 88 | ### Read In All Other Data - Process for Downloading boroughs, parks, and ntas is in the main file 89 | ### Here I also added parks properties 90 | 91 | ```{r} 92 | boroughs <- st_transform(st_read(new_dir, "boroughs" ), 2263) 93 | ntas <- st_transform(st_read(new_dir, "ntas"), 2263) 94 | 95 | 96 | ### restaurant data (csv downloaded into zip_dl by the main notebook) 97 | rest <- 
fread(paste0(zip_dl, "/open_restaurants.csv"), header = TRUE) 98 | 99 | ###keep only rows that have BOTH latitude and longitude; st_as_sf() cannot build points from NA coordinates 100 | rest <- rest[!is.na(rest$Latitude) & !is.na(rest$Longitude), ] 101 | 102 | ###make the imported table in to a sf dataframe 103 | rest_sf <- st_as_sf(rest , coords = c("Longitude", "Latitude")) 104 | 105 | 106 | ###set geographic coordinate system and projected coordinate system 107 | sf_proj <- st_transform(st_set_crs(rest_sf, value = 4326), "EPSG: 2263") 108 | 109 | ##PCS 110 | #sf_proj <- st_transform(sf_proj, "EPSG: 2263") 111 | 112 | ###remove duplicates 113 | rests_sf <- unique(sf_proj, by = "objectid") ## sf version for spatial join 114 | 115 | 116 | ### Parks Properties - Download and import 117 | 118 | ####Parks Properties shapefile from NYC Open Data 119 | download.file("https://data.cityofnewyork.us/api/geospatial/enfh-gkve?method=export&format=Shapefile", 120 | destfile = paste0(zip_dl, "/parks_properties.zip"), 121 | mode = "wb") 122 | 123 | unzip(paste0(zip_dl, "/parks_properties.zip"), 124 | exdir=temp_folder) 125 | 126 | old_files <- list.files(temp_folder, pattern = "^geo", full.names = TRUE) ## pattern is a regex: file names starting with "geo" 127 | 128 | new_names = gsub("^.*?\\.","parks_properties.",old_files) 129 | 130 | file.copy(from=old_files, to=paste0(new_dir, "/", new_names)) 131 | file.remove(old_files) 132 | list.files(new_dir) 133 | 134 | 135 | parks_properties <- st_transform(st_read(new_dir, "parks_properties"), 2263) 136 | 137 | 138 | ###remove excess sf dataframes 139 | rm(rest) 140 | rm(rest_sf) 141 | rm(sf_proj) 142 | gc() 143 | ``` 144 | 145 | 146 | ### Let's Make a Closeup Map. 
I picked Crown Heights North 147 | ### In order to get the proper boundaries, you need the bounding box of the neighborhood; the bounding box defines the coordinates of the neighborhood 148 | 149 | ```{r} 150 | 151 | bounding_box = st_bbox(ntas[ntas$ntaname=="Crown Heights North",]) 152 | 153 | ####data for zoomed in map 154 | ntas_cropped <- st_crop(ntas, bounding_box) 155 | rest_cropped <- st_intersection(rests_sf, ntas_cropped) 156 | streets_cropped <- st_intersection(lion_condensed, ntas_cropped) 157 | parks_properties_cropped <- st_intersection(parks_properties, ntas_cropped) %>% 158 | ###remove excess parks area like the green space on Eastern Parkway 159 | subset(!typecatego %in% c("Mall", "Parkway")) 160 | ``` 161 | 162 | ### What does this look like on a map? 163 | 164 | ```{r} 165 | ggplot()+ 166 | geom_sf(data=ntas_cropped, fill="white")+ 167 | geom_sf(data=parks_properties_cropped, fill="#a1c690", color="lightgrey")+ 168 | geom_sf(data = streets_cropped, color="#f2f2f2", size=.005)+ 169 | geom_sf(data=rest_cropped, color="red", size=.2)+ 170 | theme(panel.background = element_rect(fill = "white"), 171 | axis.ticks = element_blank(), 172 | axis.text = element_blank(), 173 | panel.grid = element_line(color = "white", size = 0.8)) 174 | ``` 175 | 176 | 177 | 178 | 179 | ### Watch What Happens When you Try and Add Street Labels...chaos! 180 | 181 | ### This is because each street is not one long line in lion. Each segment between two intersections is a record in the data, so when every segment is labeled, chaos ensues. 
182 | 183 | ```{r} 184 | ggplot()+ 185 | geom_sf(data=ntas_cropped, fill="white")+ 186 | geom_sf(data=parks_properties_cropped, fill="#a1c690", color="lightgrey")+ 187 | geom_sf(data = streets_cropped, color="#f2f2f2", size=.005)+ 188 | geom_sf_text(data=streets_cropped, 189 | aes(label=Street, family="serif"), colour = "black", size=2)+ 190 | geom_sf(data=rest_cropped, color="red", size=.2)+ 191 | theme(panel.background = element_rect(fill = "white"), 192 | axis.ticks = element_blank(), 193 | axis.text = element_blank(), 194 | panel.grid = element_line(color = "white", size = 0.8)) 195 | ``` 196 | 197 | ### Processing for Labeling Roads 198 | 199 | ### Each section of the road between two intersections is a separate line in the data. Therefore, there are several lines that represent each street 200 | 201 | ### The spatial function st_union() allows us to merge these lines based on a column. Now each street name is represented by a single row 202 | 203 | ```{r} 204 | ###union roads for the purpose of labeling 205 | roads.out <- streets_cropped %>% 206 | group_by(Street) %>% 207 | summarize(geometry = st_union(SHAPE)) 208 | ``` 209 | 210 | ### In order to make sure the label appears in the right place on the map, you need to manually adjust the x/y coordinates of the label using geom_sf_text(). This is a very tedious process; the main drawback of using R as a geospatial tool is that labeling is time-consuming. It took me several tries to find a place where the label was sitting close enough to the street without overlapping any points. 
211 | 212 | ```{r} 213 | ggplot() + 214 | geom_sf(data=streets_cropped, aes(color="streets"), show.legend = "line", size=.005)+ 215 | geom_sf(data=parks_properties_cropped, fill="#a1c690", color=NA)+ 216 | geom_sf_text(data=roads.out[roads.out$Street=="ATLANTIC AVENUE",], 217 | aes(label=Street, family="serif"), colour = "black", size=2, angle=-5 218 | ,nudge_x=1600, nudge_y=-35)+ 219 | geom_sf_text(data=roads.out[roads.out$Street=="FRANKLIN AVENUE",], 220 | aes(label=Street, family="serif"), colour = "black", size=2, 221 | angle=80 , 222 | nudge_x = -70, nudge_y = 350)+ 223 | geom_sf_text(data=roads.out[roads.out$Street=="EASTERN PARKWAY",], 224 | aes(label=Street, family="serif"), colour = "black", size=2, angle=-5, 225 | nudge_x = 5000, nudge_y=-750)+ 226 | geom_sf(data=rest_cropped, aes(color="Restaurant Permits"), show.legend = "point", size=1.2)+ 227 | theme(axis.text.x = element_blank(), 228 | axis.text.y = element_blank(), 229 | axis.ticks = element_blank(), 230 | rect = element_blank(), 231 | panel.background = element_blank(), 232 | plot.background = element_rect(fill = "white"), 233 | plot.title= element_text(colour="black", size = 20, face = "bold", hjust=0.5,), 234 | legend.position = "right") + 235 | theme(legend.title = element_blank(), 236 | legend.spacing.y = unit(0, "mm"), 237 | panel.border = element_rect(colour = "black", fill=NA), 238 | aspect.ratio = 1, axis.text = element_text(colour = 1, size = 12), 239 | legend.background = element_blank(), 240 | legend.box.background = element_rect(colour = "black"))+ 241 | labs(title = "Open Restaurant Permits \n Crown Heights North")+ 242 | xlab("") + 243 | ylab("")+ 244 | scale_colour_manual(values = c("Restaurant Permits" = "red", "streets"="darkgrey"), 245 | guide = guide_legend(override.aes = list(linetype = c("blank", "solid"), 246 | shape=c(16, NA))))+ 247 | scalebar(ntas_cropped, dist = .5, dist_unit = "mi", location="bottomleft", 248 | transform = FALSE, model = "WGS84") 249 | 250 | ``` 251 | 252 
| 253 | 254 | -------------------------------------------------------------------------------- /R Ladies Geospatial Tutorial_md.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Ladies Geospatial Tutorial_md.ipynb" 3 | output: html_document 4 | --- 5 | 6 | ```{r} 7 | library(rgdal) ###used for projections and importing data 8 | library(ggplot2) ##creating the map 9 | library(sf) ## data is stored as sf dataframes 10 | library(data.table) 11 | library(dplyr) ##data transformations 12 | library(RColorBrewer) #color palettes 13 | library(stringr) ##string manipulation when filtering dataframes 14 | library(ggthemes) ##map design 15 | library(magrittr) ##pipes 16 | library(classInt) ##intervals for legend 17 | library(grDevices) ### added for export to PDF 18 | library(tmap) ###interactive mapping 19 | ``` 20 | 21 | ### Importing Shapefile Data, Projecting, and Converting to R SF format 22 | 23 | There are two ways you can download data: 24 | 25 | 1. Manually download 26 | 27 | 2. Get URL for download and do all processing in R 28 | 29 | 30 | ### Since the file names are so long and complicated, the process will be as follows: 31 | 32 | - download the file 33 | 34 | - unzip 35 | 36 | - create a subfolder to process and rename the files in 37 | 38 | - rename the file 39 | 40 | - create a final folder where the processed and renamed files will go 41 | 42 | #### First, Let's Create the Directories We'll Be Downloading the Data Into, Cleaning the Data In, 43 | ### and then the Final Folder 44 | 45 | ```{r} 46 | #### destination for all unzipped folders 47 | 48 | zip_dl = "C:/Users/AyanthiGunawardana/Documents/r-ladies-mapping-tutorial-main/Testing" 49 | 50 | 51 | ##create a temporary folder to unzip (only do this once, not for every file) 52 | ###note that every folder here is already on my hard drive EXCEPT the folder at the end of string. 
if the entire string is a series of new folders, you need to add recursive=TRUE 53 | temp_folder= paste0(zip_dl, "/temp") 54 | dir.create(temp_folder) 55 | 56 | ###final folder for all shapefiles 57 | ###create new folder (only do this once, not for every file) 58 | new_dir <- paste0(zip_dl, "/shps") 59 | dir.create(new_dir) 60 | 61 | 62 | 63 | 64 | ``` 65 | 66 | ### Boroughs - Download 67 | 68 | ```{r} 69 | 70 | 71 | ###the open data shapefiles don't have clear names, so we need to do some pre-processing 72 | ###download the data (these are the borough boundaries) 73 | download.file("https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile", 74 | destfile = paste0(zip_dl, "/boroughs.zip"), 75 | mode = "wb") 76 | 77 | ##unzip the folder 78 | unzip(paste0(zip_dl, "/boroughs.zip"), 79 | exdir=temp_folder) 80 | 81 | ###we know all the downloads have the file pattern of "geo" with several characters after 82 | old_files <- list.files(temp_folder, full.names = TRUE) 83 | old_files 84 | utils::browseURL(temp_folder) 85 | 86 | ``` 87 | 88 | ### Boroughs - Cleaning the Files 89 | 90 | ```{r} 91 | # rename the download 92 | new_names = gsub("^.*?\\.","boroughs.",old_files) 93 | 94 | 95 | 96 | ##move unzipped and clean files to a new folder 97 | file.copy(from=old_files, to=paste0(new_dir, "/", new_names)) 98 | file.remove(old_files) 99 | 100 | 101 | ###let's check out the new files 102 | new_files <- list.files(new_dir) 103 | new_files 104 | utils::browseURL(new_dir) 105 | 106 | ``` 107 | 108 | 109 | ### NYC Neighborhoods (NTAs) 110 | #### note: we are going to use the parks subset of this dataset eventually 111 | 112 | ```{r} 113 | ####NTAS, eventually derive parks from this 114 | download.file("https://data.cityofnewyork.us/api/geospatial/d3qk-pfyz?method=export&format=Shapefile", 115 | destfile = paste0(zip_dl, "/ntas.zip"), 116 | mode = "wb") 117 | 118 | unzip(paste0(zip_dl, "/ntas.zip"), 119 | exdir=temp_folder) 120 | 121 | old_files <- 
list.files(temp_folder, pattern = "^geo", full.names = TRUE) ## pattern is a regex: file names starting with "geo" 122 | 123 | new_names = gsub("^.*?\\.","ntas.",old_files) 124 | 125 | file.copy(from=old_files, to=paste0(new_dir, "/", new_names)) 126 | file.remove(old_files) 127 | list.files(new_dir) 128 | ``` 129 | 130 | ### Read in All Those Shapefiles 131 | 132 | ```{r} 133 | ###read in and project shapefiles to 2263 134 | 135 | boroughs <- st_transform(st_read(new_dir, "boroughs" ), 2263) 136 | ntas <- st_transform(st_read(new_dir, "ntas"), 2263) 137 | parks = ntas %>% filter(str_detect(ntaname, paste0("park", collapse = "|"))) 138 | gc() 139 | ``` 140 | 141 | ### Working With CSVs 142 | 143 | #### csvs require a different type of processing; shapefiles are inherently spatial, excel files need to be converted 144 | #### since csvs are not downloaded in a zip file, there is less processing involved 145 | 146 | ### Open Restaurant Applications - Download 147 | 148 | ```{r} 149 | ###download the csv 150 | 151 | download.file("https://data.cityofnewyork.us/api/views/pitm-atqc/rows.csv?accessType=DOWNLOAD&bom=true&format=true", 152 | destfile = paste0(zip_dl, "/open_restaurants.csv"), 153 | mode = "wb") 154 | 155 | ``` 156 | 157 | ### Import and Process the Data 158 | 159 | ```{r} 160 | ### import the csv 161 | rest <- fread(paste0(zip_dl, "/open_restaurants.csv"), header = TRUE) 162 | colnames(rest) 163 | ``` 164 | 165 | ```{r} 166 | ###keep only rows that have BOTH latitude and longitude; st_as_sf() cannot build points from NA coordinates 167 | rest <- rest[!is.na(rest$Latitude) & !is.na(rest$Longitude), ] 168 | 169 | ###make the imported table in to a sf dataframe 170 | rest_sf <- st_as_sf(rest , coords = c("Longitude", "Latitude")) 171 | 172 | ### remove old file 173 | rm(rest) 174 | colnames(rest_sf) 175 | ``` 176 | 177 | 178 | 179 | ### We've made it spatial, but let's check the coordinate system and see what it looks like plotted 180 | 181 | ```{r} 182 | ##check coordinate system 183 | st_crs(rest_sf) 184 | ``` 185 | 186 | ```{r} 187 | ### now let's see what it looks like 188 | 
###uncomment boroughs to show that you cannot have two datasets with conflicting projections 189 | ggplot()+ 190 | geom_sf(data = rest_sf, color="black", size=.8)#+ 191 | #geom_sf(data = boroughs, color="lightgrey") 192 | ``` 193 | 194 | ### Why does the above data look distorted? Because we haven't assigned a coordinate system. Let's do that now 195 | 196 | ```{r} 197 | st_crs(rest_sf)$IsGeographic ##to check is the CRS is geographic or not 198 | st_crs(rest_sf)$units_gdal ##to find out the CRS units 199 | st_crs(rest_sf)$srid ##extracts its ‘SRID’ identifier (when available) 200 | st_crs(rest_sf)$proj4string ##extracts the proj4string representation 201 | ``` 202 | 203 | 204 | ### Let's See What Happens When We Set the Coordinate Reference System 205 | 206 | ```{r} 207 | ### set coordinate reference system 208 | ###World Geodetic System, 99% of GPS systems use this, I have yet to use data that doesn't 209 | sf_proj <- st_set_crs(rest_sf, value = 4326) 210 | ggplot()+ 211 | geom_sf(data = sf_proj, color="black", size=.8) 212 | 213 | ``` 214 | 215 | ### No Longer Distorted, But We Can't Measure Distances Without a Projection 216 | 217 | ```{r} 218 | st_crs(sf_proj)$units_gdal ##to find out the CRS units 219 | ``` 220 | 221 | ### In Order to Properly Measure Distances, We Need to Project the Data to the Correct Geographic Area 222 | 223 | ```{r} 224 | ### set projection 225 | sf_proj <- st_transform(sf_proj, "EPSG: 2263") 226 | 227 | ##check units 228 | st_crs(sf_proj)$units_gdal ##to find out the CRS units 229 | st_crs(sf_proj)$srid ##extracts its ‘SRID’ identifier (when available) 230 | st_crs(sf_proj)$proj4string ##extracts the proj4string representation 231 | rm(rest_sf) 232 | ``` 233 | 234 | ## Now That The Geospatial Processing is Done, We Can Clean the Data as Needed 235 | 236 | ### Check for duplicates 237 | 238 | ```{r} 239 | sf_proj %>% 240 | group_by(objectid) %>% 241 | filter(n()>1) 242 | ``` 243 | 244 | ### Remove Duplicates 245 | 246 | ```{r} 247 | 
rests_sf <- unique(sf_proj, by = "objectid") ## sf version for spatial join 248 | rm(sf_proj) 249 | gc() 250 | 251 | ``` 252 | 253 | 254 | 255 | 256 | ### Create a simple map using ggplot2 syntax 257 | 258 | How do you think this design can be made better? 259 | 260 | - add context via parks (explains the gaps) 261 | 262 | ```{r} 263 | ggplot()+ 264 | geom_sf(data=boroughs, fill=NA, color="black", lwd=.01)+ 265 | geom_sf(data=ntas, fill=NA, color="black", lwd=.002)+ 266 | geom_sf(data=parks, fill="#a1c690", color="darkgrey")+ 267 | geom_sf(data=rests_sf, color="red", size=.2, shape=16)+ 268 | theme(panel.background = element_rect(fill = "white"), 269 | axis.ticks = element_blank(), 270 | axis.text = element_blank(), 271 | panel.grid = element_line(color = "white", size = 0.8), 272 | plot.title= element_text(colour="black", size = 20, face = "bold", hjust=0.5))+ 273 | labs(title = "Open Restaurant Permits \n New York City", 274 | caption = "Source: NYC Open Data, Retrieved on 2/22/2022") 275 | ``` 276 | 277 | The above map shows us the distribution of restaurants across NYC. But what does this mean to the average person? After a certain critical mass of points, you can't visually tell where there are more or fewer points, or how relevant those points are to the analysis you're trying to do 278 | 279 | ### We can make what's called a choropleth map, which aggregates points to a geometry and shows meaningful patterns 280 | 281 | 282 | ### Restaurants Grouped by NTA 283 | 284 | There are two ways we can do this: 285 | 286 | 1. since the restaurants file has a NTA column we can use dplyr to get the counts by NTA 287 | 288 | 2. 
use a spatial join to count the number of points in polygons 289 | 290 | 291 | ### Note: You CANNOT use dplyr for grouping with spatial objects; even if you group by one column, it automatically also groups by the geometry of each point as well 292 | 293 | 294 | ### Spatial Join Method 295 | 296 | Add a column to the ntas data with the count of restaurants. 297 | 298 | ```{r} 299 | ntas$count_restaurants <- lengths(st_intersects(ntas, rests_sf)) 300 | head(ntas) 301 | ``` 302 | 303 | 304 | ### Normalize the Data and Remove Neighborhoods That Are Outliers 305 | 306 | ```{r} 307 | ###normalize counts 308 | nta_remove = c("cemetery", "Airport") ###list of outliers 309 | airports = c("Airport") 310 | 311 | 312 | nta_rest <- ntas %>% 313 | filter(!grepl(paste(nta_remove, collapse="|"), ntaname)) %>% ##remove outliers 314 | mutate(rest_sqmi= count_restaurants/(shape_area/27878400)) ###get restaurants per square mile 315 | 316 | 317 | 318 | ### keep the airports as a separate df 319 | airports <- ntas %>% 320 | filter(grepl(paste(airports, collapse="|"), ntaname)) 321 | 322 | ``` 323 | 324 | 325 | ### Create a choropleth map from this 326 | 327 | 328 | We don't want every value in restaurants/sq mile to be a separate value, it doesn't tell us much 329 | 330 | ```{r} 331 | ggplot(nta_rest) + 332 | geom_sf(aes(fill=rest_sqmi))+ 333 | scale_fill_viridis_c(option = "D", na.value = "darkgrey") + 334 | geom_sf(data=airports, fill="darkgrey")+ 335 | geom_sf(data=parks, fill="#a1c690", color="darkgrey")+ 336 | theme(panel.background = element_rect(fill = "white"), 337 | axis.ticks = element_blank(), 338 | axis.text = element_blank(), 339 | panel.grid = element_line(color = "white", size = 0.8), 340 | plot.title= element_text(colour="black", size = 24, face = "bold", hjust=0.5,))+ 341 | labs(title = "Open Restaurant Permits Per Square Mile \n New York City") 342 | ``` 343 | 344 | 345 | ```{r} 346 | nta_rest[which(is.na(nta_rest$rest_sqmi)),] 347 | nta_rest[nta_rest$rest_sqmi==0,] 
348 | ``` 349 | 350 | ### In Order to Better Represent this Data, Let's Add Quantile Breaks 351 | 352 | ```{r} 353 | ### remove NTAs with no restaurants 354 | nta_no_rest <- nta_rest %>% 355 | filter(rest_sqmi==0) 356 | 357 | nta_rest <- nta_rest %>% 358 | filter(rest_sqmi > 0) 359 | 360 | 361 | ### use the "cut" function to add a breaks column in your sf object 362 | nta_rest <- mutate(nta_rest, brks = cut(nta_rest$rest_sqmi, quantile(nta_rest$rest_sqmi, probs = seq(0, 1, .2)), include.lowest = TRUE, dig.lab = 4)) 363 | 364 | ###results include the upper bound but NOT the lower bound! 365 | ###let's look at the breaks 366 | unique(nta_rest$brks) 367 | ``` 368 | 369 | ```{r} 370 | ##map colors 371 | brewer.pal(n=5,"YlOrRd") 372 | colors <- c("#FFFFB2", "#FECC5C", "#FD8D3C", "#F03B20", "#BD0026", "#A1C690", "darkgrey", "black") 373 | 374 | ``` 375 | 376 | ### To Properly Label Each Element, we need to manually assign each interval to a color 377 | ```{r} 378 | 379 | ggplot() + 380 | geom_sf(data=nta_rest, aes(fill=brks)) + 381 | geom_sf(data=parks, aes(fill="Parks/Cemeteries"))+ 382 | geom_sf(data=nta_no_rest, aes(fill="No Restaurants"))+ 383 | geom_sf(data=airports, aes(fill="Airports"))+ 384 | theme(panel.background = element_rect(fill = "white"), 385 | axis.ticks = element_blank(), 386 | axis.text = element_blank(), 387 | panel.grid = element_line(color = "white", size = 0.8), 388 | legend.position = c(.2,.8), 389 | plot.title= element_text(colour="black", size = 18, face = "bold", hjust=0.5,))+ 390 | scale_fill_manual("Legend", 391 | values=colors, 392 | breaks=c(levels(nta_rest$brks), 'Parks/Cemeteries', 'No Restaurants', 'Airports'))+ 393 | labs(title = "Open Restaurant Permits \nPer Square Mile \nby Neighborhood Tabulation Area", 394 | caption=paste0("source: NYC Open Data, Retrieved on ", Sys.Date())) 395 | 396 | ``` 397 | 398 | ### Save this as a pdf using the code below 399 | 400 | ```{r} 401 | ggsave("test_map_rest.pdf", 402 | plot=last_plot(), 403 | 
width=8.5, height=11, units="in", 404 | dpi=1200) 405 | 406 | ``` 407 | 408 | ```{r} 409 | #dev.off() 410 | ``` 411 | 412 | ### Same map in TMAP: easier syntax for quintiles 413 | 414 | ```{r} 415 | tmap_mode("plot") ##switch to static plotting; this is a setter, not a map layer, so it is not chained with + 416 | tm_shape(nta_rest) + 417 | tm_polygons("rest_sqmi", 418 | style="quantile" 419 | ,legend.format = list(text.separator="-", fun = function(x) formatC(x, digits = 2, big.mark = ",", format = "f")), title="" 420 | )+ 421 | tm_shape(nta_no_rest)+ 422 | tm_polygons(col = "darkgrey") + 423 | tm_shape(airports)+ 424 | tm_polygons(col="black")+ 425 | tm_shape(parks)+ 426 | tm_polygons(col="#a1c690", border.col = "darkgrey")+ 427 | ##custom legend start 428 | #tm_add_legend( 429 | # type = "fill", 430 | # labels = c(levels(nta_rest$brks), 'Parks/Cemeteries', 'No Restaurants', 'Airports'), 431 | # col = colors 432 | #) + 433 | ###custom legend end 434 | ###legend and title placement 435 | tm_layout(legend.position = c("left", "top"), 436 | title= "New York City\nOpen Restaurant Permits \nPer Square Mile", 437 | title.size = 1, 438 | title.position = c('left', 'top'))+ 439 | ###source 440 | tm_credits(paste0("Source: NYC Open Data, Retrieved On ", Sys.Date()), 441 | position = c("RIGHT", "BOTTOM")) 442 | 443 | ``` 444 | 445 | ### Interactive Map 446 | 447 | 448 | ```{r} 449 | tmap_mode("view") 450 | tm_shape(ntas)+ 451 | tm_polygons(col="white", alpha = .25)+ 452 | tm_shape(rests_sf)+ 453 | tm_symbols(shape=21, col="red", size = .0005, 454 | popup.vars=c("Restaurant Name"="Restaurant Name", "Seating Interest (Sidewalk/Roadway/Both)"="Seating Interest (Sidewalk/Roadway/Both)")) 455 | ``` 456 | -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | geom_sf(data=ntas, aes(fill="Neighborhood Boundaries"), color="black", lwd=.002)+ 2 | #geom_sf(data=parks, fill="#a1c690", color="darkgrey")+ 3 | geom_sf(data=parks_properties, 
aes(fill="Parks"), lwd=.002)+
  #geom_sf(data = lion_condensed, color="#f2f2f2", aes(color="Street"), size=.001)+
  geom_sf(data=rests_sf, aes(colour="Open Restaurant Permits"), color="red", size=.2, show.legend = "point", shape=16)+
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8),
        plot.title= element_text(colour="black", size = 24, face = "bold", hjust=0.5,),
        legend.position = "left",
        legend.spacing.y = unit(0, 'cm'))+
  ###legend for polygons
  scale_fill_manual(values = c("Parks" = "#a1c690", "Neighborhood Boundaries"="white"), name="Legend",
                    guide = guide_legend(override.aes = list(linetype = c("solid", "solid"),
                                                             shape = c(NA, NA)))) +
  ###legend for points
  scale_color_manual(values = c("Open Restaurant Permits" = "red"), name = NULL,
                     guide=guide_legend(override.aes = list(shape = c(16),
                                                            size=2))) +
  labs(title = "Open Restaurant Permits \n New York City",
       caption = "Source: NYC Open Data, Retrieved on 2/22/2022")

library(rgdal)        # used for projections and importing data
library(ggplot2)      # creating the map
library(sf)           # data is stored as sf dataframes
library(data.table)
library(dplyr)        # data transformations
library(RColorBrewer) # color palettes
library(stringr)      # string manipulation when filtering dataframes
library(ggthemes)     # map design
library(magrittr)     # pipes
library(classInt)     # intervals for legend
library(grDevices)    # added for export to PDF
library(tmap)         # interactive mapping

# ---- Folder setup (done once, not for every file) ----
# Destination for all downloaded zip archives.
zip_dl <- "C:/Users/argun/Documents/Shapefiles/NYC/Testing"
# Temporary folder the archives are unzipped into.
temp_folder <- paste0(zip_dl, "/temp")
# Final folder for all renamed shapefiles.
new_dir <- paste0(zip_dl, "/shps")

# Only create a folder that is missing, so re-running the script does not warn.
for (folder in c(temp_folder, new_dir)) {
  if (!dir.exists(folder)) {
    dir.create(folder)
  }
}

# Build the new name for each unzipped file: everything up to the first "."
# of the FILE NAME is replaced by `layer` ("geo_export_x.shp" -> "<layer>.shp").
# The pattern is applied to basename() so that a dot inside a directory name
# cannot be mistaken for the start of the extension.
layer_file_names <- function(paths, layer) {
  sub("^[^.]*\\.", paste0(layer, "."), basename(paths))
}

# Download one zipped shapefile, unzip it into `unzip_dir`, and move the
# pieces into `out_dir` renamed to `layer`.* (the open data shapefiles don't
# have clear names, so they need this pre-processing).
#   url       download link of the zipped shapefile
#   layer     name to give the files, also used for the zip archive
#   zip_dir   folder the zip archive is saved in
#   unzip_dir folder the archive is extracted into
#   out_dir   folder that receives the renamed files
# Returns (invisibly) the logical vector produced by file.copy().
fetch_shapefile <- function(url, layer, zip_dir = zip_dl,
                            unzip_dir = temp_folder, out_dir = new_dir) {
  zip_path <- paste0(zip_dir, "/", layer, ".zip")
  download.file(url, destfile = zip_path, mode = "wb")
  unzip(zip_path, exdir = unzip_dir)
  # The exported files all start with "geo"; "^geo" anchors that prefix
  # (the regex "geo*" would match any name containing "ge").
  old_files <- list.files(unzip_dir, pattern = "^geo", full.names = TRUE)
  copied <- file.copy(from = old_files,
                      to = file.path(out_dir, layer_file_names(old_files, layer)))
  # Delete only what was copied successfully, so a failed copy loses no data.
  file.remove(old_files[copied])
  invisible(copied)
}

# Borough boundaries
fetch_shapefile("https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile",
                "boroughs")
### let's check out the new files
list.files(new_dir)

# NTAs, eventually derive parks from this
fetch_shapefile("https://data.cityofnewyork.us/api/geospatial/d3qk-pfyz?method=export&format=Shapefile",
                "ntas")
list.files(new_dir)

### read in and project shapefiles to 2263
boroughs <- st_transform(st_read(new_dir, "boroughs"), 2263)
ntas <- st_transform(st_read(new_dir, "ntas"), 2263)
# NTAs whose name contains "park"
parks <- ntas %>% filter(str_detect(ntaname, "park"))
gc()
### download the csv
# ---- Open Restaurants permit points ----
# Local copy of the Open Restaurants CSV.
rest_csv <- "C:/Users/argun/Documents/Shapefiles/NYC/Testing/open_restaurants.csv"
download.file("https://data.cityofnewyork.us/api/views/pitm-atqc/rows.csv?accessType=DOWNLOAD&bom=true&format=true",
              destfile = rest_csv,
              mode = "wb")

### import the csv
rest <- fread(rest_csv, header = TRUE)
colnames(rest)

# Remove rows without lat/long data. A row is usable only when BOTH
# coordinates are present: st_as_sf() stops on a missing coordinate, so the
# two tests are combined with `&` (with `|` a half-missing row would be kept).
rest <- rest[!is.na(rest$Latitude) & !is.na(rest$Longitude), ]

### make the imported table into an sf dataframe
rest_sf <- st_as_sf(rest, coords = c("Longitude", "Latitude"))
rm(rest)
colnames(rest_sf)
rest_sf$geometry
## check coordinate system
st_crs(rest_sf)

### now let's see what it looks like
### uncomment boroughs to show that you cannot have two datasets with conflicting projections
ggplot() +
  geom_sf(data = rest_sf, color = "black", size = .8) #+
  #geom_sf(data = boroughs, color="lightgrey")

st_crs(rest_sf)$IsGeographic ## to check if the CRS is geographic or not
st_crs(rest_sf)$units_gdal   ## to find out the CRS units
st_crs(rest_sf)$srid         ## extracts its 'SRID' identifier (when available)
st_crs(rest_sf)$proj4string  ## extracts the proj4string representation

### set coordinate reference system (4326 = World Geodetic System 1984)
sf_proj <- st_set_crs(rest_sf, value = 4326)
ggplot() +
  geom_sf(data = sf_proj, color = "black", size = .8)
st_crs(sf_proj)$units_gdal

### set projection; the numeric EPSG code matches how boroughs/ntas are transformed
sf_proj <- st_transform(sf_proj, 2263)
## check units
st_crs(sf_proj)$units_gdal
st_crs(sf_proj)$srid
st_crs(sf_proj)$proj4string
rm(rest_sf)

# Show permits whose objectid occurs more than once.
sf_proj %>%
  group_by(objectid) %>%
  filter(n() > 1)
# NOTE(review): `by =` is a data.table argument; confirm that unique() on this
# sf object actually de-duplicates on objectid rather than on whole rows.
rests_sf <- unique(sf_proj, by = "objectid") ## sf version for spatial join
rm(sf_proj)
gc()

# ---- Overview map: every permit over boroughs, NTAs and parks ----
ggplot() +
  geom_sf(data = boroughs, fill = NA, color = "black", lwd = .01) +
  geom_sf(data = ntas, fill = NA, color = "black", lwd = .002) +
  geom_sf(data = parks, fill = "#a1c690", color = "darkgrey") +
  geom_sf(data = rests_sf, color = "red", size = .2, shape = 16) +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8),
        plot.title = element_text(colour = "black", size = 20, face = "bold", hjust = 0.5)) +
  labs(title = "Open Restaurant Permits \n New York City",
       caption = "Source: NYC Open Data, Retrieved on 2/22/2022")

# ---- Permits per NTA ----
# Number of permit points intersecting each NTA polygon.
ntas$count_restaurants <- lengths(st_intersects(ntas, rests_sf))
head(ntas)

### normalize counts
nta_remove <- c("cemetery", "Airport") ### list of outliers
airport_pattern <- "Airport"
nta_rest <- ntas %>%
  filter(!grepl(paste(nta_remove, collapse = "|"), ntaname)) %>% ## remove outliers
  # 27878400 = 5280^2, the number of square feet in a square mile
  mutate(rest_sqmi = count_restaurants / (shape_area / 27878400))
### keep the airports as a separate df
airports <- ntas %>%
  filter(grepl(airport_pattern, ntaname))

ggplot(nta_rest) +
  geom_sf(aes(fill = rest_sqmi)) +
  scale_fill_viridis_c(option = "D", na.value = "darkgrey") +
  geom_sf(data = airports, fill = "darkgrey") +
  geom_sf(data = parks, fill = "#a1c690", color = "darkgrey") +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8),
        plot.title = element_text(colour = "black", size = 24, face = "bold", hjust = 0.5)) +
  labs(title = "Open Restaurant Permits Per Square Mile \n New York City")

# ---- Quantile classes ----
nta_rest[nta_rest$rest_sqmi == 0, ]
### split off NTAs with no restaurants
nta_no_rest <- nta_rest %>%
  filter(rest_sqmi == 0)
nta_rest <- nta_rest %>%
  filter(rest_sqmi > 0)

# get quantile breaks.
breaks_qt <- classIntervals(c(min(nta_rest$rest_sqmi), nta_rest$rest_sqmi), n = 5, style = "quantile")
### use the "cut" function to add a breaks column in your sf object
nta_rest <- mutate(nta_rest, brks = cut(rest_sqmi, breaks_qt$brks, include.lowest = TRUE, dig.lab = 4))
### results include the upper bound but NOT the lower bound!
breaks_qt
## map colors
brewer.pal(n = 5, "YlOrRd")

# Fill colours keyed by legend entry. The class labels are read from the
# factor that cut() just produced instead of being typed in by hand, so the
# legend still matches after the CSV is downloaded again and the quantile
# breaks move.
legend_fills <- c(
  setNames(brewer.pal(n = 5, "YlOrRd"), levels(nta_rest$brks)),
  "Parks/Cemeteries" = "#a1c690",
  "No Restaurants" = "darkgrey",
  "Airports" = "black"
)
legend_keys <- names(legend_fills)

ggplot() +
  geom_sf(data = nta_rest, aes(fill = brks)) +
  geom_sf(data = parks, aes(fill = "Parks/Cemeteries")) +
  geom_sf(data = nta_no_rest, aes(fill = "No Restaurants")) +
  geom_sf(data = airports, aes(fill = "Airports")) +
  scale_fill_manual("Legend",
                    values = legend_fills,
                    breaks = legend_keys,
                    labels = legend_keys,
                    guide = guide_legend(override.aes = list(
                      linetype = rep("solid", length(legend_keys)),
                      shape = rep(NA, length(legend_keys))))) +
  labs(title = "Open Restaurant Permits \nPer Square Mile \nby Neigborhood Tabulation Area",
       caption = "source: NYC Open Data, Retrieved on 2/24/2022")

# ---- tmap: static version ----
# tmap_mode() only switches the mode; it is not a map element, so it is
# called on its own rather than being chained into the map with `+`.
tmap_mode("plot")
tm_shape(nta_rest) +
  tm_polygons("rest_sqmi",
              style = "quantile",
              legend.format = list(text.separator = "-",
                                   fun = function(x) formatC(x, digits = 2, big.mark = ",", format = "f")),
              title = "New York City\nOpen Restaurant Permits \nper Square Mile") +
  tm_shape(nta_no_rest) +
  tm_polygons(col = "darkgrey") +
  tm_shape(airports) +
  tm_polygons(col = "black") +
  tm_shape(parks) +
  tm_polygons(col = "#a1c690", border.col = "darkgrey")

# ---- tmap: interactive version ----
# The popup columns come from the restaurant CSV, so the symbols have to be
# drawn from rests_sf; nta_rest only carries the NTA polygon attributes.
colnames(nta_rest)
tmap_mode("view")
tm_shape(ntas) +
  tm_polygons(col = "white", alpha = .5) +
  tm_shape(rests_sf) +
  tm_symbols(shape = 21, col = "red", size = .0005,
             popup.vars = c("Restaurant Name" = "Restaurant Name",
                            "Seating Interest (Sidewalk/Roadway/Both)" = "Seating Interest (Sidewalk/Roadway/Both)"))

# ---- Street-level map: packages and folders ----
library(scales)     ## commas in legend
library(extrafont)  ## fonts for map
loadfonts(device = "win") ### fonts for map
library(ggsn)       ## scalebar
library(rgdal)      ### used for projections and importing data
library(ggplot2)    ## creating the map
library(sf)         ## data is stored as sf dataframes
library(data.table)
library(dplyr)      ## data transformations
library(stringr)    ## str_detect() for the parks filter below

#### destination for all unzipped folders
zip_dl <- "C:/Users/argun/Documents/Shapefiles/NYC/Testing"
## temporary folder to unzip into
temp_folder <- paste0(zip_dl, "/temp")
### final folder for all shapefiles
new_dir <- paste0(zip_dl, "/shps")

# ---- LION street centerlines (file geodatabase) ----
download.file("https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyclion_21d.zip",
              destfile = paste0(zip_dl, "/nyclion_21d.zip"),
              mode = "wb")
unzip(paste0(zip_dl, "/nyclion_21d.zip"),
      exdir = new_dir)
gdb <- file.path(new_dir, "lion", "lion.gdb") ## make path an object
### let's check out what's in the file geodatabase
ogrListLayers(gdb)
lion <- st_read(dsn = gdb, layer = "lion") ## read in data
lion_cast <- st_cast(lion, "MULTILINESTRING")
lion_condensed <- lion_cast %>%
  ### remove railroads, water edge, census block boundary, paper street
  ### district boundary, alley, and ferry route
  filter(!FeatureTyp %in% c("1", "2", "3", "5", "7", "8", "A", "F")) %>%
  filter(!RB_Layer %in% c("R", "S", "F")) ## remove roadbed, suppressed, and fake segments
lion_condensed <- st_transform(lion_condensed, 2263)
st_geometry_type(lion, by_geometry = FALSE)
st_geometry_type(lion_cast, by_geometry = FALSE)

# Reload the boundary layers once; this also drops the count_restaurants
# column that was added to ntas above. rests_sf is already in memory and is
# not rebuilt.
boroughs <- st_transform(st_read(new_dir, "boroughs"), 2263)
ntas <- st_transform(st_read(new_dir, "ntas"), 2263)
parks <- ntas %>% filter(str_detect(ntaname, "park"))
gc()

#### data for zoomed in map
ntas_cropped <- st_crop(ntas, xmin = 994462.6, xmax = 1005982.5,
                        ymin = 181378.1, ymax = 188278.7)
# sf_proj was removed after de-duplication, so clip the de-duplicated points.
rest_cropped <- st_intersection(rests_sf, ntas_cropped)
streets_cropped <- st_intersection(lion_condensed, ntas_cropped)
# ---- Parks Properties: download and import ----
# boroughs, ntas and rests_sf are already in memory from the steps above, so
# they are not rebuilt here.
download.file("https://data.cityofnewyork.us/api/geospatial/enfh-gkve?method=export&format=Shapefile",
              destfile = paste0(zip_dl, "/parks_properties.zip"),
              mode = "wb")
unzip(paste0(zip_dl, "/parks_properties.zip"),
      exdir = temp_folder)
# The exported files all start with "geo"; "^geo" anchors that prefix
# (the regex "geo*" would match any name containing "ge").
old_files <- list.files(temp_folder, pattern = "^geo", full.names = TRUE)
# Rename on the file name only, so a dot in a directory name cannot be
# mistaken for the start of the extension.
new_names <- sub("^[^.]*\\.", "parks_properties.", basename(old_files))
file.copy(from = old_files, to = paste0(new_dir, "/", new_names))
file.remove(old_files)
list.files(new_dir)
parks_properties <- st_transform(st_read(new_dir, "parks_properties"), 2263)

### the NTA-derived parks layer is replaced by parks_properties
rm(parks)
gc()

#### data for zoomed in map
ntas_cropped <- st_crop(ntas, xmin = 994462.6, xmax = 1005982.5,
                        ymin = 181378.1, ymax = 188278.7)
# Clip the de-duplicated permits (sf_proj no longer exists at this point).
rest_cropped <- st_intersection(rests_sf, ntas_cropped)
# Computed only now that parks_properties has been read in. The category
# filter uses `typecatego`, the column the later working code uses.
parks_properties_cropped <- st_intersection(parks_properties, ntas_cropped) %>%
  subset(!typecatego %in% c("Mall", "Parkway"))
# ---- Data for the zoomed-in map ----
streets_cropped <- st_intersection(lion_condensed, ntas_cropped)
rest_cropped <- st_intersection(rests_sf, ntas_cropped)

# Look at the parks attributes before filtering on them. View() opens a data
# viewer, so it is only called in an interactive session.
if (interactive()) {
  View(parks_properties)
}
unique(parks_properties$typecatego)
parks_properties_cropped <- st_intersection(parks_properties, ntas_cropped) %>%
  ### remove excess parks area like the green space on Eastern Parkway
  subset(!typecatego %in% c("Mall", "Parkway"))

# First look at the cropped layers (parks layer still switched off).
ggplot() +
  geom_sf(data = ntas_cropped, fill = "white") +
  #geom_sf(data=parks_cropped, fill="#a1c690", color="lightgrey")+
  geom_sf(data = streets_cropped, color = "#f2f2f2", size = .005) +
  geom_sf(data = rest_cropped, color = "red", size = .2) +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8))

# ---- Replace the hand-typed crop window with the NTA's own bounding box ----
if (interactive()) {
  View(ntas)
}
str(ntas)
# The comma matters: `ntas[cond, ]` selects ROWS (the Crown Heights North
# polygon); without it the logical vector would be used to pick columns.
bounding_box <- st_bbox(ntas[ntas$ntaname == "Crown Heights North", ])
bounding_box
ntas_cropped <- st_crop(ntas, bounding_box)
# ---- Crop every layer to Crown Heights North ----
bounding_box <- st_bbox(ntas[ntas$ntaname == "Crown Heights North", ])
ntas_cropped <- st_crop(ntas, bounding_box)
rest_cropped <- st_intersection(rests_sf, ntas_cropped)
streets_cropped <- st_intersection(lion_condensed, ntas_cropped)
parks_properties_cropped <- st_intersection(parks_properties, ntas_cropped) %>%
  ### remove excess parks area like the green space on Eastern Parkway
  subset(!typecatego %in% c("Mall", "Parkway"))

# Base map: NTA outline, parks, streets and permit points.
ggplot() +
  geom_sf(data = ntas_cropped, fill = "white") +
  geom_sf(data = parks_properties_cropped, fill = "#a1c690", color = "lightgrey") +
  geom_sf(data = streets_cropped, color = "#f2f2f2", size = .005) +
  geom_sf(data = rest_cropped, color = "red", size = .2) +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8))

# Same map with one label per street segment. The parks layer is
# parks_properties_cropped; no object called parks_cropped is ever created.
ggplot() +
  geom_sf(data = ntas_cropped, fill = "white") +
  geom_sf(data = parks_properties_cropped, fill = "#a1c690", color = "lightgrey") +
  geom_sf(data = streets_cropped, color = "#f2f2f2", size = .005) +
  geom_sf_text(data = streets_cropped,
               aes(label = Street, family = "serif"), colour = "black", size = 2) +
  geom_sf(data = rest_cropped, color = "red", size = .2) +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_line(color = "white", size = 0.8))

### union roads for the purpose of labeling (one feature, so one label, per street name)
roads.out <- streets_cropped %>%
  group_by(Street) %>%
  summarize(geometry = st_union(SHAPE))

# Text layer for a single street taken from roads.out.
#   street           value of roads.out$Street to label
#   angle            rotation passed to geom_sf_text()
#   nudge_x, nudge_y label offsets passed to geom_sf_text()
street_label <- function(street, angle, nudge_x, nudge_y) {
  geom_sf_text(data = roads.out[roads.out$Street == street, ],
               aes(label = Street, family = "serif"), colour = "black", size = 2,
               angle = angle, nudge_x = nudge_x, nudge_y = nudge_y)
}

# Final Crown Heights North map with legend, street labels and scale bar.
# Reads streets_cropped, parks_properties_cropped, rest_cropped, roads.out and
# ntas_cropped from the workspace.
#   point_label legend entry used for the permit points
#   title_size  font size of the plot title
# Returns the ggplot object; calling it at top level prints the map.
crown_heights_map <- function(point_label, title_size) {
  ggplot() +
    geom_sf(data = streets_cropped, aes(color = "streets"), show.legend = "line", size = .005) +
    #geom_sf(data = ntas_cropped, fill=NA, color="black")+
    geom_sf(data = parks_properties_cropped, fill = "#a1c690", color = NA) +
    street_label("ATLANTIC AVENUE", angle = -5, nudge_x = 1600, nudge_y = -35) +
    street_label("FRANKLIN AVENUE", angle = 80, nudge_x = -70, nudge_y = 350) +
    street_label("EASTERN PARKWAY", angle = -5, nudge_x = 5000, nudge_y = -750) +
    geom_sf(data = rest_cropped, aes(color = point_label), show.legend = "point", size = 1.2) +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks = element_blank(),
          rect = element_blank(),
          panel.background = element_blank(),
          plot.background = element_rect(fill = "white"),
          plot.title = element_text(colour = "black", size = title_size, face = "bold", hjust = 0.5),
          legend.position = "right") +
    theme(legend.title = element_blank(),
          legend.spacing.y = unit(0, "mm"),
          panel.border = element_rect(colour = "black", fill = NA),
          aspect.ratio = 1, axis.text = element_text(colour = 1, size = 12),
          legend.background = element_blank(),
          legend.box.background = element_rect(colour = "black")) +
    labs(title = "Open Restaurant Permits \n Crown Heights North") +
    xlab("") +
    ylab("") +
    # Points get a marker and no line; streets get a line and no marker.
    scale_colour_manual(values = setNames(c("red", "darkgrey"), c(point_label, "streets")),
                        guide = guide_legend(override.aes = list(linetype = c("blank", "solid"),
                                                                 shape = c(16, NA)))) +
    scalebar(ntas_cropped, dist = .5, dist_unit = "mi", location = "bottomleft",
             transform = FALSE, model = "WGS84")
}

# First draft of the final map, then the version with the refined legend
# wording and a smaller title.
crown_heights_map("Restaurants", title_size = 24)
crown_heights_map("Restaurant Permits", title_size = 20)