├── tests ├── testthat.R └── testthat │ └── test-that.R ├── inst └── extdata │ ├── gangs.kml.zip │ └── states.kml.zip ├── man ├── kml_folders.Rd ├── kml_finalize.Rd ├── kml_info.Rd ├── kml_size.Rd ├── kml_placemarks.Rd ├── tidykml.Rd ├── kml_element.Rd ├── kml_elements.Rd ├── gangs.Rd ├── states.Rd ├── kml_bounds.Rd ├── kml_read.Rd ├── kml_points.Rd ├── kml_lines.Rd ├── kml_coords.Rd └── kml_polygons.Rd ├── R ├── tidykml.R ├── data-gangs.R ├── data-states.R ├── kml_info.R ├── kml_bounds.R ├── kml_read.R ├── kml_lines.R ├── kml_points.R ├── kml_coords.R ├── kml_polygons.R └── internals.R ├── NAMESPACE ├── DESCRIPTION ├── NEWS.md └── README.md /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(tidykml) 3 | 4 | test_check("tidykml") 5 | -------------------------------------------------------------------------------- /inst/extdata/gangs.kml.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/briatte/tidykml/HEAD/inst/extdata/gangs.kml.zip -------------------------------------------------------------------------------- /inst/extdata/states.kml.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/briatte/tidykml/HEAD/inst/extdata/states.kml.zip -------------------------------------------------------------------------------- /man/kml_folders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_folders} 4 | \alias{kml_folders} 5 | \title{Extract KML Folders.} 6 | \usage{ 7 | kml_folders(x, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{An XML document.} 11 | 12 | \item{ns}{The name of the namespace to extract from; defaults to \code{"d1"}.} 13 | } 14 | \value{ 15 | A nodeset of Folders. 
16 | } 17 | \description{ 18 | Extract KML Folders. 19 | } 20 | \seealso{ 21 | Google Developers. KML Reference: <Folder> Element. 22 | \url{https://developers.google.com/kml/documentation/kmlreference#folder} 23 | } 24 | \keyword{internal} 25 | 26 | -------------------------------------------------------------------------------- /R/tidykml.R: -------------------------------------------------------------------------------- 1 | #' Functions to turn KML (Keyhole Markup Language) files into tidy data frames. 2 | #' 3 | #' The \code{tidykml} package reads selected elements and values from KML files, 4 | #' such as those produced by Google My Maps, and turns them into tidy data 5 | #' frames, intended for use with packages like \link[dplyr:dplyr]{dplyr} and 6 | #' \link[ggplot2:ggplot2]{ggplot2}. 7 | #' 8 | #' See the README file of the package for further details and a few examples: 9 | #' \url{https://github.com/briatte/tidykml} 10 | #' @seealso Google Developers. KML Reference. 11 | #' \url{https://developers.google.com/kml/documentation/kmlreference} 12 | #' @docType package 13 | #' @name tidykml 14 | NULL 15 | -------------------------------------------------------------------------------- /man/kml_finalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_finalize} 4 | \alias{kml_finalize} 5 | \title{Finalize a KML tidy data frame} 6 | \usage{ 7 | kml_finalize(x, folders, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{The KML data frame to tidy.} 11 | 12 | \item{folders}{The number of folders in the data frame.} 13 | 14 | \item{verbose}{Whether to report invalid coordinates and/or negative 15 | altitudes (below sea level); defaults to \code{TRUE}.} 16 | } 17 | \description{ 18 | Reads the coordinates out of the \code{coordinates} variable, checks them, 19 | and returns the data.
20 | } 21 | \keyword{internal} 22 | 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(kml_bounds) 4 | export(kml_coords) 5 | export(kml_info) 6 | export(kml_lines) 7 | export(kml_points) 8 | export(kml_polygons) 9 | export(kml_read) 10 | importFrom(dplyr,bind_rows) 11 | importFrom(dplyr,data_frame) 12 | importFrom(stringr,"%>%") 13 | importFrom(stringr,str_c) 14 | importFrom(stringr,str_count) 15 | importFrom(stringr,str_detect) 16 | importFrom(stringr,str_extract) 17 | importFrom(stringr,str_length) 18 | importFrom(stringr,str_replace) 19 | importFrom(stringr,str_split) 20 | importFrom(stringr,str_trim) 21 | importFrom(utils,unzip) 22 | importFrom(xml2,read_xml) 23 | importFrom(xml2,xml_find_all) 24 | importFrom(xml2,xml_find_first) 25 | importFrom(xml2,xml_name) 26 | importFrom(xml2,xml_ns) 27 | importFrom(xml2,xml_text) 28 | -------------------------------------------------------------------------------- /man/kml_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_info.R 3 | \name{kml_info} 4 | \alias{kml_info} 5 | \title{Find the elements of a KML file.} 6 | \usage{ 7 | kml_info(x, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}.} 11 | 12 | \item{ns}{The name of the namespace to extract from: defaults to \code{"d1"}.} 13 | } 14 | \value{ 15 | A named numeric vector of five elements corresponding to the number 16 | of Folders, Placemarks, LineStrings, Points and Polygons in the source. 17 | } 18 | \description{ 19 | Find the elements of a KML file. 20 | } 21 | \examples{ 22 | # demo data: U.S. 
Civil War map 23 | # see ?states for details 24 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 25 | kml_info(f) 26 | } 27 | 28 | -------------------------------------------------------------------------------- /man/kml_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_size} 4 | \alias{kml_size} 5 | \title{Find the number of coordinates in a KML file.} 6 | \usage{ 7 | kml_size(x, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}.} 11 | 12 | \item{ns}{The name of the namespace to extract from; defaults to \code{"d1"}.} 13 | } 14 | \value{ 15 | A named numeric vector of three elements containing the total number 16 | of coordinates, the total number of coordinates found in <outerBoundaryIs> 17 | elements (outer polygon boundaries), and the total number of coordinates 18 | found in <innerBoundaryIs> elements (inner polygon boundaries). 19 | } 20 | \description{ 21 | Find the number of coordinates in a KML file. 22 | } 23 | \keyword{internal} 24 | 25 | -------------------------------------------------------------------------------- /man/kml_placemarks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_placemarks} 4 | \alias{kml_placemarks} 5 | \title{Extract KML Placemarks containing a specific Geometry.} 6 | \usage{ 7 | kml_placemarks(x, geometry, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{A nodeset of Folders.} 11 | 12 | \item{geometry}{The name of the Geometry to subset on, e.g. \code{"Point"}.} 13 | 14 | \item{ns}{The name of the namespace to extract from; defaults to \code{"d1"}.} 15 | } 16 | \value{ 17 | A nodeset of Placemarks. 18 | } 19 | \description{ 20 | Extract KML Placemarks containing a specific Geometry.
21 | } 22 | \seealso{ 23 | Google Developers. KML Reference: <Placemark> Element. 24 | \url{https://developers.google.com/kml/documentation/kmlreference#placemark} 25 | } 26 | \keyword{internal} 27 | 28 | -------------------------------------------------------------------------------- /R/data-gangs.R: -------------------------------------------------------------------------------- 1 | #' Map of Non-Hispanic Gangs in South Los Angeles (2016) 2 | #' 3 | #' This map shows the non-Hispanic gangs present in South Los Angeles as of 4 | #' December 2016. 5 | #' 6 | #' The map comes from Google My Maps website and has been used to illustrate the 7 | #' Wikipedia entry for the service. 8 | #' 9 | #' The precise identity of the author of the map could not be determined. 10 | #' @source Anonymous. Gangs of Los Angeles (2016). \url{https://goo.gl/7Ar1Aa} 11 | #' (Google My Maps, accessed 30 December 2016). 12 | #' @seealso LA Hood Maps. \emph{Instagram}. 13 | #' \url{https://www.instagram.com/la_hood_maps/} (accessed 30 December 2016). 14 | #' @docType data 15 | #' @name gangs 16 | #' @format Zipped KML file. 17 | #' @examples 18 | #' f <- system.file("extdata", "gangs.kml.zip", package = "tidykml") 19 | #' kml_polygons(f) 20 | NULL -------------------------------------------------------------------------------- /man/tidykml.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidykml.R 3 | \docType{package} 4 | \name{tidykml} 5 | \alias{tidykml} 6 | \alias{tidykml-package} 7 | \title{Functions to turn KML (Keyhole Markup Language) files into tidy data frames.} 8 | \description{ 9 | The \code{tidykml} package reads selected elements and values from KML files, 10 | such as those produced by Google My Maps, and turns them into tidy data 11 | frames, intended for use with packages like \link[dplyr:dplyr]{dplyr} and 12 | \link[ggplot2:ggplot2]{ggplot2}.
13 | } 14 | \details{ 15 | See the README file of the package for further details and a few examples: 16 | \url{https://github.com/briatte/tidykml} 17 | } 18 | \seealso{ 19 | Google Developers. KML Reference. 20 | \url{https://developers.google.com/kml/documentation/kmlreference} 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: tidykml 2 | Title: Turn KML Files Into Tidy Data Frames 3 | Version: 0.1.2 4 | Authors@R: person("Francois", "Briatte", email = "f.briatte@gmail.com", role = c("aut", "cre")) 5 | Description: This package reads selected elements and values from KML files, 6 | such as those produced by Google My Maps, and turns them into tidy data 7 | frames, intended for use with packages such as 'dplyr' or 'ggplot2'. 8 | URL: https://github.com/briatte/tidykml 9 | BugReports: https://github.com/briatte/tidykml/issues 10 | Depends: 11 | R (>= 3.3.2) 12 | Imports: 13 | dplyr, 14 | stringr, 15 | xml2 16 | Suggests: 17 | ggmap, 18 | ggplot2, 19 | httr, 20 | testthat 21 | License: GPL-3 22 | Encoding: UTF-8 23 | LazyData: true 24 | Author: Francois Briatte 25 | Maintainer: Francois Briatte 26 | RoxygenNote: 5.0.1 27 | -------------------------------------------------------------------------------- /man/kml_element.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_element} 4 | \alias{kml_element} 5 | \title{Extract a single KML element from a Placemark.} 6 | \usage{ 7 | kml_element(x, element, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{A nodeset of Placemarks.} 11 | 12 | \item{element}{The name of the element to extract, e.g. 
\code{"name"}.} 13 | 14 | \item{ns}{The name of the namespace to extract from; defaults to \code{"d1"}.} 15 | } 16 | \value{ 17 | A character vector holding the text of the element. 18 | Missing values, i.e. empty elements, will be returned as \code{NA} values. 19 | } 20 | \description{ 21 | Extract a single KML element from a Placemark. 22 | } 23 | \seealso{ 24 | Google Developers. KML Reference: Element. 25 | \url{https://developers.google.com/kml/documentation/kmlreference#placemark} 26 | } 27 | \keyword{internal} 28 | 29 | -------------------------------------------------------------------------------- /man/kml_elements.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \name{kml_elements} 4 | \alias{kml_elements} 5 | \title{Extract multiple KML elements from a Placemark.} 6 | \usage{ 7 | kml_elements(x, element, ns = "d1") 8 | } 9 | \arguments{ 10 | \item{x}{A nodeset of Placemarks.} 11 | 12 | \item{element}{The name of the element to extract, e.g. \code{"name"}.} 13 | 14 | \item{ns}{The name of the namespace to extract from; defaults to \code{"d1"}.} 15 | } 16 | \value{ 17 | A character vector holding the text of the element. 18 | Missing values, i.e. empty elements, will be returned as \code{NA} values. 19 | } 20 | \description{ 21 | Extract multiple KML elements from a Placemark. 22 | } 23 | \seealso{ 24 | Google Developers. KML Reference: Element. 
25 | \url{https://developers.google.com/kml/documentation/kmlreference#placemark} 26 | } 27 | \keyword{internal} 28 | 29 | -------------------------------------------------------------------------------- /man/gangs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-gangs.R 3 | \docType{data} 4 | \name{gangs} 5 | \alias{gangs} 6 | \title{Map of Non-Hispanic Gangs in South Los Angeles (2016)} 7 | \format{Zipped KML file.} 8 | \source{ 9 | Anonymous. Gangs of Los Angeles (2016). \url{https://goo.gl/7Ar1Aa} 10 | (Google My Maps, accessed 30 December 2016). 11 | } 12 | \description{ 13 | This map shows the non-Hispanic gangs present in South Los Angeles as of 14 | December 2016. 15 | } 16 | \details{ 17 | The map comes from Google My Maps website and has been used to illustrate the 18 | Wikipedia entry for the service. 19 | 20 | The precise identity of the author of the map could not be determined. 21 | } 22 | \examples{ 23 | f <- system.file("extdata", "gangs.kml.zip", package = "tidykml") 24 | kml_polygons(f) 25 | } 26 | \seealso{ 27 | LA Hood Maps. \emph{Instagram}. 28 | \url{https://www.instagram.com/la_hood_maps/} (accessed 30 December 2016). 29 | } 30 | 31 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | tidykml 0.1.2 (2017-01-01) 2 | -------------------------- 3 | 4 | CHANGES 5 | 6 | * Option to read data directly from Google My Maps. 7 | 8 | * Option to force kml_polygons to fuse multiple-geometry polygons together. This 9 | option is experimental, and is highly likely to return erroneous geometries. 10 | 11 | * More detailed internal functions. 
12 | 13 | 14 | tidykml 0.1.1 (2016-12-31) 15 | -------------------------- 16 | 17 | This version has been tested against GADM (gadm.org) files -- with very mixed 18 | success: maps with inner boundaries will not render appropriately, and detailed 19 | maps will take a long time to process. 20 | 21 | FIXES 22 | 23 | * Deal with whitespace in <coordinates> elements. 24 | 25 | CHANGES 26 | 27 | * Support for files with no <Folder> element(s). 28 | 29 | * Support for KMZ sources. 30 | 31 | 32 | tidykml 0.1.0 (2016-12-30) 33 | -------------------------- 34 | 35 | First release. 36 | 37 | This version has been tested against KML files from Google My Maps. 38 | -------------------------------------------------------------------------------- /R/data-states.R: -------------------------------------------------------------------------------- 1 | #' Map of U.S. Civil War 2 | #' 3 | #' This map shows the major battles of the U.S. Civil War, the states engaged in 4 | #' the conflict and their status as of 1863, and the itinerary of Union General 5 | #' William T. Sherman's forces from Atlanta to Savannah, Georgia, in 1864. 6 | #' 7 | #' The map comes from Google My Maps website and has been used to illustrate the 8 | #' Wikipedia entry for the service. 9 | #' 10 | #' The precise identity of the author of the map could not be determined. 11 | #' @source Anonymous. US Civil War. \url{https://goo.gl/rezvty} 12 | #' (Google My Maps, accessed 30 December 2016). 13 | #' @seealso Wikipedia. Google My Maps. \emph{Wikipedia, The Free Encyclopedia}. 14 | #' \url{https://en.wikipedia.org/wiki/Google_My_Maps} (revision 732328417, 15 | #' accessed 30 December 2016). 16 | #' @docType data 17 | #' @name states 18 | #' @format Zipped KML file.
19 | #' @examples 20 | #' f <- system.file("extdata", "states.kml.zip", package = "tidykml") 21 | #' kml_points(f) 22 | NULL -------------------------------------------------------------------------------- /R/kml_info.R: --------------------------------------------------------------------------------
#' Find the elements of a KML file.
#'
#' @param x A KML source. See \link{kml_read}.
#' @param ns The name of the namespace to extract from: defaults to \code{"d1"}.
#' @return A named numeric vector of five elements corresponding to the number
#' of Folders, Placemarks, LineStrings, Points and Polygons in the source.
#' @examples
#' # demo data: U.S. Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_info(f)
#' @importFrom stringr %>% str_c
#' @importFrom xml2 xml_find_all
#' @export
kml_info <- function(x, ns = "d1") {

  x <- kml_read(x)

  # number of nodes matched by an XPath query in the document
  count_nodes <- function(xpath) {
    length(xml_find_all(x, xpath))
  }

  # vapply() pins the result to exactly one integer per element name, whereas
  # the return type of sapply() depends on its input; names are kept as-is
  c(
    # Folders and Placemarks are counted as elements
    vapply(c("Folder", "Placemark"), function(y) {
      count_nodes(str_c("//", ns, ":", y))
    }, integer(1)),
    # geometries are counted through the coordinates elements nested in them
    vapply(c("LineString", "Point", "Polygon"), function(y) {
      count_nodes(str_c("//", ns, ":", y, "//", ns, ":coordinates"))
    }, integer(1))
  )

}
-------------------------------------------------------------------------------- /man/states.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-states.R 3 | \docType{data} 4 | \name{states} 5 | \alias{states} 6 | \title{Map of U.S. Civil War} 7 | \format{Zipped KML file.} 8 | \source{ 9 | Anonymous. US Civil War. \url{https://goo.gl/rezvty} 10 | (Google My Maps, accessed 30 December 2016). 11 | } 12 | \description{ 13 | This map shows the major battles of the U.S.
Civil War, the states engaged in 14 | the conflict and their status as of 1863, and the itinerary of Union General 15 | William T. Sherman's forces from Atlanta to Savannah, Georgia, in 1864. 16 | } 17 | \details{ 18 | The map comes from Google My Maps website and has been used to illustrate the 19 | Wikipedia entry for the service. 20 | 21 | The precise identity of the author of the map could not be determined. 22 | } 23 | \examples{ 24 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 25 | kml_points(f) 26 | } 27 | \seealso{ 28 | Wikipedia. Google My Maps. \emph{Wikipedia, The Free Encyclopedia}. 29 | \url{https://en.wikipedia.org/wiki/Google_My_Maps} (revision 732328417, 30 | accessed 30 December 2016). 31 | } 32 | 33 | -------------------------------------------------------------------------------- /man/kml_bounds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_bounds.R 3 | \name{kml_bounds} 4 | \alias{kml_bounds} 5 | \title{Find the boundaries of a KML file.} 6 | \usage{ 7 | kml_bounds(x, ns = "d1", verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}. \code{x} can also be a data frame 11 | with two numeric variables named \code{longitude} and \code{latitude}.} 12 | 13 | \item{ns}{The name of the namespace to extract from: defaults to \code{"d1"}.} 14 | 15 | \item{verbose}{Whether to report invalid coordinates and/or altitudes below 16 | sea level; defaults to \code{TRUE}. See \link{kml_coords}.} 17 | } 18 | \value{ 19 | A named numeric vector of four elements corresponding to the 20 | left, bottom, right and top values of the bounding box; 'left' and 'right' 21 | are the minimal and maximal longitudes; 'bottom' and 'top' are the minimal 22 | and maximal latitudes. 23 | } 24 | \description{ 25 | Find the boundaries of a KML file. 26 | } 27 | \examples{ 28 | # demo data: U.S. 
Civil War map 29 | # see ?states for details 30 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 31 | kml_bounds(f) 32 | } 33 | \seealso{ 34 | \link{kml_coords} 35 | } 36 | 37 | -------------------------------------------------------------------------------- /man/kml_read.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_read.R 3 | \name{kml_read} 4 | \alias{kml_read} 5 | \title{Read a KML file.} 6 | \usage{ 7 | kml_read(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A string, a connection, or a raw vector. 11 | All inputs accepted by \link[xml2:read_xml]{read_xml} are also accepted, as 12 | long as they are valid KML. This means that URLs and local compressed files, 13 | such as \code{.zip} files, are also supported. 14 | 15 | If the source is a local file with a name ending in \code{.kmz}, 16 | \code{kml_read} will treat it as a zipped KML file and will try to read its 17 | first file, as listed by \link[utils:unzip]{unzip}. 18 | 19 | If the source is a link to Google My Maps, \code{kml_read} will try to 20 | download the most recent version of the map and will then treat it as a 21 | \code{.kmz} file.} 22 | 23 | \item{...}{Arguments passed to \link[xml2:read_xml]{read_xml}, such as 24 | \code{encoding} or \code{base_url}. 25 | See \link[xml2:read_xml]{read_xml} for details.} 26 | } 27 | \value{ 28 | A nodeset. 29 | } 30 | \description{ 31 | Read a KML file. 32 | } 33 | \examples{ 34 | # demo data: U.S. Civil War map 35 | # see ?states for details 36 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 37 | kml_read(f) 38 | } 39 | \seealso{ 40 | Google Developers. KML Reference. 
41 | \url{https://developers.google.com/kml/documentation/kmlreference} 42 | } 43 | 44 | -------------------------------------------------------------------------------- /man/kml_points.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_points.R 3 | \name{kml_points} 4 | \alias{kml_points} 5 | \title{Read Points out of a KML file.} 6 | \usage{ 7 | kml_points(x, ns = "d1", verbose = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}.} 11 | 12 | \item{ns}{The name of the namespace to extract from: defaults to \code{"d1"}.} 13 | 14 | \item{verbose}{Whether to report invalid coordinates and/or altitudes below 15 | sea level; defaults to \code{TRUE}. See \link{kml_coords}.} 16 | 17 | \item{...}{Arguments passed to \link[xml2:read_xml]{read_xml}. 18 | See \link{kml_read}.} 19 | } 20 | \value{ 21 | A \link[tibble:tibble]{tibble} containing the \code{folder} (layer), 22 | \code{name}, \code{description}, \code{styleUrl} and geographic coordinates 23 | (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first} 24 | Point contained within each Placemark element of the KML source. 25 | Other Placemark elements will be ignored. 26 | 27 | If there are no Points in the KML source, the function returns \code{NULL}. 28 | If there are no Folders in the KML source, the \code{folder} variable will be 29 | filled with \code{NA}. 30 | } 31 | \description{ 32 | Read Points out of a KML file. 33 | } 34 | \note{ 35 | The function only extracts the \strong{first} Point out of each 36 | Placemark element. As a result, multi-points built into 37 | elements are \emph{not} fully supported: only the first Point will be present 38 | in the results. 39 | } 40 | \examples{ 41 | # demo data: U.S. 
Civil War map 42 | # see ?states for details 43 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 44 | kml_points(f) 45 | } 46 | \references{ 47 | Google Developers. KML Reference: Element. 48 | \url{https://developers.google.com/kml/documentation/kmlreference#point} 49 | } 50 | 51 | -------------------------------------------------------------------------------- /R/kml_bounds.R: -------------------------------------------------------------------------------- 1 | #' Find the boundaries of a KML file. 2 | #' 3 | #' @param x A KML source. See \link{kml_read}. \code{x} can also be a data frame 4 | #' with two numeric variables named \code{longitude} and \code{latitude}. 5 | #' @param ns The name of the namespace to extract from: defaults to \code{"d1"}. 6 | #' @param verbose Whether to report invalid coordinates and/or altitudes below 7 | #' sea level; defaults to \code{TRUE}. See \link{kml_coords}. 8 | #' @return A named numeric vector of four elements corresponding to the 9 | #' left, bottom, right and top values of the bounding box; 'left' and 'right' 10 | #' are the minimal and maximal longitudes; 'bottom' and 'top' are the minimal 11 | #' and maximal latitudes. 12 | #' @examples 13 | #' # demo data: U.S. 
Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_bounds(f)
#' @seealso \link{kml_coords}
#' @importFrom stringr %>% str_c str_length str_split
#' @importFrom xml2 xml_find_all xml_text
#' @export
kml_bounds <- function(x, ns = "d1", verbose = TRUE) {

  if (inherits(x, "data.frame")) {

    # data frame input: both coordinate columns must be present
    stopifnot(c("longitude", "latitude") %in% names(x))

    lon <- unique(x$longitude)
    lat <- unique(x$latitude)

  } else {

    # KML input: collect the text of every coordinates element
    xpath <- str_c("//", ns, ":coordinates")
    txt <- xml_text(xml_find_all(kml_read(x), xpath))

    # one element can hold several whitespace-separated tuples (Polygons)
    tuples <- unlist(str_split(txt, "\\s+"))

    # splitting leaves empty strings behind: drop them
    tuples <- tuples[ str_length(tuples) > 0 ]

    lon <- unique(kml_coords(tuples, 1, verbose))
    lat <- unique(kml_coords(tuples, 2, verbose))

  }

  # bounding box, in left/bottom/right/top order
  c(
    left   = min(lon),
    bottom = min(lat),
    right  = max(lon),
    top    = max(lat)
  )

}
-------------------------------------------------------------------------------- /man/kml_lines.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_lines.R 3 | \name{kml_lines} 4 | \alias{kml_lines} 5 | \title{Read Lines out of a KML file.} 6 | \usage{ 7 | kml_lines(x, ns = "d1", verbose = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}.} 11 | 12 | \item{ns}{The name of the namespace to extract from: defaults to \code{"d1"}.} 13 | 14 | \item{verbose}{Whether to report invalid coordinates and/or altitudes below 15 | sea level; defaults to \code{TRUE}. See \link{kml_coords}.} 16 | 17 | \item{...}{Arguments passed to \link[xml2:read_xml]{read_xml}.
18 | See \link{kml_read}.} 19 | } 20 | \value{ 21 | A \link[tibble:tibble]{tibble} containing the \code{folder} (layer), 22 | \code{name}, \code{description}, \code{styleUrl} and geographic coordinates 23 | (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first} 24 | LineString contained within each Placemark element of the KML source. 25 | Other Placemark elements will be ignored. 26 | 27 | If there are no LineStrings in the KML source, the function returns 28 | \code{NULL}. 29 | If there are no Folders in the KML source, the \code{folder} variable will be 30 | filled with \code{NA}. 31 | } 32 | \description{ 33 | Read Lines out of a KML file. 34 | } 35 | \note{ 36 | The function only extracts the \strong{first} LineString out of each 37 | Placemark element. As a result, multi-LineStrings built into <MultiGeometry> 38 | elements are \emph{not} fully supported: only the first LineString will be 39 | present in the results. 40 | } 41 | \examples{ 42 | # demo data: U.S. Civil War map 43 | # see ?states for details 44 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 45 | kml_lines(f) 46 | } 47 | \references{ 48 | Google Developers. KML Reference: <LineString> Element.
49 | \url{https://developers.google.com/kml/documentation/kmlreference#linestring} 50 | } 51 | 52 | -------------------------------------------------------------------------------- /man/kml_coords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_coords.R 3 | \name{kml_coords} 4 | \alias{kml_coords} 5 | \title{Extract KML coordinates (longitude, latitude or altitude).} 6 | \usage{ 7 | kml_coords(x, coord, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{A character vector of KML coordinates, of the form 11 | \code{"longitude,latitude[,altitude]"}.} 12 | 13 | \item{coord}{Which coordinate to extract: either \code{1} (longitude), 14 | \code{2} (latitude) or \code{3} (altitude). The function also accepts 15 | \code{"lon"}, \code{"lat"} and \code{"alt"}.} 16 | 17 | \item{verbose}{Whether to report invalid coordinates and/or negative 18 | altitudes (below sea level); defaults to \code{TRUE}. See 'Note'.} 19 | } 20 | \value{ 21 | A numeric vector. 22 | 23 | If \code{coord} has been set to return altitude and there are no altitude 24 | values in the KML coordinates, the function returns a numeric vector of 25 | \code{NA} values of the same length as \code{x}. 26 | } 27 | \description{ 28 | Extract KML coordinates (longitude, latitude or altitude). 29 | } 30 | \note{ 31 | KML coordinates are \code{lon,lat[,alt]} tuples. 32 | 33 | Invalid coordinates are longitudes outside of [-180, 180] (angular distance 34 | in degrees, relative to the Prime Meridian) and latitudes outside of 35 | [-90, 90] (angular distance in degrees, relative to the Equator). 36 | 37 | Altitude is the distance from the earth's surface, in meters. Depending on 38 | the altitude mode used in the KML source, altitude might be computed in 39 | absolute terms (relative to sea level), or from the actual ground elevation 40 | of a particular location. 
41 | } 42 | \examples{ 43 | # longitude 44 | kml_coords("11.0,22.0,-99.0", 1) 45 | # latitude 46 | kml_coords("11.0,22.0,-99.0", 2) 47 | # altitude 48 | kml_coords("11.0,22.0,-99.0", "alt") 49 | kml_coords("11.0,22.0,-99.0", "alt", verbose = FALSE) 50 | } 51 | \seealso{ 52 | Google Developers. KML Reference: Element. 53 | \url{https://developers.google.com/kml/documentation/kmlreference#camera} 54 | } 55 | 56 | -------------------------------------------------------------------------------- /man/kml_polygons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kml_polygons.R 3 | \name{kml_polygons} 4 | \alias{kml_polygons} 5 | \title{Read Polygons out of a KML file.} 6 | \usage{ 7 | kml_polygons(x, ns = "d1", verbose = TRUE, fuse = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A KML source. See \link{kml_read}.} 11 | 12 | \item{ns}{The name of the namespace to extract from: defaults to \code{"d1"}.} 13 | 14 | \item{verbose}{Whether to report invalid coordinates and/or altitudes below 15 | sea level; defaults to \code{TRUE}. See \link{kml_coords}.} 16 | 17 | \item{fuse}{Whether to fuse multi-polygons into a single element; defaults 18 | to \code{FALSE}. Experimental. Might not return nice things.} 19 | 20 | \item{...}{Arguments passed to \link[xml2:read_xml]{read_xml}. 21 | See \link{kml_read}.} 22 | } 23 | \value{ 24 | A \link[tibble:tibble]{tibble} containing the \code{folder} (layer), 25 | \code{name}, \code{description}, \code{styleUrl} and geographic coordinates 26 | (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first} 27 | Polygon contained within each Placemark element of the KML source. 28 | Other Placemark elements will be ignored. 29 | 30 | If there are no Polygons in the KML source, the function returns \code{NULL}. 
31 | If there are no Folders in the KML source, the \code{folder} variable will be 32 | filled with \code{NA}. 33 | } 34 | \description{ 35 | Read Polygons out of a KML file. 36 | } 37 | \note{ 38 | The function only extracts the outer bounds of Polygon elements, and it 39 | only extracts the \strong{first} Polygon out of each Placemark element. As a 40 | result, multi-polygons built into elements are \emph{not} 41 | fully supported: only the first Polygon will be present in the results. 42 | } 43 | \examples{ 44 | # demo data: U.S. Civil War map 45 | # see ?states for details 46 | f <- system.file("extdata", "states.kml.zip", package = "tidykml") 47 | kml_polygons(f) 48 | } 49 | \references{ 50 | Google Developers. KML Reference: Element. 51 | \url{https://developers.google.com/kml/documentation/kmlreference#polygon} 52 | } 53 | 54 | -------------------------------------------------------------------------------- /R/kml_read.R: -------------------------------------------------------------------------------- 1 | #' Read a KML file. 2 | #' 3 | #' @param x A string, a connection, or a raw vector. 4 | #' All inputs accepted by \link[xml2:read_xml]{read_xml} are also accepted, as 5 | #' long as they are valid KML. This means that URLs and local compressed files, 6 | #' such as \code{.zip} files, are also supported. 7 | #' 8 | #' If the source is a local file with a name ending in \code{.kmz}, 9 | #' \code{kml_read} will treat it as a zipped KML file and will try to read its 10 | #' first file, as listed by \link[utils:unzip]{unzip}. 11 | #' 12 | #' If the source is a link to Google My Maps, \code{kml_read} will try to 13 | #' download the most recent version of the map and will then treat it as a 14 | #' \code{.kmz} file. 15 | #' @param ... Arguments passed to \link[xml2:read_xml]{read_xml}, such as 16 | #' \code{encoding} or \code{base_url}. 17 | #' See \link[xml2:read_xml]{read_xml} for details. 18 | #' @return A nodeset. 19 | #' @examples 20 | #' # demo data: U.S. 
#' # Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_read(f)
#' @seealso Google Developers. KML Reference.
#' \url{https://developers.google.com/kml/documentation/kmlreference}
#' @importFrom stringr %>% str_c str_detect str_extract
#' @importFrom utils unzip
#' @importFrom xml2 read_xml xml_ns
#' @export
kml_read <- function(x, ...) {

  # Only a single, non-missing string can be a URL or a file path. Testing
  # this once keeps the scalar `&&` conditions below from ever receiving a
  # longer vector (an error since R 4.3).
  is_string <- is.character(x) && length(x) == 1L && !is.na(x)

  if (is_string &&
      str_detect(x, "^https?://(www\\.)?google\\.com/maps/d/(embed|viewer)")) {

    # case: live KML file from Google My Maps

    mid <- str_extract(x, "mid=(_|-|\\w)+")

    if (is.na(mid)) {
      stop("Could not find a map identifier (mid) in the Google My Maps URL.")
    }

    f <- tempfile(fileext = ".kmz")
    on.exit(unlink(f), add = TRUE) # be clean, even when the download fails

    x <- str_c("https://google.com/maps/d/kml?", mid)
    message("KMZ: ", x)

    r <- httr::GET(x)
    httr::stop_for_status(r) # do not try to unzip an HTTP error page
    writeBin(httr::content(r, "raw"), f)

    # the archive is then read, and validated, as a local KMZ file; the
    # document is fully parsed before the temporary file gets removed
    return(kml_read(f, ...))

  } else if (is_string && file.exists(x) && str_detect(x, "\\.kmz$")) {

    # case: local KMZ file; read the first file listed in the archive

    y <- unzip(x, list = TRUE)[1, 1]
    message("KML: ", y)

    x <- read_xml(unz(x, y), ...)

  } else if (!inherits(x, c("xml_document", "xml_node"))) {

    # case: anything else that read_xml understands (path, URL, literal XML,
    # raw vector, connection); already parsed documents are left as they are
    x <- read_xml(x, ...)

  }

  # a KML document declares at least one namespace that points to a KML
  # schema; there might be several namespaces, hence the any()
  y <- xml_ns(x)
  if (!length(y) || !any(str_detect(y, "/kml/"))) {
    stop("Source does not seem to be a valid KML document.")
  }

  x

}

# --------------------------------------------------------------------------------
# /R/kml_lines.R:
# --------------------------------------------------------------------------------
#' Read Lines out of a KML file.
#'
#' @inheritParams kml_points
#' @return A \link[tibble:tibble]{tibble} containing the \code{folder} (layer),
#' \code{name}, \code{description}, \code{styleUrl} and geographic coordinates
#' (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first}
#' LineString contained within each Placemark element of the KML source.
#' Other Placemark elements will be ignored.
#'
#' If there are no LineStrings in the KML source, the function returns
#' \code{NULL}.
#' If there are no Folders in the KML source, the \code{folder} variable will be
#' filled with \code{NA}.
#' @note The function only extracts the \strong{first} LineString out of each
#' Placemark element. As a result, multi-LineStrings built into <MultiGeometry>
#' elements are \emph{not} fully supported: only the first LineString will be
#' present in the results.
#' @examples
#' # demo data: U.S. Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_lines(f)
#' @references Google Developers. KML Reference: <LineString> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#linestring}
#' @importFrom dplyr bind_rows data_frame
#' @importFrom stringr %>% str_c str_split
#' @importFrom xml2 xml_find_all xml_find_first xml_text
#' @export
kml_lines <- function(x, ns = "d1", verbose = TRUE, ...) {

  x <- kml_read(x, ...)
  y <- kml_folders(x, ns)
  folders <- length(y) > 0

  if (folders) {

    x <- y

  } else {

    # case: no folders
    x <- xml_find_all(x, str_c("//", ns, ":Document"))

  }

  x <- lapply(x, function(x) {

    f <- kml_element(x, "name", ns)
    x <- kml_placemarks(x, "LineString", ns)

    # case: no placemarks
    if (!length(x)) {
      return(NULL)
    }

    # One data frame per Placemark: a LineString holds many coordinates, so
    # the Placemark-level fields (length 1) have to be recycled against the
    # coordinates of that Placemark only. Building a single data frame for
    # all Placemarks of a folder fails as soon as there is more than one.
    lapply(x, function(x, folder = f) {

      # a LineString might sit directly under the Placemark, or be wrapped
      # in a MultiGeometry, in which case only the first one is read
      geometry <- if (length(xml_find_all(x, str_c(ns, ":MultiGeometry")))) {
        str_c("MultiGeometry/", ns, ":LineString/")
      } else {
        "LineString/"
      }

      data_frame(
        folder = folder,
        name = kml_element(x, "name", ns),
        description = kml_element(x, "description", ns),
        styleUrl = kml_element(x, "styleUrl", ns),
        coordinates = kml_element(x, str_c(geometry, ns, ":coordinates"),
                                  ns) %>%
          str_split("\\s+") %>%
          unlist
      )

    }) %>%
      bind_rows

  }) %>%
    bind_rows

  kml_finalize(x, folders = folders, verbose = verbose)

}

# --------------------------------------------------------------------------------
# /R/kml_points.R:
# --------------------------------------------------------------------------------
#' Read Points out of a KML file.
#'
#' @param x A KML source. See \link{kml_read}.
#' @param ns The name of the namespace to extract from: defaults to \code{"d1"}.
#' @param verbose Whether to report invalid coordinates and/or altitudes below
#' sea level; defaults to \code{TRUE}. See \link{kml_coords}.
#' @param ... Arguments passed to \link[xml2:read_xml]{read_xml}.
#' See \link{kml_read}.
#' @return A \link[tibble:tibble]{tibble} containing the \code{folder} (layer),
#' \code{name}, \code{description}, \code{styleUrl} and geographic coordinates
#' (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first}
#' Point contained within each Placemark element of the KML source.
#' Other Placemark elements will be ignored.
#'
#' If there are no Points in the KML source, the function returns \code{NULL}.
#' If there are no Folders in the KML source, the \code{folder} variable will be
#' filled with \code{NA}.
#' @note The function only extracts the \strong{first} Point out of each
#' Placemark element. As a result, multi-points built into <MultiGeometry>
#' elements are \emph{not} fully supported: only the first Point will be present
#' in the results.
#' @references Google Developers. KML Reference: <Point> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#point}
#' @examples
#' # demo data: U.S. Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_points(f)
#' @importFrom dplyr bind_rows data_frame
#' @importFrom stringr %>% str_c str_split
#' @importFrom xml2 xml_find_all xml_find_first xml_text
#' @export
kml_points <- function(x, ns = "d1", verbose = TRUE, ...) {

  doc <- kml_read(x, ...)
  layers <- kml_folders(doc, ns)
  has_folders <- length(layers) > 0

  # without Folders, the Document itself is the only container of Placemarks
  containers <- if (has_folders) {
    layers
  } else {
    xml_find_all(doc, str_c("//", ns, ":Document"))
  }

  # one data frame per container, one row per Placemark holding a Point
  rows <- lapply(containers, function(node) {

    layer <- kml_element(node, "name", ns)
    marks <- kml_placemarks(node, "Point", ns)

    # case: no placemarks
    if (length(marks) == 0) {
      return(NULL)
    }

    data_frame(
      folder = layer,
      name = kml_element(marks, "name", ns),
      description = kml_element(marks, "description", ns),
      styleUrl = kml_element(marks, "styleUrl", ns),
      coordinates = kml_element(marks, str_c("Point/", ns, ":coordinates"), ns)
    )

  })

  kml_finalize(bind_rows(rows), folders = has_folders, verbose)

}

# --------------------------------------------------------------------------------
# /R/kml_coords.R:
# --------------------------------------------------------------------------------
#' Extract KML coordinates
#' (longitude, latitude or altitude).
#'
#' @param x A character vector of KML coordinates, of the form
#' \code{"longitude,latitude[,altitude]"}.
#' @param coord Which coordinate to extract: either \code{1} (longitude),
#' \code{2} (latitude) or \code{3} (altitude). The function also accepts
#' \code{"lon"}, \code{"lat"} and \code{"alt"}.
#' @param verbose Whether to report invalid coordinates and/or negative
#' altitudes (below sea level); defaults to \code{TRUE}. See 'Note'.
#' @return A numeric vector of the same length as \code{x}. Missing values in
#' \code{x} are returned as \code{NA}, and an empty \code{x} returns an empty
#' numeric vector.
#'
#' If \code{coord} has been set to return altitude and there are no altitude
#' values in the KML coordinates, the function returns a numeric vector of
#' \code{NA} values of the same length as \code{x}.
#'
#' The function stops with an error if the non-missing elements of \code{x} do
#' not all have the same number of values, or if they do not hold two or three
#' comma-separated values.
#' @note KML coordinates are \code{lon,lat[,alt]} tuples.
#'
#' Invalid coordinates are longitudes outside of [-180, 180] (angular distance
#' in degrees, relative to the Prime Meridian) and latitudes outside of
#' [-90, 90] (angular distance in degrees, relative to the Equator).
#'
#' Altitude is the distance from the earth's surface, in meters. Depending on
#' the altitude mode used in the KML source, altitude might be computed in
#' absolute terms (relative to sea level), or from the actual ground elevation
#' of a particular location.
#' @seealso Google Developers. KML Reference: <Camera> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#camera}
#' @examples
#' # longitude
#' kml_coords("11.0,22.0,-99.0", 1)
#' # latitude
#' kml_coords("11.0,22.0,-99.0", 2)
#' # altitude
#' kml_coords("11.0,22.0,-99.0", "alt")
#' kml_coords("11.0,22.0,-99.0", "alt", verbose = FALSE)
#' @importFrom stringr str_c str_count str_replace
#' @export
kml_coords <- function(x, coord, verbose = TRUE) {

  # case: coord is an abbreviation (unknown abbreviations become NA and are
  # rejected by the check below)
  if (is.character(coord)) {
    coord <- match(coord, c("lon", "lat", "alt"))
  }

  stopifnot(length(coord) == 1 && coord %in% 1:3)

  # case: nothing to parse
  if (!length(x)) {
    return(numeric(0))
  }

  # the format is inferred from the number of commas; missing values carry
  # no format information and are passed through as NA
  n <- unique(str_count(x[ !is.na(x) ], ","))

  # case: only missing values
  if (!length(n)) {
    return(rep(NA_real_, length(x)))
  }

  # case: irregular format
  if (length(n) > 1) {
    stop("Irregular coordinates format.")
  }

  # case: invalid format
  if (!n %in% 1:2) {
    stop("Invalid coordinates format.")
  }

  # case: altitude was requested but is not present
  if (coord == 3 && n == 1) {
    return(rep(NA_real_, length(x)))
  }

  # one capture group per value; keep only the requested one
  pattern <- str_c(rep("(.*)", n + 1), collapse = ",")
  x <- as.numeric(str_replace(x, pattern, str_c("\\", coord)))

  if (verbose) {

    # na.rm = TRUE: a missing or unparsable value must not turn the
    # condition into NA, which if() cannot handle

    if (coord == 1 && any(abs(x) > 180, na.rm = TRUE)) {
      message("Some longitudes are not contained within [-180, 180].")
    }

    if (coord == 2 && any(abs(x) > 90, na.rm = TRUE)) {
      message("Some latitudes are not contained within [-90, 90].")
    }

    if (coord == 3 && any(x < 0, na.rm = TRUE)) {
      message("Some altitudes are negative.")
    }

  }

  x

}

# --------------------------------------------------------------------------------
# /R/kml_polygons.R:
# --------------------------------------------------------------------------------
#' Read Polygons out of a KML file.
#'
#' @inheritParams kml_points
#' @param fuse Whether to fuse multi-polygons into a single element; defaults
#' to \code{FALSE}. Experimental.
#' Might not return nice things.
#' @return A \link[tibble:tibble]{tibble} containing the \code{folder} (layer),
#' \code{name}, \code{description}, \code{styleUrl} and geographic coordinates
#' (\code{longitude}, \code{latitude} and \code{altitude}) of the \emph{first}
#' Polygon contained within each Placemark element of the KML source.
#' Other Placemark elements will be ignored.
#'
#' If there are no Polygons in the KML source, the function returns \code{NULL}.
#' If there are no Folders in the KML source, the \code{folder} variable will be
#' filled with \code{NA}.
#' @note The function only extracts the outer bounds of Polygon elements, and it
#' only extracts the \strong{first} Polygon out of each Placemark element. As a
#' result, multi-polygons built into <MultiGeometry> elements are \emph{not}
#' fully supported: only the first Polygon will be present in the results.
#' @references Google Developers. KML Reference: <Polygon> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#polygon}
#' @examples
#' # demo data: U.S. Civil War map
#' # see ?states for details
#' f <- system.file("extdata", "states.kml.zip", package = "tidykml")
#' kml_polygons(f)
#' @importFrom dplyr bind_rows data_frame
#' @importFrom stringr %>% str_c str_split
#' @importFrom xml2 xml_find_all xml_find_first xml_text
#' @export
kml_polygons <- function(x, ns = "d1", verbose = TRUE, fuse = FALSE, ...) {

  x <- kml_read(x, ...)
  y <- kml_folders(x, ns)
  folders <- length(y) > 0

  if (folders) {

    x <- y

  } else {

    # case: no folders
    x <- xml_find_all(x, str_c("//", ns, ":Document"))

  }

  # fusing reads every matching outer boundary of the Placemark, instead of
  # the first one only
  extract <- if (fuse) kml_elements else kml_element

  x <- lapply(x, function(x) {

    f <- kml_element(x, "name", ns)
    x <- kml_placemarks(x, "Polygon", ns)

    # case: no placemarks
    if (!length(x)) {
      return(NULL)
    }

    lapply(x, function(x, folder = f) {

      #
      # Multi-polygons are tough cookies. The solution taken here is the simple
      # way out: just take the first one into account, and assume that the rest
      # of the polygons are insignificant islands. Not optimal, I know.
      #
      geometry <- if (length(xml_find_all(x, str_c(ns, ":MultiGeometry")))) {
        str_c("MultiGeometry/", ns, ":Polygon/") # multi-polygon
      } else {
        "Polygon/"
      }

      # if the Placemark contains several Polygons in a MultiGeometry, and
      # unless `fuse` is set, the next lines will only find the coordinates
      # of the first Polygon (see longer note above)
      coordinates <- extract(x, str_c(geometry,
                                      ns,
                                      ":outerBoundaryIs/",
                                      ns,
                                      ":LinearRing/",
                                      ns,
                                      ":coordinates"),
                             ns) %>%
        str_split("\\s+") %>%
        unlist

      # case: no outer boundary at all (kml_elements returns nothing); keep a
      # single row with missing coordinates, which kml_finalize will drop
      if (!length(coordinates)) {
        coordinates <- NA_character_
      }

      # the namespace is passed along, so that sources read with a namespace
      # other than "d1" also get their names, descriptions and styles
      data_frame(
        folder = folder,
        name = kml_element(x, "name", ns),
        description = kml_element(x, "description", ns),
        styleUrl = kml_element(x, "styleUrl", ns),
        coordinates = coordinates
      )

    }) %>%
      bind_rows

  }) %>%
    bind_rows

  kml_finalize(x, folders = folders, verbose = verbose)

}

# --------------------------------------------------------------------------------
# /tests/testthat/test-that.R:
# --------------------------------------------------------------------------------
context("Test tidykml")

test_that("bounds, info, lines, points, polygons, size work", {

  # demo data: U.S. Civil War map
  # see ?states for details
  f <- system.file("extdata", "states.kml.zip", package = "tidykml")

  expect_is( kml_size(f)              , class = "integer")
  expect_is( kml_info(f)              , class = "integer")
  expect_is( kml_bounds(f)            , class = "numeric")
  expect_is( kml_bounds(kml_lines(f)), class = "numeric")

  expect_is( kml_lines(f)                 , class = "data.frame")
  expect_is( kml_points(f)                , class = "data.frame")
  expect_is( kml_polygons(f)              , class = "data.frame")
  expect_is( kml_polygons(f, fuse = TRUE) , class = "data.frame")

})

test_that("lines, points, polygons return NULLs when required", {

  # demo data: gangs of South Los Angeles map (polygons only)
  # see ?gangs for details
  f <- system.file("extdata", "gangs.kml.zip", package = "tidykml")

  expect_null( kml_lines(f) )
  expect_null( kml_points(f) )

  # NO POLYGONS

  f <- '<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
  <Document>
    <name>foo</name>
    <description>bar</description>
    <Folder>
      <name>empty</name>
    </Folder>
  </Document>
</kml>'

  expect_null( kml_polygons(f) )

  # EMPTY POLYGON

  f <- '<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
  <Document>
    <name>foo</name>
    <description>bar</description>
    <Folder>
      <name>empty</name>
      <Placemark>
        <name>empty polygon placemark</name>
        <Polygon>
          <name>empty polygon</name>
        </Polygon>
      </Placemark>
    </Folder>
  </Document>
</kml>'

  expect_null( kml_polygons(f) )

  # NULL RETURNS WHEN THERE IS NOTHING IN THE FILE

  f <- '<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
  <Document>
    <name>foo</name>
    <description>bar</description>
  </Document>
</kml>'

  expect_null( kml_lines(f) )
  expect_null( kml_points(f) )
  expect_null( kml_polygons(f) )

  # EMPTY POLYGON INSIDE A DOCUMENT WITH NO FOLDER

  f <- '<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
  <Document>
    <name>foo</name>
    <description>bar</description>
    <Placemark>
      <name>empty polygon placemark</name>
      <Polygon>
        <name>empty polygon</name>
      </Polygon>
    </Placemark>
  </Document>
</kml>'

  expect_null( kml_polygons(f) )

})

test_that("kml_read can read Google My Maps URLs", {

  # requires network access
  skip_on_cran()

  expect_is(kml_read("https://www.google.com/maps/d/embed?mid=1ul5yqMj7_JgM5xpfOn5gtlO-bTk&hl=en"),
            "xml_document")

})

test_that("kml_read throws an error when the source is not KML", {

  expect_error(kml_read(""))

})

test_that("kml_coords works", {

  # longitude
  expect_equal(kml_coords("11.0,22.0,-99.0", 1), 11)

  # latitude
  expect_equal(kml_coords("11.0,22.0,-99.0", 2), 22)

  # altitude
  expect_message(kml_coords("11.0,22.0,-99.0", "alt"))
  expect_equal(kml_coords("11.0,22.0,-99.0", "alt", verbose = FALSE), -99)

  # CASES NOT COVERED BY EXAMPLES

  # irregular coordinates
  expect_error(kml_coords(c("11.0,22.0,-99.0", "11.0,22.0"), 1))

  # invalid coordinates
  expect_error(kml_coords("nah", 1))

  # no altitude
  expect_equal(kml_coords("11.0,22.0", "alt"), NA_real_)

  # weird coords
  expect_message(kml_coords("999.0,999.0", 1))
  expect_message(kml_coords("999.0,999.0", 2))

})

# --------------------------------------------------------------------------------
# /R/internals.R:
# --------------------------------------------------------------------------------
#' Extract a single KML element from a Placemark.
#'
#' @param x A nodeset of Placemarks.
#' @param element The name of the element to extract, e.g. \code{"name"}.
#' @param ns The name of the namespace to extract from; defaults to \code{"d1"}.
#' @return A character vector holding the text of the first matching element
#' of each node of \code{x}.
#' Missing values, i.e. empty elements, will be returned as \code{NA} values.
#' @seealso Google Developers. KML Reference: <Placemark> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#placemark}
#' @importFrom xml2 xml_find_first xml_text
#' @importFrom stringr %>% str_c
#' @keywords internal
kml_element <- function(x, element, ns = "d1") {

  # `element` is an XPath relative to each node; only its first step gets
  # the namespace prefix here, so longer paths must carry their own prefixes
  xml_find_first(x, str_c(ns, ":", element)) %>%
    xml_text

}

#' Extract multiple KML elements from a Placemark.
#'
#' @inheritParams kml_element
#' @return A character vector holding the text of every matching element, in
#' document order. Unlike \code{kml_element}, the result is not aligned with
#' \code{x}: it is empty when nothing matches.
#' @seealso Google Developers. KML Reference: <Placemark> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#placemark}
#' @importFrom xml2 xml_find_all xml_text
#' @importFrom stringr %>% str_c
#' @keywords internal
kml_elements <- function(x, element, ns = "d1") {

  xml_find_all(x, str_c(ns, ":", element)) %>%
    xml_text

}

#' Finalize a KML tidy data frame
#'
#' Reads the coordinates out of the \code{coordinates} variable, checks them,
#' and returns the data.
#' @param x The KML data frame to tidy.
#' @param folders Whether the KML source contains Folders. If not, the
#' \code{folder} variable is filled with \code{NA}.
#' @param verbose Whether to report invalid coordinates and/or negative
#' altitudes (below sea level); defaults to \code{TRUE}.
#' @return The data frame, with the \code{coordinates} variable replaced by
#' \code{longitude}, \code{latitude} and \code{altitude}, or \code{NULL} if
#' there are no coordinates to return.
#' @keywords internal
kml_finalize <- function(x, folders, verbose = TRUE) {

  stopifnot(is.data.frame(x))

  # case: nothing was extracted
  if (!nrow(x)) {
    return(NULL)
  }

  # case: no folders
  if (!folders) {
    x$folder <- NA_character_
  }

  # Keep only the rows that hold an actual coordinates tuple. This removes
  # elements with no coordinates (NA), the blank strings produced by
  # splitting the whitespace that surrounds the tuples, and whitespace-only
  # coordinates, which kml_coords would otherwise reject as invalid.
  x <- x[ grepl("[^[:space:]]", x$coordinates), ]

  # case: nothing left
  if (!nrow(x)) {
    return(NULL)
  }

  x$longitude <- kml_coords(x$coordinates, 1, verbose)
  x$latitude  <- kml_coords(x$coordinates, 2, verbose)
  x$altitude  <- kml_coords(x$coordinates, 3, verbose)
  x$coordinates <- NULL

  x

}

#' Extract KML Folders.
#'
#' @inheritParams kml_element
#' @param x An XML document.
#' @return A nodeset of Folders.
#' @seealso Google Developers.
#' KML Reference: <Folder> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#folder}
#' @importFrom xml2 xml_find_all
#' @importFrom stringr str_c
#' @keywords internal
kml_folders <- function(x, ns = "d1") {

  # every Folder of the document, at any depth
  xpath <- str_c("//", ns, ":Folder")
  xml_find_all(x, xpath)

}

#' Extract KML Placemarks containing a specific Geometry.
#'
#' @inheritParams kml_element
#' @param x A nodeset of Folders.
#' @param geometry The name of the Geometry to subset on, e.g. \code{"Point"}.
#' @return A nodeset of Placemarks.
#' @seealso Google Developers. KML Reference: <Placemark> Element.
#' \url{https://developers.google.com/kml/documentation/kmlreference#placemark}
#' @importFrom xml2 xml_find_all xml_name
#' @importFrom stringr str_c
#' @keywords internal
kml_placemarks <- function(x, geometry, ns = "d1") {

  # select the parent of each geometry found below a Placemark: either the
  # Placemark itself, or a MultiGeometry wrapper
  xpath <- str_c(ns, ":Placemark//", ns, ":", geometry, "/..")
  nodes <- xml_find_all(x, xpath)

  # step one level further up from the wrappers, to return Placemarks only
  multi <- which(xml_name(nodes) == "MultiGeometry")

  if (length(multi)) {
    nodes[ multi ] <- xml_find_all(nodes[ multi ], "..")
  }

  nodes

}

#' Find the number of coordinates in a KML file.
#'
#' @inheritParams kml_element
#' @param x A KML source. See \link{kml_read}.
#' @return A named numeric vector of three elements containing the total number
#' of coordinates, the total number of coordinates found in <outerBoundaryIs>
#' elements (outer polygon boundaries), and the total number of coordinates
#' found in <innerBoundaryIs> elements (inner polygon boundaries).
#' @importFrom xml2 xml_find_all xml_text
#' @importFrom stringr %>% str_c str_split str_trim
#' @keywords internal
kml_size <- function(x, ns = "d1") {

  x <- kml_read(x)

  # number of whitespace-separated coordinate tuples found at an XPath; empty
  # elements contribute an empty string, which does not count as a tuple
  count <- function(xpath) {

    tuples <- xml_find_all(x, xpath) %>%
      xml_text %>%
      str_trim %>%
      str_split("\\s+") %>%
      unlist

    sum(nzchar(tuples))

  }

  # vapply keeps the element names and guarantees an integer result, even if
  # the document contains no coordinates at all
  c(
    "coordinates" = count(str_c("//", ns, ":coordinates")),
    vapply(c("outerBoundaryIs", "innerBoundaryIs"), function(y) {
      count(str_c("//", ns, ":", y, "//", ns, ":coordinates"))
    }, integer(1))
  )

}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# tidykml

The `tidykml` package reads selected elements and values from [KML][kml]
files, such as those produced by [Google My Maps][google-my-maps], and puts
them into [tidy data frames][cran-tibble], intended for use with packages like [`dplyr`][cran-dplyr] and
[`ggplot2`][cran-ggplot2].

## Motivation

The goal of `tidykml` is to make KML files usable for data wrangling and
visualization in as few steps as possible. Several R packages can import KML
files, but these packages do not offer a straightforward way to use their
results with either `dplyr` or `ggplot2`.

The reason for `tidykml` to exist will go away when packages like
[`ggmap`][cran-ggmap], [`rgdal`][cran-rgdal] and [`sf`][cran-sf] implement easy
ways to produce tidy data frames from KML data, or to fortify KML data into
objects that can be passed to `ggplot2`.

## Limitations

- The `tidykml` package was __tested only against a limited number of KML files__, all of which came either from [GADM][gadm] or from [Google My Maps][google-my-maps].
  The fields that it extracts from the KML file might not fit other KML sources.
- The `tidykml` package __does not fully support [MultiGeometry][kml-multigeom] elements__, such as multi-polygons, and will only handle their _first_ element, in order of appearance in the KML source.

Due to these limitations, `tidykml` lives on GitHub but will probably never show up on CRAN.

## Installation

Install `tidykml` with [`devtools`][cran-devtools]:

```R
devtools::install_github("briatte/tidykml")
library(tidykml)
```

## Example

The data used in this example is a [map of the U.S. Civil War][map-states] featured on Google My Maps. It is bundled in the `tidykml` package (see `?states` for details and usage).

The `tidykml` package contains functions to return the [Points][kml-points],
[Polygons][kml-polygons] or [LineStrings][kml-lines] of a KML file:

```R
library(dplyr)
f <- system.file("extdata", "states.kml.zip", package = "tidykml")
kml_polygons(f) %>%
  glimpse
```

The results are always returned in the following form:

```
Observations: 9,930
Variables: 7
$ folder      <chr> "States (status in 1863)", "States (status in 1863)", "S...
$ name        <chr> "Ohio", "Ohio", "Ohio", "Ohio", "Ohio", "Ohio", "Ohio", ...
$ description <chr> "description: type: Union state<br>type: Union state", "...
$ styleUrl    <chr> "#poly-3F5BA9-1-196", "#poly-3F5BA9-1-196", "#poly-3F5BA...
$ longitude   <dbl> -82.21486, -82.34138, -82.54884, -82.71695, -82.90893, -...
$ latitude    <dbl> 41.46419, 41.43150, 41.39134, 41.45053, 41.42947, 41.456...
$ altitude    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
```

These results are easy to pass to [`ggplot2`][cran-ggplot2]:

```R
library(ggplot2)
kml_polygons(f) %>%
  ggplot(aes(longitude, latitude, group = name)) +
  geom_polygon(color = "white") +
  coord_map("albers", lat0 = 45.5, lat1 = 29.5)
```

![](http://i.imgur.com/d9lyU6r.png)

These results are also easy to pass to [`ggmap`][cran-ggmap]:

```R
library(ggmap)
m <- get_map(kml_bounds(f), source = "osm")
ggmap(m) +
  geom_polygon(data = kml_polygons(f) %>%
                 mutate(type = gsub("(.*)<br>type: (.*)", "\\2", description)),
               aes(longitude, latitude, group = name, fill = type),
               color = "white", alpha = 0.5) +
  scale_fill_brewer("", palette = "Set1") +
  theme(legend.position = "bottom",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())
```

![](http://i.imgur.com/3Xgox6x.jpg)

The final map also shows the location of major U.S. civil war battles:

```R
ggmap(m) +
  geom_polygon(data = kml_polygons(f) %>%
                 mutate(type = gsub("(.*)<br>type: (.*)", "\\2", description)),
               aes(longitude, latitude, group = name, fill = type),
               color = "white", alpha = 0.5) +
  geom_point(data = kml_points(f),
             aes(longitude, latitude),
             color = "darkred", size = 6, alpha = 0.5) +
  scale_fill_brewer("", palette = "Set1") +
  theme(legend.position = "bottom",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())
```

![](http://i.imgur.com/hNSfpdi.jpg)

## Data

In addition to the example map used above, the package also contains a [map of non-Hispanic gangs in South Los Angeles][map-gangs], created by Instagram user [@la\_hood\_maps][map-gangs-source] (see `?gangs` for details).

```R
f <- system.file("extdata", "gangs.kml.zip", package = "tidykml")
m <- get_map(kml_bounds(f), source = "osm")
ggmap(m) +
  geom_polygon(data = kml_polygons(f),
               aes(longitude, latitude, group = name, fill = folder),
               color = "grey25", alpha = 0.75) +
  scale_fill_brewer("", palette = "Set3",
                    guide = guide_legend(override.aes = list(color = NA))) +
  labs(title = "Non-Hispanic Gangs in South Los Angeles (2016)",
       caption = paste("Source: instagram.com/la_hood_maps",
                       "(accessed 30 December 2016)."),
       x = NULL, y = NULL) +
  theme(legend.position = "right",
        legend.justification = c(0, 1),
        plot.title = element_text(face = "bold"),
        plot.caption = element_text(hjust = 0),
        axis.text = element_blank(),
        axis.ticks = element_blank())
```

![](http://i.imgur.com/UIUJCVz.png)

## Utilities

The `tidykml` package contains a few helper functions to handle KML files:

- `kml_bounds` returns the bounding box (longitude and latitude ranges) of the file.
- `kml_coords` parses strings of KML coordinates (`longitude,latitude[,altitude]`).
- `kml_info` returns the number of [Folders][kml-folders], [Placemarks][kml-placemarks], [LineStrings][kml-lines], [Points][kml-points] and [Polygons][kml-polygons] in the file.
- `kml_read` is a wrapper for [`xml2::read_xml`][read_xml] that returns KML sources as an XML nodeset.

[cran-dplyr]: https://cran.r-project.org/package=dplyr "Package 'dplyr' (CRAN)"
[cran-devtools]: https://cran.r-project.org/package=devtools "Package 'devtools' (CRAN)"
[cran-ggmap]: https://cran.r-project.org/package=ggmap "Package 'ggmap' (CRAN)"
[cran-ggplot2]: https://cran.r-project.org/package=ggplot2 "Package 'ggplot2' (CRAN)"
[cran-rgdal]: https://cran.r-project.org/package=rgdal "Package 'rgdal' (CRAN)"
[cran-sf]: https://cran.r-project.org/package=sf "Package 'sf' (CRAN)"
[cran-tibble]: https://cran.r-project.org/package=tibble "Package 'tibble' (CRAN)"
[gadm]: http://www.gadm.org/ "GADM database of Global Administrative Areas"
[kml]: https://developers.google.com/kml/documentation/kmlreference "KML Reference (Google Developers)"
[kml-folders]: https://developers.google.com/kml/documentation/kmlreference#folder "KML Reference: <Folder> (Google Developers)"
[kml-lines]: https://developers.google.com/kml/documentation/kmlreference#linestring "KML Reference: <LineString> (Google Developers)"
[kml-multigeom]: https://developers.google.com/kml/documentation/kmlreference#multigeometry "KML Reference: <MultiGeometry> (Google Developers)"
[kml-placemarks]: https://developers.google.com/kml/documentation/kmlreference#placemark "KML Reference: <Placemark> (Google Developers)"
[kml-points]: https://developers.google.com/kml/documentation/kmlreference#point "KML Reference: <Point> (Google Developers)"
[kml-polygons]: https://developers.google.com/kml/documentation/kmlreference#polygon "KML Reference: <Polygon> (Google Developers)"
[google-my-maps]: https://en.wikipedia.org/wiki/Google_My_Maps "Google My Maps (Wikipedia)"
[map-gangs-source]: https://www.instagram.com/la_hood_maps/ "La Hood Maps (Instagram)"
[map-gangs]: https://goo.gl/7Ar1Aa "Gangs of Los Angeles (2016) (Google My Maps)"
[map-states]: https://goo.gl/rezvty "US Civil War (Google My Maps)"
[read_xml]: https://www.rdocumentation.org/packages/xml2/versions/1.0.0/topics/read_xml "xml2::read_xml (RDocumentation)"
--------------------------------------------------------------------------------