├── .Rbuildignore ├── .gitignore ├── .gitmodules ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── datapkg_read.R ├── datapkg_validate.R ├── datapkg_write.R └── old │ ├── datapkg_new.R │ └── print.R ├── README-NOT.md ├── README.md ├── appveyor.yml ├── datapkg.Rproj ├── inst └── tabular-data-package.json └── man └── datapackage.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^appveyor\.yml$ 4 | ^\.travis\.yml$ 5 | ^data$ 6 | ^tests/testsuite-py$ 7 | ^datapackage.json$ 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/*.o 5 | src/*.so 6 | src/*.dll 7 | src/rexp.pb.cc 8 | src/rexp.pb.h 9 | src/Makevars 10 | inst/doc 11 | windows 12 | data/* 13 | datapackage.json 14 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci-archive/datapkg/d08f68d8dd8533aa0a7f49fe5a590736e3923754/.gitmodules -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | 7 | r_github_packages: 8 | - jimhester/covr 9 | 10 | warnings_are_errors: true 11 | #r_check_revdep: true 12 | 13 | # V8 is required for jsonvalidate 14 | addons: 15 | apt: 16 | packages: 17 | - libv8-dev 18 | 19 | notifications: 20 | email: 21 | on_success: change 22 | on_failure: change 23 | 24 | after_success: 25 | - Rscript -e 'covr::codecov(type = "all")' 26 | 
-------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: datapkg 2 | Type: Package 3 | Title: Read and Write Data Packages 4 | Version: 0.1 5 | Authors@R: c( 6 | person("Jeroen", "Ooms", email = "jeroen.ooms@stat.ucla.edu", role = c("aut", "cre")), 7 | person("Karthik", "Ram", email = "karthik.ram@gmail.com", role = "aut")) 8 | Description: Convenience functions for reading and writing datasets following 9 | the 'data packagist' format. 10 | URL: http://frictionlessdata.io/data-packages/, https://github.com/ropenscilabs/datapkg 11 | BugReports: https://github.com/ropenscilabs/datapkg/issues 12 | License: MIT + file LICENSE 13 | Imports: 14 | methods, 15 | readr, 16 | git2r, 17 | jsonlite, 18 | curl 19 | Suggests: 20 | jsonvalidate, 21 | ggplot2 22 | Remotes: 23 | ropenscilabs/jsonvalidate, 24 | hadley/readr 25 | RoxygenNote: 5.0.1 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Jeroen Ooms, Karthik Ram 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,datapkg_data) 4 | S3method(print,datapkg_resources) 5 | export(datapkg_read) 6 | export(datapkg_validate) 7 | export(datapkg_write) 8 | import(readr) 9 | -------------------------------------------------------------------------------- /R/datapkg_read.R: -------------------------------------------------------------------------------- 1 | #' Read/write data-package 2 | #' 3 | #' Read and write data frames to/from 'data-package' format. For reading 4 | #' supported paths are disk, http or git. For writing only disk is supported. 
5 | #' 6 | #' @import readr 7 | #' @param path file path or URL to the data package directory 8 | #' @rdname datapackage 9 | #' @name datapackage 10 | #' @aliases datapkg 11 | #' @references \url{http://frictionlessdata.io/data-packages}, \url{https://github.com/datasets} 12 | #' @export 13 | #' @examples # Create new data package 14 | #' pkgdir <- tempfile() 15 | #' datapkg_write(mtcars, path = pkgdir) 16 | #' datapkg_write(iris, path = pkgdir) 17 | #' 18 | #' # Read it back 19 | #' mypkg <- datapkg_read(pkgdir) 20 | #' print(mypkg$data$mtcars) 21 | #' 22 | #' # Clone package with git: 23 | #' cities <- datapkg_read("git://github.com/datasets/world-cities") 24 | #' 25 | #' # Read over http 26 | #' euribor <- datapkg_read("https://raw.githubusercontent.com/datasets/euribor/master") 27 | datapkg_read <- function(path = getwd()){ 28 | root <- sub("datapackage.json$", "", path) 29 | root <- sub("/$", "", root) 30 | if(is_git(root)){ 31 | newroot <- tempfile() 32 | git2r::clone(root, newroot) 33 | root <- newroot 34 | } 35 | json_path <- file.path(root, "datapackage.json") 36 | json <- if(is_url(root)){ 37 | con <- curl::curl(json_path, "r") 38 | on.exit(close(con)) 39 | readLines(con, warn = FALSE) 40 | } else { 41 | readLines(normalizePath(json_path, mustWork = TRUE), warn = FALSE) 42 | } 43 | pkg_info <- jsonlite::fromJSON(json, simplifyVector = TRUE) 44 | if(is.data.frame(pkg_info$resources)) 45 | class(pkg_info$resources) <- c("datapkg_resources", class(pkg_info$resources)) 46 | if(is.data.frame(pkg_info$sources)) 47 | class(pkg_info$sources) <- c("datapkg_sources", class(pkg_info$sources)) 48 | pkg_info$data <- list(rep(NA, nrow(pkg_info$resources))) 49 | data_names <- pkg_info$resources$name 50 | for(i in seq_len(nrow(pkg_info$resources))){ 51 | target <- as.list(pkg_info$resources[i, ]) 52 | if(!length(target$schema)) 53 | stop("Dataset ", i, "is missing a schema") 54 | if(!length(target$schema$fields)) 55 | stop("Dataset ", i, "is missing the schema.fields 
property") 56 | pkg_info$data[[i]] <- read_data_package(get_data_path(target, root), 57 | dialect = as.list(target$dialect), hash = target$hash, target$schema$fields[[1]]) 58 | } 59 | class(pkg_info$data) <- c("datapkg_data") 60 | if(length(data_names)) 61 | names(pkg_info$data) <- ifelse(is.na(data_names), "", data_names) 62 | pkg_info 63 | } 64 | 65 | get_data_path <- function(x, root){ 66 | if(length(x$path)){ 67 | data_path <- normalizePath(file.path(root, x$path), mustWork = FALSE) 68 | if(is_url(data_path) || file.exists(data_path)){ 69 | return(data_path) 70 | } else { 71 | if(length(x$url)){ 72 | message("File not found: ", data_path) 73 | return(x$url) 74 | } else { 75 | stop("File not found: ", data_path) 76 | } 77 | } 78 | } 79 | } 80 | 81 | is_git <- function(x){ 82 | grepl("^git://", x) 83 | } 84 | 85 | is_url <- function(x){ 86 | grepl("^[a-zA-Z]+://", x) 87 | } 88 | 89 | read_data_package <- function(path, dialect = list(), hash = NULL, fields = NULL) { 90 | if(!length(fields)) 91 | return(data.frame()) 92 | col_types <- list() 93 | for(i in seq_len(nrow(fields))) 94 | col_types[[i]] <- do.call(make_field, as.list(fields[i,])) 95 | do.call(parse_data_file, c(list(file = path, col_types = col_types), dialect)) 96 | } 97 | 98 | make_field <- function(name = "", type = "string", description = "", format = NULL, ...){ 99 | 100 | #datapkg prefixes strptime format with 'fmt:' 101 | if(length(format)) 102 | format <- sub("^fmt:", "", format) 103 | switch(type, 104 | string = col_character(), 105 | number = col_number(), 106 | integer = col_integer(), 107 | boolean = col_logical(), 108 | object = col_character(), 109 | array = col_character(), 110 | date = col_date(format), 111 | datetime = col_datetime(format), 112 | time = col_time(format), 113 | col_character() 114 | ) 115 | } 116 | 117 | ## Defaults from http://dataprotocols.org/csv-dialect/ 118 | parse_data_file <- function(file, col_types = NULL, delimiter = ",", doubleQuote = TRUE, 119 | 
lineTerminator = "\r\n", quoteChar = '"', escapeChar = "", skipInitialSpace = TRUE, 120 | header = TRUE, caseSensitiveHeader = FALSE){ 121 | # unused fields: lineTerminator, skipInitialSpace, caseSensitiveHeader 122 | message("Reading file ", file) 123 | readr::read_delim( 124 | col_types = col_types, 125 | file = file, 126 | delim = delimiter, 127 | escape_double = doubleQuote, 128 | quote = quoteChar, 129 | escape_backslash = identical(escapeChar, "\\"), 130 | col_names = header 131 | ) 132 | } 133 | 134 | #' @export 135 | print.datapkg_resources <- function(x, ...){ 136 | print_names <- names(x) %in% c("name", "path", "format") 137 | print(as.data.frame(x)[print_names]) 138 | } 139 | 140 | #' @export 141 | print.datapkg_data <- function(x, ...){ 142 | for(i in seq_along(x)){ 143 | data_name <- names(x[i]) 144 | if(length(data_name) && !is.na(data_name)){ 145 | cat(" $", data_name, "\n", sep = "") 146 | } else { 147 | cat(" [[", i, "]]\n", sep = "") 148 | } 149 | mydata <- x[[i]] 150 | for(j in seq_along(mydata)){ 151 | cat(" [", j, "] ", names(mydata)[j], " (", methods::is(mydata[[j]])[1], ")\n", sep = "") 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /R/datapkg_validate.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @rdname datapackage 3 | datapkg_validate <- function(path = getwd()){ 4 | root <- sub("datapackage.json$", "", path) 5 | root <- sub("/$", "", root) 6 | json_path <- file.path(root, "datapackage.json") 7 | schema_path <- system.file("tabular-data-package.json", package = "datapkg") 8 | json <- paste(readLines(json_path), collapse = "\n") 9 | schema <- paste(readLines(schema_path), collapse = "\n") 10 | jsonvalidate::json_validate(json, schema, verbose =TRUE, greedy = TRUE) 11 | } 12 | -------------------------------------------------------------------------------- /R/datapkg_write.R: 
# --------------------------------------------------------------------------------
# R/datapkg_write.R

#' @rdname datapackage
#' @param data a data frame to be added to the package
#' @param name what to name this dataset (defaults to the deparsed name of
#'   the \code{data} argument)
#' @return The path to the updated \code{datapackage.json}, invisibly.
#' @export
datapkg_write <- function(data, name, path = getwd()){
  if(missing(name))
    name <- deparse(substitute(data))
  stopifnot(is.data.frame(data))
  # Accept either the package root or a path ending in datapackage.json,
  # mirroring the normalization done by datapkg_read()
  root <- sub("datapackage.json$", "", path)
  root <- sub("/$", "", root)
  dir.create(file.path(root, "data"), showWarnings = FALSE, recursive = TRUE)
  json_path <- file.path(root, "datapackage.json")
  csv_name <- file.path("data", paste0(name, ".csv"))
  csv_path <- file.path(root, csv_name)
  # Refuse to clobber an existing resource file
  if(file.exists(csv_path))
    stop("File already exists: ", csv_path, call. = FALSE)
  pkg_info <- if(file.exists(json_path)){
    message("Opening existing ", json_path)
    # FIX: fromJSON is exported by jsonlite; the original used the internal
    # accessor jsonlite:::fromJSON, which R CMD check flags and which may
    # break if jsonlite reorganizes its internals.
    jsonlite::fromJSON(json_path, simplifyVector = FALSE)
  } else {
    message("Creating new ", json_path)
    list(name = basename(path))
  }
  readr::write_csv(data, csv_path)
  # Append a resource entry describing the newly written csv file
  pkg_info$resources <- c(pkg_info$resources,
    list(list(
      path = csv_name,
      name = name,
      schema = make_schema(data)
    ))
  )
  json <- jsonlite::toJSON(pkg_info, pretty = TRUE, auto_unbox = TRUE)
  writeLines(json, json_path)
  invisible(json_path)
}

# Build a JSON table schema (list with a 'fields' member containing one
# name/type pair per column) for a data frame.
make_schema <- function(data){
  fields <- lapply(seq_along(data), function(i){
    list(
      name = names(data)[i],
      type = get_type(data[[i]])
    )
  })
  list(fields = fields)
}

# Map an R vector to a JSON table schema type string. Date/POSIXt are
# tested before the numeric checks because such vectors are also numeric;
# anything unrecognized falls back to "string".
get_type <- function(x){
  if(inherits(x, "Date")) return("date")
  if(inherits(x, "POSIXt")) return("datetime")
  if(is.character(x)) return("string")
  if(is.integer(x)) return("integer")
  if(is.numeric(x)) return("number")
  if(is.logical(x)) return("boolean")
  return("string")
}
# --------------------------------------------------------------------------------
/R/old/datapkg_new.R: -------------------------------------------------------------------------------- 1 | #' Data-package 2 | #' 3 | #' Load or initiate a \href{http://dataprotocols.org/data-packages}{data package} for 4 | #' reading / writing data and metadata. A data package can be an R package at the same 5 | #' time. The default format for storing data is 6 | #' \href{http://dataprotocols.org/linear-tsv}{linear-tsv} which is the least 7 | #' ambiguous format and natively supported by R via \code{\link{read.table}} 8 | #' or \code{\link[readr:read_tsv]{readr::read_tsv}}. 9 | #' 10 | #' @aliases datapackage 11 | #' @importFrom tools md5sum 12 | #' @param path root directory of the data package 13 | #' @param verbose emits some debugging messages 14 | #' @examples # Create a data package in a dir 15 | #' pkgdir <- tempfile() 16 | #' dir.create(pkgdir) 17 | #' pkg <- data_package(pkgdir) 18 | #' 19 | #' # Show methods 20 | #' print(pkg) 21 | #' 22 | #' # Examples 23 | #' pkg$author("Jerry", "jerry@gmail.com") 24 | #' pkg$resources$add(iris) 25 | #' pkg$sources$add("Fisher, R. A. (1936)") 26 | #' 27 | #' # View json file 28 | #' pkg$json() 29 | #' 30 | #' # Parse data 31 | #' pkg$resources$read("iris") 32 | datapkg_new <- function(path = ".", verbose = TRUE){ 33 | pkg_file <- function(x, exists = TRUE) { 34 | normalizePath(file.path(path, x), mustWork = exists && !is_url(x)) 35 | } 36 | 37 | pkg_json <- function(){ 38 | pkg_file("datapackage.json") 39 | } 40 | 41 | pkg_read <- function(){ 42 | from_json(pkg_json()) 43 | } 44 | 45 | pkg_update <- function(...){ 46 | meta <- pkg_read() 47 | args <- list(...) 
48 | for(i in seq_along(args)){ 49 | key <- names(args[i]) 50 | meta[[key]] = args[[i]] 51 | } 52 | writeLines(to_json(meta), pkg_json()) 53 | return(meta) 54 | } 55 | 56 | pkg_init <- function(){ 57 | if(file.exists(pkg_file("datapackage.json", FALSE))){ 58 | meta <- pkg_read() 59 | if(verbose) 60 | message("Opening existing datapackage: ", meta$name) 61 | } else { 62 | writeLines("{}", pkg_file("datapackage.json", FALSE)) 63 | pkg_update( 64 | name = basename(normalizePath(path)), 65 | resources = list() 66 | ) 67 | } 68 | } 69 | 70 | # Sources object 71 | pkg_contributors <- function(){ 72 | find <- function(name = "", exact = FALSE){ 73 | data <- Filter(function(x){ 74 | if(isTRUE(exact)){ 75 | return(x$name == name) 76 | } else { 77 | grepl(name, x$name, fixed = TRUE) 78 | } 79 | }, pkg_read()$contributors) 80 | jsonlite:::simplifyDataFrame(data, c("name", "email", "web"), flatten = FALSE, simplifyMatrix = FALSE) 81 | } 82 | add <- function(name, email, web){ 83 | out <- list(name = name) 84 | if(!missing(email)) 85 | out$email = email 86 | if(!missing(web)) 87 | out$web = web 88 | pkg_update(contributors = c(pkg_read()$contributors, list(out))) 89 | find() 90 | } 91 | remove <- function(name){ 92 | stopifnot(is_string(name)) 93 | all <- find(name, exact = TRUE) 94 | if(!nrow(all)) 95 | stop("No source found for: ", name) 96 | pkg_update(contributors = Filter(function(x){ 97 | (x$name != name) 98 | }, pkg_read()$contributors)) 99 | find() 100 | } 101 | lockEnvironment(environment(), TRUE) 102 | structure(environment(), class=c("dpkg-contributors", "jeroen", "environment")) 103 | } 104 | 105 | # Sources object 106 | pkg_sources <- function(){ 107 | find <- function(name = "", exact = FALSE){ 108 | data <- Filter(function(x){ 109 | if(isTRUE(exact)){ 110 | return(x$name == name) 111 | } else { 112 | grepl(name, x$name, fixed = TRUE) 113 | } 114 | }, pkg_read()$sources) 115 | jsonlite:::simplifyDataFrame(data, c("name", "email", "web"), flatten = FALSE, 
simplifyMatrix = FALSE) 116 | } 117 | add <- function(name, email, web){ 118 | out <- list(name = name) 119 | if(!missing(email)) 120 | out$email = email 121 | if(!missing(web)) 122 | out$web = web 123 | pkg_update(sources = c(pkg_read()$sources, list(out))) 124 | find() 125 | } 126 | remove <- function(name){ 127 | stopifnot(is_string(name)) 128 | all <- find(name, exact = TRUE) 129 | if(!nrow(all)) 130 | stop("No source found for: ", name) 131 | pkg_update(sources = Filter(function(x){ 132 | (x$name != name) 133 | }, pkg_read()$sources)) 134 | find() 135 | } 136 | lockEnvironment(environment(), TRUE) 137 | structure(environment(), class=c("datapkg-sources", "jeroen", "environment")) 138 | } 139 | 140 | # Resources object 141 | pkg_resources <- function(){ 142 | find <- function(name = "", folder = NULL){ 143 | data <- Filter(function(x){ 144 | res_path <- paste0("", x$path) 145 | res_name <- paste0("", x$name) 146 | if(length(folder) && !(grepl(paste0("^", folder, "/"), res_path))) 147 | return(FALSE) 148 | grepl(name, res_name, fixed = TRUE) 149 | }, pkg_read()$resources) 150 | for(i in seq_along(data)){ 151 | data[[i]]$read = function(){ 152 | target <- data[[i]] 153 | read_data_package(pkg_file(target$path), dialect = target$dialect, hash = target$hash, target$schema) 154 | } 155 | } 156 | jsonlite:::simplifyDataFrame(data, c("name", "path", "format", "read"), flatten = FALSE, simplifyMatrix = FALSE) 157 | } 158 | info <- function(name){ 159 | data <- Filter(function(x){ 160 | (x$name == name) 161 | }, pkg_read()$resources) 162 | if(!length(data)) 163 | stop("Resource not found: ", name) 164 | data[[1]] 165 | } 166 | add <- function(data, name, folder = "data", format = "csv"){ 167 | stopifnot(is.data.frame(data)) 168 | if(missing(name)) 169 | name <- deparse(substitute(data)) 170 | format <- match.arg(format) 171 | if(nrow(find(name))) 172 | stop("Resource with name '", name, "' already exists.") 173 | file_name <- paste(name, format, sep = ".") 174 | 
file_path <- file.path(folder, file_name) 175 | abs_path <- pkg_file(file_path, exists = FALSE) 176 | dir.create(pkg_file(folder, exists = FALSE), showWarnings = FALSE) 177 | write_data <- prepare_data(data) 178 | readr::write_delim(write_data, abs_path, delim = ";", col_names = TRUE) 179 | hash <- tools::md5sum(abs_path) 180 | rec <- base::list( 181 | name = name, 182 | path = file_path, 183 | format = "tsv", 184 | hash = unname(hash), 185 | schema = make_schema(data), 186 | dialect = base::list( 187 | header = TRUE, 188 | delimiter = ";" 189 | ) 190 | ) 191 | pkg_update(resources = c(pkg_read()$resources, base::list(rec))) 192 | find() 193 | } 194 | remove <- function(name, folder = "data"){ 195 | stopifnot(is_string(name)) 196 | target <- info(name) 197 | unlink(pkg_file(target$path)) 198 | pkg_update(resources = Filter(function(x){ 199 | (x$name != name) 200 | }, pkg_read()$resources)) 201 | find() 202 | } 203 | read <- function(name){ 204 | target <- info(name) 205 | data_path <- pkg_file(target$path) 206 | read_data_package(data_path, dialect = target$dialect, hash = target$hash, target$schema) 207 | } 208 | lockEnvironment(environment(), TRUE) 209 | structure(environment(), class=c("datapkg-resources", "jeroen", "environment")) 210 | } 211 | 212 | # Exported methods 213 | pkg_init() 214 | self <- local({ 215 | sources <- pkg_sources() 216 | resources <- pkg_resources() 217 | contributors <- pkg_contributors() 218 | name <- function(x){ 219 | if(!missing(x)) 220 | pkg_update(name = x) 221 | pkg_read()$name 222 | } 223 | license <- function(type, url){ 224 | if(!missing(type)){ 225 | if(!missing(url)){ 226 | pkg_update(license = list( 227 | type = type, 228 | url = url 229 | )) 230 | } else { 231 | pkg_update(license = type) 232 | } 233 | } 234 | pkg_read()$license 235 | } 236 | author <- function(name, email, web){ 237 | if(!missing(name)){ 238 | out <- list(name = name) 239 | if(!missing(email)) 240 | out$email = email 241 | if(!missing(web)) 242 | out$web = 
web 243 | pkg_update(author = out) 244 | } 245 | pkg_read()$author 246 | } 247 | description <- function(x){ 248 | if(!missing(x)) 249 | pkg_update(description = x) 250 | pkg_read()$description 251 | } 252 | homepage <- function(x){ 253 | if(!missing(x)) 254 | pkg_update(homepage = x) 255 | pkg_read()$homepage 256 | } 257 | version <- function(x){ 258 | if(!missing(x)) 259 | pkg_update(version = x) 260 | pkg_read()$version 261 | } 262 | json <- function(){ 263 | str <- paste(readLines(pkg_json()), collapse = "\n") 264 | structure(str, class = "json") 265 | } 266 | lockEnvironment(environment(), TRUE) 267 | structure(environment(), class=c("dpkg", "jeroen", "environment")) 268 | }) 269 | } 270 | 271 | prepare_data <- function(data){ 272 | for(i in seq_along(data)){ 273 | if(is.logical(data[[i]])){ 274 | out <- ifelse(data[[i]], "true", "false") 275 | out[is.na(data[[i]])] <- "" 276 | data[[i]] <- out 277 | } 278 | } 279 | data 280 | } 281 | 282 | make_schema <- function(data){ 283 | out <- as.list(rep(NA, length(data))) 284 | for(i in seq_along(data)){ 285 | out[[i]] <- list( 286 | name = names(data)[i], 287 | type = get_type(data[[i]]) 288 | ) 289 | } 290 | list(fields = out) 291 | } 292 | 293 | from_json <- function(path){ 294 | path <- normalizePath(path, mustWork = TRUE) 295 | jsonlite::fromJSON(readLines(path, warn = FALSE), simplifyVector = FALSE) 296 | } 297 | 298 | to_json <- function(x){ 299 | jsonlite::toJSON(x, auto_unbox = TRUE, pretty = TRUE) 300 | } 301 | 302 | is_string <- function(x){ 303 | is.character(x) && identical(length(x), 1L) 304 | } 305 | 306 | is_url <- function(x){ 307 | grepl("^[a-zA-Z]+://", x) 308 | } 309 | 310 | 311 | # Implements: http://dataprotocols.org/json-table-schema/#schema 312 | coerse_type <- function(x, type){ 313 | switch(type, 314 | string = as.character(x), 315 | number = as.numeric(x), 316 | integer = as.integer(x), 317 | boolean = parse_bool(x), 318 | object = lapply(x, from_json), 319 | array = lapply(x, from_json), 
320 | date = parse_date(x), 321 | datetime = parse_datetime(x), 322 | time = paste_time(x), 323 | as.character(x) 324 | ) 325 | } 326 | 327 | get_type <- function(x){ 328 | if(inherits(x, "Date")) return("date") 329 | if(inherits(x, "POSIXt")) return("datetime") 330 | if(is.character(x)) return("string") 331 | if(is.integer(x)) return("integer") 332 | if(is.numeric(x)) return("number") 333 | if(is.logical(x)) return("boolean") 334 | return("string") 335 | } 336 | 337 | parse_bool <- function(x){ 338 | is_true <- (x %in% c("yes", "y", "true", "t", "1")) 339 | is_false <- (x %in% c("no", "n", "false", "f", "0")) 340 | is_na <- is.na(x) | (x %in% c("NA", "na", "")) 341 | is_none <- (!is_true & !is_false & !is_na) 342 | if(any(is_none)) 343 | stop("Failed to parse boolean values: ", paste(head(x[is_none], 5), collapse = ", ")) 344 | out <- rep(FALSE, length(x)) 345 | out[is_na] <- NA 346 | out[is_true] <- TRUE 347 | out 348 | } 349 | 350 | parse_date <- function(x){ 351 | as.Date(x) 352 | } 353 | 354 | parse_datetime <- function(x){ 355 | as.POSIXct(x) 356 | } 357 | 358 | paste_time <- function(x){ 359 | as.POSIXct(x) 360 | } 361 | 362 | -------------------------------------------------------------------------------- /R/old/print.R: -------------------------------------------------------------------------------- 1 | # A poor man's oo system. 2 | 3 | #' @export 4 | print.jeroen <- function(x, title = paste0("<", is(x), ">"), indent = 0, ...){ 5 | ns <- ls(x) 6 | if(length(title)) cat(title, "\n") 7 | lapply(ns, function(fn){ 8 | if(is.function(x[[fn]])){ 9 | cat(format_function(x[[fn]], fn, indent = indent), sep = "\n") 10 | } else { 11 | cat(" $", fn, ":\n", sep = "") 12 | print(x[[fn]], title = NULL, indent = indent + 2L) 13 | } 14 | }) 15 | invisible(x) 16 | } 17 | 18 | #' @export 19 | `$.jeroen` <- function(x, y){ 20 | if(!exists(y, x, inherits = FALSE)){ 21 | stop("Class '", is(x), "' has no field '", y, "'", call. 
= FALSE) 22 | } 23 | get(y, x, inherits = FALSE) 24 | } 25 | 26 | #' @export 27 | `[[.jeroen` <- `$.jeroen` 28 | 29 | #' @export 30 | `[.jeroen` <- `$.jeroen` 31 | 32 | # Pretty format function headers 33 | format_function <- function(fun, name = deparse(substitute(fun)), indent = 0){ 34 | #header <- sub("\\{$", "", capture.output(fun)[1]) 35 | header <- head(deparse(args(fun)), -1) 36 | header <- sub("^[ ]*", " ", header) 37 | header[1] <- sub("^[ ]*function ?", paste0(" $", name), header[1]) 38 | paste(c(rep(" ", indent), header), collapse = "") 39 | } 40 | 41 | # Override default call argument. 42 | stop <- function(..., call. = FALSE){ 43 | base::stop(..., call. = call.) 44 | } 45 | 46 | # Override default call argument. 47 | warning <- function(..., call. = FALSE){ 48 | base::warning(..., call. = call.) 49 | } 50 | -------------------------------------------------------------------------------- /README-NOT.md: -------------------------------------------------------------------------------- 1 | ## Data Package in R 2 | 3 | [![Project Status: Inactive – The project has reached a stable, usable state but is no longer being actively developed; support/maintenance will be provided as time allows.](http://www.repostatus.org/badges/latest/inactive.svg)](http://www.repostatus.org/#inactive) 4 | 5 | Data-packages is a [standard format](http://frictionlessdata.io/data-packages/) for describing meta-data for a collection of datasets. The package `datapkg` provides convenience functions for retrieving and parsing data packages in R. To install in R: 6 | 7 | ```r 8 | library(devtools) 9 | install_github("hadley/readr") 10 | install_github("ropenscilabs/jsonvalidate") 11 | install_github("ropenscilabs/datapkg") 12 | ``` 13 | 14 | ## Reading data 15 | 16 | The `datapkg_read` function retrieves and parses data packages from a local or remote sources. 
A few example packages are available from the [datasets](https://github.com/datasets) and [testsuite-py](https://github.com/frictionlessdata/testsuite-py) repositories. The path needs to point to a directory on disk or git remote or URL containing the root of the data package. 17 | 18 | ```r 19 | # Load client 20 | library(datapkg) 21 | 22 | # Clone via git 23 | cities <- datapkg_read("git://github.com/datasets/world-cities") 24 | 25 | # Same data but download over http 26 | cities <- datapkg_read("https://raw.githubusercontent.com/datasets/world-cities/master") 27 | ``` 28 | 29 | The output object contains data and metadata from the data-package, with actual datasets inside the `$data` field. 30 | 31 | ```r 32 | # Package info 33 | print(cities) 34 | 35 | # Open actual data in RStudio Viewer 36 | View(cities$data[[1]]) 37 | ``` 38 | 39 | In the case of multiple datasets, each one is either referenced by index or, if available, by name (names are optional in data packages). 40 | 41 | ```r 42 | # Package with many datasets 43 | euribor <- datapkg_read("https://raw.githubusercontent.com/datasets/euribor/master") 44 | 45 | # List datasets in this package 46 | names(euribor$data) 47 | View(euribor$data[[1]]) 48 | ``` 49 | 50 | ## Writing data 51 | 52 | The package also has basic functionality to save a data frame into a data package and 53 | update the `datapackage.json` file accordingly. 54 | 55 | ```r 56 | # Create new data package 57 | pkgdir <- tempfile() 58 | datapkg_write(mtcars, path = pkgdir) 59 | datapkg_write(iris, path = pkgdir) 60 | 61 | # Read it back 62 | mypkg <- datapkg_read(pkgdir) 63 | print(mypkg$data$mtcars) 64 | ``` 65 | 66 | From here you can modify the `datapackage.json` file with other metadata. 67 | 68 | ## Status 69 | 70 | This package is work in progress. Current open issues: 71 | 72 | - Make `readr` parse `0`/`1` values for booleans: [PR#406](https://github.com/hadley/readr/pull/406) 73 | - Support "year only" dates (`%Y`). 
Not sure if this constitutes a valid date actually: [PR#407](https://github.com/hadley/readr/pull/407) 74 | - R and `readr` require specifying which strings are interpreted as missing values. Defaults are the empty string `""` and `NA`. A similar property needs to be defined in the spec. 75 | - It is unclear what to do with parsing errors, or if the fields in `datapackage.json` do not match the csv data. Examples: [s-and-p-500](https://github.com/datasets/s-and-p-500) and [currency-codes](https://raw.githubusercontent.com/frictionlessdata/testsuite-py/master/datasets/currency-codes) 76 | 77 | Features: 78 | 79 | - Writing data packages from data frames. 80 | 81 | [![rOpenSci](http://ropensci.org/public_images/github_footer.png)](http://ropensci.org) 82 | [![OKFN](http://assets.okfn.org/p/labs/img/logo.png)](https://okfn.org) 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datapkg 2 | 3 | [![Project Status: Abandoned](https://www.repostatus.org/badges/latest/abandoned.svg)](https://www.repostatus.org/#abandoned) 4 | 5 | This repository has been archived. The former README is now in [README-NOT.md](README-NOT.md)
6 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | # Adapt as necessary starting from here 14 | 15 | build_script: 16 | - travis-tool.sh install_deps 17 | 18 | test_script: 19 | - travis-tool.sh run_tests 20 | 21 | on_failure: 22 | - 7z a failure.zip *.Rcheck\* 23 | - appveyor PushArtifact failure.zip 24 | 25 | artifacts: 26 | - path: '*.Rcheck\**\*.log' 27 | name: Logs 28 | 29 | - path: '*.Rcheck\**\*.out' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.fail' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.Rout' 36 | name: Logs 37 | 38 | - path: '\*_*.tar.gz' 39 | name: Bits 40 | 41 | - path: '\*_*.zip' 42 | name: Bits 43 | -------------------------------------------------------------------------------- /datapkg.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /inst/tabular-data-package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Tabular Data Package", 4 | "description": "Tabular Data Package is a simple specification for data access and delivery of tabular data.", 5 | "type": "object", 6 | "required": [ "name", "resources" ], 7 | "properties": { 8 | "name": { 9 | "$ref": "definitions.json#/define/name", 10 | "propertyOrder": 10 11 | }, 12 | "title": { 13 | "$ref": "definitions.json#/define/title", 14 | "propertyOrder": 20 15 | }, 16 | "description": { 17 | "$ref": "definitions.json#/define/description", 18 | "format": "textarea", 19 | "propertyOrder": 30 20 | }, 21 | "homepage": { 22 | "$ref": "definitions.json#/define/homepage", 23 | "propertyOrder": 40 24 | }, 25 | "version": { 26 | "$ref": "definitions.json#/define/version", 27 | "propertyOrder": 50 28 | }, 29 | "license": { 30 | "$ref": "definitions.json#/define/license", 31 | "propertyOrder": 60 32 | }, 33 | "author": { 34 | "$ref": "definitions.json#/define/author", 35 | "propertyOrder": 70 36 | }, 37 | "contributors": { 38 | "$ref": "definitions.json#/define/contributors", 39 | "propertyOrder": 80, 40 | "options": { "hidden": true } 41 | }, 42 | "resources": { 43 | "title": "Resources", 44 | "description": "The data resources that this package describes.", 45 | "type": "array", 46 | "propertyOrder": 90, 47 | "minItems": 0, 48 | "items": { 49 | "type": "object", 50 | "properties": { 51 | "name": { 52 | "$ref": "definitions.json#/define/name", 53 | "propertyOrder": 10 54 | }, 55 | "title": { 56 | "$ref": "definitions.json#/define/title", 57 | "propertyOrder": 20 58 | }, 59 | "description": { 60 | "$ref": "definitions.json#/define/description", 61 | "propertyOrder": 30, 62 | "format": "textarea" 63 | }, 64 | "schema": { 65 | "$ref": "definitions.json#/define/schema", 66 | "propertyOrder": 40 67 | }, 68 | "url": { 69 | "$ref": "definitions.json#/define/url", 70 | "propertyOrder": 
50 71 | }, 72 | "path": { 73 | "$ref": "definitions.json#/define/path", 74 | "propertyOrder": 60 75 | }, 76 | "data": { 77 | "$ref": "definitions.json#/define/data", 78 | "propertyOrder": 70 79 | }, 80 | "format": { 81 | "$ref": "definitions.json#/define/format", 82 | "propertyOrder": 80 83 | }, 84 | "mediatype": { 85 | "$ref": "definitions.json#/define/mediatype", 86 | "propertyOrder": 90 87 | }, 88 | "encoding": { 89 | "$ref": "definitions.json#/define/encoding", 90 | "propertyOrder": 100 91 | }, 92 | "bytes": { 93 | "$ref": "definitions.json#/define/bytes", 94 | "propertyOrder": 110, 95 | "options": { "hidden": true } 96 | }, 97 | "hash": { 98 | "$ref": "definitions.json#/define/hash", 99 | "propertyOrder": 120, 100 | "options": { "hidden": true } 101 | }, 102 | "dialect": { 103 | "$ref": "definitions.json#/define/dialect", 104 | "propertyOrder": 130, 105 | "options": { "hidden": true } 106 | }, 107 | "sources": { 108 | "$ref": "definitions.json#/define/sources", 109 | "propertyOrder": 140, 110 | "options": { "hidden": true } 111 | }, 112 | "license": { 113 | "$ref": "definitions.json#/define/license", 114 | "description": "The license under which the resource is published.", 115 | "propertyOrder": 150, 116 | "options": { "hidden": true } 117 | } 118 | }, 119 | "anyOf": [ 120 | { "title": "url required", "required": ["url"] }, 121 | { "title": "path required", "required": ["path"] }, 122 | { "title": "data required", "required": ["data"] } 123 | ] 124 | } 125 | }, 126 | "keywords": { 127 | "$ref": "definitions.json#/define/keywords", 128 | "propertyOrder": 100 129 | }, 130 | "sources": { 131 | "$ref": "definitions.json#/define/sources", 132 | "propertyOrder": 110, 133 | "options": { "hidden": true } 134 | }, 135 | "image": { 136 | "$ref": "definitions.json#/define/image", 137 | "propertyOrder": 120, 138 | "options": { "hidden": true } 139 | }, 140 | "base": { 141 | "$ref": "definitions.json#/define/base", 142 | "propertyOrder": 130, 143 | "options": { "hidden": 
true } 144 | }, 145 | "dataDependencies": { 146 | "$ref": "definitions.json#/define/dataDependencies", 147 | "propertyOrder": 140, 148 | "options": { "hidden": true } 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /man/datapackage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datapkg_read.R, R/datapkg_validate.R, R/datapkg_write.R 3 | \name{datapackage} 4 | \alias{datapackage} 5 | \alias{datapkg} 6 | \alias{datapkg_read} 7 | \alias{datapkg_validate} 8 | \alias{datapkg_write} 9 | \title{Read/write data-package} 10 | \usage{ 11 | datapkg_read(path = getwd()) 12 | 13 | datapkg_validate(path = getwd()) 14 | 15 | datapkg_write(data, name, path = getwd()) 16 | } 17 | \arguments{ 18 | \item{path}{file path or URL to the data package directory} 19 | 20 | \item{data}{a data frame to be added to the package} 21 | 22 | \item{name}{what to name this dataset} 23 | } 24 | \description{ 25 | Read and write data frames to/from 'data-package' format. For reading 26 | supported paths are disk, http or git. For writing only disk is supported. 27 | } 28 | \examples{ 29 | # Create new data package 30 | pkgdir <- tempfile() 31 | datapkg_write(mtcars, path = pkgdir) 32 | datapkg_write(iris, path = pkgdir) 33 | 34 | # Read it back 35 | mypkg <- datapkg_read(pkgdir) 36 | print(mypkg$data$mtcars) 37 | 38 | # Clone package with git: 39 | cities <- datapkg_read("git://github.com/datasets/world-cities") 40 | 41 | # Read over http 42 | euribor <- datapkg_read("https://raw.githubusercontent.com/datasets/euribor/master") 43 | } 44 | \references{ 45 | \url{http://frictionlessdata.io/data-packages}, \url{https://github.com/datasets} 46 | } 47 | 48 | --------------------------------------------------------------------------------