├── LICENSE ├── .Rbuildignore ├── .gitignore ├── NAMESPACE ├── .travis.yml ├── rdat.Rproj ├── DESCRIPTION ├── R ├── jeroen.R └── dat.R ├── man └── dat.Rd └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Jeroen Ooms; Karthik Ram 3 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.dat 4 | ^\.travis.yml 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.o 3 | *.dll 4 | .DS_Store 5 | .Rproj.user 6 | .Rhistory 7 | src/*.a 8 | src/*.o 9 | .dat 10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.1.1): do not edit by hand 2 | 3 | S3method("$",jeroen) 4 | S3method("[",jeroen) 5 | S3method("[[",jeroen) 6 | S3method(print,dat) 7 | S3method(print,jeroen) 8 | export(dat) 9 | importFrom(jsonlite,stream_in) 10 | importFrom(jsonlite,stream_out) 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | 3 | sudo: false 4 | 5 | warnings_are_errors: true 6 | 7 | apt_packages: 8 | - libcurl4-openssl-dev 9 | - npm 10 | - git 11 | 12 | before_install: 13 | - ( cd .. 
; git clone https://github.com/maxogden/dat; cd dat; npm install; npm link ) 14 | 15 | notifications: 16 | email: 17 | on_success: change 18 | on_failure: change 19 | -------------------------------------------------------------------------------- /rdat.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,namespace 22 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rdat 2 | Type: Package 3 | Title: Bindings to the 'dat' Data-Versioning System 4 | Version: 0.2 5 | Date: 2015-05-13 6 | Authors@R: c( 7 | person("Jeroen", "Ooms", email = "jeroen.ooms@stat.ucla.edu", role = c("aut", "cre")), 8 | person("Karthik", "Ram", email = "karthik.ram@gmail.com", role = "aut"), 9 | person("Karissa", "McKelvey", role = "aut")) 10 | URL: https://github.com/ropensci/rdat/ https://github.com/maxogden/dat/ 11 | Description: Programmatic interface to the Dat system for data versioning 12 | and replication. 13 | License: MIT + file LICENSE 14 | Imports: jsonlite 15 | -------------------------------------------------------------------------------- /R/jeroen.R: -------------------------------------------------------------------------------- 1 | # A poor man's oo system. 
# Print method for "jeroen" objects (environments used as simple objects).
#
# Writes `title` followed by one formatted signature per field of `x`, then
# returns `x` invisibly so that an explicit `print(x)` does not also print a
# list of NULLs. The default title uses the first S3 class of `x`; base
# `class()` is used rather than `methods::is()` because the package does not
# import 'methods'.
#' @export
print.jeroen <- function(x, title = paste0("<", class(x)[1L], ">"), ...) {
  cat(title, "\n")
  for (field in ls(x)) {
    cat(format_function(x[[field]], field), sep = "\n")
  }
  invisible(x)
}

# Strict field access: unlike the default `$` for environments, asking for a
# field that does not exist is an error instead of silently returning NULL.
# Only the object itself is searched (inherits = FALSE), never its parents.
#' @export
`$.jeroen` <- function(x, y) {
  if (!exists(y, envir = x, inherits = FALSE)) {
    stop("Class '", class(x)[1L], "' has no field '", y, "'", call. = FALSE)
  }
  get(y, envir = x, inherits = FALSE)
}

# `[[` and `[` share the strict lookup semantics of `$`.
#' @export
`[[.jeroen` <- `$.jeroen`

#' @export
`[.jeroen` <- `$.jeroen`

# Pretty format function headers.
#
# Returns a character vector with the signature of `fun`, where the leading
# "function" keyword is replaced by " $<name>". Long signatures deparse to
# several lines; every line is re-indented with a single space.
# Fields that are not functions have no signature and are shown as " $<name>".
format_function <- function(fun, name = deparse(substitute(fun))) {
  if (!is.function(fun)) {
    return(paste0(" $", name))
  }
  # args() yields a function with the same formals and a NULL body; the last
  # deparsed line is that "NULL" body, so it is dropped.
  header <- utils::head(deparse(args(fun)), -1)
  header <- sub("^[ ]*", " ", header)
  header[1] <- sub("^[ ]*function ?", paste0(" $", name), header[1])
  header
}

# Package-local stop(): same as base::stop() but omits the call by default.
stop <- function(..., call. = FALSE) {
  base::stop(..., call. = call.)
}

# Package-local warning(): same as base::warning() but omits the call by
# default.
warning <- function(..., call. = FALSE) {
  base::warning(..., call. = call.)
}
Default will init a new repo.} 16 | 17 | \item{dat}{name of the 'dat' executable (possibly with path)} 18 | 19 | \item{verbose}{gives some more output} 20 | } 21 | \description{ 22 | Create and modify a dat repository. 23 | } 24 | \examples{ 25 | # init a temporary repo 26 | repo <- dat("cars") 27 | 28 | # insert some data 29 | repo$insert(cars[1:20,]) 30 | v1 <- repo$status()$version 31 | v1 32 | 33 | # insert some more data 34 | repo$insert(cars[21:25,]) 35 | v2 <- repo$status()$version 36 | v2 37 | 38 | # get the data 39 | data1 <- repo$get(v1) 40 | data2 <- repo$get(v2) 41 | diff <- repo$diff(v1, v2) 42 | diff$key 43 | 44 | # create fork 45 | repo$checkout(v1) 46 | repo$insert(cars[26:30,]) 47 | repo$forks() 48 | v3 <- repo$status()$version 49 | 50 | # go back 51 | repo$checkout(v2) 52 | repo$get() 53 | 54 | # store binary attachements 55 | repo$write(serialize(iris, NULL), "iris") 56 | unserialize(repo$read("iris")) 57 | 58 | # Create another repo 59 | dir.create(newdir <- tempfile()) 60 | repo2 <- dat("cars", path = newdir, remote = repo$path()) 61 | repo2$forks() 62 | repo2$get() 63 | 64 | # Create a third repo 65 | dir.create(newdir <- tempfile()) 66 | repo3 <- dat("cars", path = newdir, remote = repo$path()) 67 | 68 | # Sync 2 with 3 via remote (1) 69 | repo2$insert(cars[31:40,]) 70 | repo2$push() 71 | repo3$pull() 72 | 73 | # Verify that repositories are in sync 74 | mydata2 <- repo2$get() 75 | mydata3 <- repo3$get() 76 | stopifnot(all.equal(mydata2, mydata3)) 77 | } 78 | 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Attention 2 | 3 | This repository used to contain an R wrapper for an old version of `dat`. Meanwhile dat has changed a lot so this no longer works. 
4 | 5 | [![Build Status](https://travis-ci.org/ropensci/rdat.svg)](https://travis-ci.org/ropensci/rdat) 6 | 7 | # rdat 8 | [![dat](http://i.imgur.com/1iD2dEx.png)](http://dat-data.com/) 9 | 10 | _Software is in alpha stage. Not yet ready for use with real world data_ 11 | 12 | The `rdat` package provides an R wrapper to the [Dat project](https://github.com/maxogden/). Dat (`git` for data) is a framework for data versioning, replication and synchronisation, see [dat-data.com](http://dat-data.com/). 13 | 14 | 15 | 16 | ## Installation instructions 17 | 18 | __Prerequisites:__ Instructions below require [R](http://cran.rstudio.com/), [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) and [nodejs (npm)](https://nodejs.org/download/). 19 | 20 | 21 | ### Installing `dat` stable 22 | 23 | Install the latest stable version from npm: 24 | 25 | ``` 26 | sudo npm install -g dat 27 | ``` 28 | 29 | See [instructions](https://www.npmjs.com/package/dat#installation) for more details. 30 | 31 | 32 | ### Installing `dat` development version 33 | 34 | If you have not already installed `dat` grab it from github: 35 | 36 | ``` 37 | git clone https://github.com/maxogden/dat ~/dat 38 | cd ~/dat 39 | npm install . 40 | sudo npm link 41 | ``` 42 | 43 | To update an existing copy of `dat` 44 | 45 | ``` 46 | cd ~/dat 47 | git pull 48 | rm -Rf node_modules 49 | npm install . 50 | ``` 51 | 52 | ### Installing `rdat` 53 | 54 | Then install the R package: 55 | 56 | ```r 57 | library(devtools) 58 | install_github("ropensci/rdat") 59 | ``` 60 | 61 | Run through the examples to verify that everything works: 62 | 63 | ```r 64 | library(rdat) 65 | example(dat) 66 | ``` 67 | 68 | ## API 69 | 70 | This api is experimental and hasn't been finalized or implemented. 
Stay tuned for updates. 71 | 72 | ### init 73 | 74 | When no `remote` is specified, `dat()` will init a new repository: 75 | 76 | ```r 77 | repo <- dat("cars", path = getwd()) 78 | ``` 79 | 80 | ### insert 81 | 82 | Inserts data from a data frame and gets the dat version key. 83 | 84 | ```r 85 | # insert some data 86 | repo$insert(cars[1:20,]) 87 | v1 <- repo$status()$version 88 | v1 89 | ``` 90 | Insert more data and get a new version key. 91 | 92 | ```r 93 | # insert more data 94 | repo$insert(cars[21:25,]) 95 | v2 <- repo$status()$version 96 | v2 97 | 98 | ``` 99 | 100 | ### get 101 | 102 | Retrieve particular versions of the dataset from the key. 103 | 104 | ```r 105 | data1 <- repo$get(v1) 106 | data2 <- repo$get(v2) 107 | ``` 108 | 109 | ### diff 110 | 111 | List changes in between versions. 112 | 113 | ```r 114 | diff <- repo$diff(v1, v2) 115 | diff$key 116 | ``` 117 | 118 | ### branching 119 | 120 | Fork a dataset from a particular version into a new branch. 121 | 122 | ```r 123 | # create fork 124 | repo$checkout(v1) 125 | repo$insert(cars[40:42,]) 126 | repo$forks() 127 | v3 <- repo$status()$version 128 | ``` 129 | 130 | 131 | ### checkout 132 | 133 | Checkout the data at a particular version. 134 | 135 | ```r 136 | # go back to v2 137 | repo$checkout(v2) 138 | repo$get() 139 | ``` 140 | 141 | ### binary data 142 | 143 | Save binary data (files) as attachments to the dataset. 144 | 145 | ```r 146 | # store binary attachments 147 | repo$write(serialize(iris, NULL), "iris") 148 | unserialize(repo$read("iris")) 149 | ``` 150 | 151 | 152 | ### clone 153 | 154 | ```r 155 | # Create another repo 156 | dir.create(newdir <- tempfile()) 157 | repo2 <- dat("cars", path = newdir, remote = repo$path()) 158 | repo2$forks() 159 | repo2$get() 160 | ``` 161 | 162 | Specify a `remote` (path or url) to clone an existing repo. In this case we clone the previous repo into a new location. 
163 | 164 | ### push and pull 165 | 166 | Let's make yet another clone of our original repository. 167 | 168 | ```r 169 | # Create a third repo 170 | dir.create(newdir <- tempfile()) 171 | repo3 <- dat("cars", path = newdir, remote = repo$path()) 172 | ``` 173 | 174 | Add data in repo2 and then `push` it back to repo1. 175 | 176 | 177 | ```r 178 | # Add some data and push to origin 179 | repo2$insert(cars[31:40,]) 180 | repo2$push() 181 | ``` 182 | 183 | Then `pull` data back into repo3. 184 | 185 | ```r 186 | # sync data with origin 187 | repo3$pull() 188 | 189 | # Verify that repositories are in sync 190 | mydata2 <- repo2$get() 191 | mydata3 <- repo3$get() 192 | all.equal(mydata2, mydata3) 193 | ``` 194 | 195 | 196 | [![ropensci footer](http://ropensci.org/public_images/github_footer.png)](http://ropensci.org) 197 | 198 | 199 | -------------------------------------------------------------------------------- /R/dat.R: -------------------------------------------------------------------------------- 1 | #' Dat repository 2 | #' 3 | #' Create and modify a dat repository. 4 | #' 5 | #' @export 6 | #' @param dataset name of the dat 'dataset' (namespace) 7 | #' @param remote path or url to clone from. Default will init a new repo. 
#' @param path directory of the dat repository
#' @param dat name of the 'dat' executable (possibly with path)
#' @param verbose gives some more output
#' @return An environment of class \code{c("dat", "jeroen")} whose fields are
#'   the repository methods \code{insert}, \code{write}, \code{read},
#'   \code{get}, \code{status}, \code{checkout}, \code{forks}, \code{diff},
#'   \code{log}, \code{path}, \code{pull} and \code{push}.
#' @importFrom jsonlite stream_in stream_out
#' @examples # init a temporary repo
#' repo <- dat("cars")
#'
#' # insert some data
#' repo$insert(cars[1:20,])
#' v1 <- repo$status()$version
#' v1
#'
#' # insert some more data
#' repo$insert(cars[21:25,])
#' v2 <- repo$status()$version
#' v2
#'
#' # get the data
#' data1 <- repo$get(v1)
#' data2 <- repo$get(v2)
#' diff <- repo$diff(v1, v2)
#' diff$key
#'
#' # create fork
#' repo$checkout(v1)
#' repo$insert(cars[26:30,])
#' repo$forks()
#' v3 <- repo$status()$version
#'
#' # go back
#' repo$checkout(v2)
#' repo$get()
#'
#' # store binary attachments
#' repo$write(serialize(iris, NULL), "iris")
#' unserialize(repo$read("iris"))
#'
#' # Create another repo
#' dir.create(newdir <- tempfile())
#' repo2 <- dat("cars", path = newdir, remote = repo$path())
#' repo2$forks()
#' repo2$get()
#'
#' # Create a third repo
#' dir.create(newdir <- tempfile())
#' repo3 <- dat("cars", path = newdir, remote = repo$path())
#'
#' # Sync 2 with 3 via remote (1)
#' repo2$insert(cars[31:40,])
#' repo2$push()
#' repo3$pull()
#'
#' # Verify that repositories are in sync
#' mydata2 <- repo2$get()
#' mydata3 <- repo3$get()
#' stopifnot(all.equal(mydata2, mydata3))
dat <- function(dataset = "test", path = tempdir(), remote = NULL, dat = "dat",
                verbose = FALSE) {

  # Absolute path of the directory holding the dat repository. Fail right away
  # with a clear error if it does not exist, instead of later inside setwd().
  dat_path <- normalizePath(path, mustWork = TRUE)

  # Caller-supplied values (dataset, file names, versions, remotes) end up in
  # a shell command line, so they are quoted to survive spaces and shell
  # metacharacters.
  quote_arg <- function(x) shQuote(as.character(x))

  # Optional "-c <version>" arguments; NULL (dropped by c()) when no version.
  version_args <- function(version) {
    if (!is.null(version)) c("-c", quote_arg(version))
  }

  # Evaluate `code` with the working directory temporarily set to the
  # repository, restoring the previous directory afterwards.
  in_datdir <- function(code) {
    old_wd <- setwd(dat_path)
    on.exit(setwd(old_wd), add = TRUE)
    force(code)
  }

  # Run a dat command to completion and return its stdout as lines.
  # A non-zero exit status is an error carrying stderr; stderr output of a
  # successful command is relayed as a message.
  dat_command <- function(args) {
    out_file <- tempfile()
    err_file <- tempfile()
    on.exit(unlink(c(out_file, err_file)), add = TRUE)
    status <- in_datdir(
      system2(dat, args, stdout = out_file, stderr = err_file)
    )
    err_txt <- if (file.exists(err_file)) readLines(err_file) else character(0)
    if (status != 0) {
      stop(paste(err_txt, collapse = "\n"), " (", status, ")", call. = FALSE)
    }
    if (length(err_txt) > 0) {
      message(paste(err_txt, collapse = "\n"))
    }
    if (file.exists(out_file)) {
      readLines(out_file)
    }
  }

  # Start dat with a pipe connection opened in `mode`, pass the connection to
  # `handler` and return its result. The process is spawned while the working
  # directory is the repository. The connection is always closed, and a
  # non-zero exit status reported by close() is turned into an error.
  with_dat_pipe <- function(args, mode, handler) {
    # system2() quotes the executable itself; do the same for pipe().
    cmd <- paste(c(shQuote(dat), args), collapse = " ")
    con <- in_datdir(pipe(cmd, open = mode))
    on.exit({
      exit_status <- close(con)
      if (length(exit_status) > 0 && exit_status != 0) {
        stop("dat error ", exit_status, call. = FALSE)
      }
    }, add = TRUE)
    handler(con)
  }

  # Stream ndjson data from dat in R
  dat_stream_in <- function(args) {
    with_dat_pipe(args, "r", function(con) {
      jsonlite::stream_in(con, verbose = verbose)
    })
  }

  # Stream ndjson into dat
  dat_stream_out <- function(data, args) {
    with_dat_pipe(args, "w", function(con) {
      invisible(jsonlite::stream_out(data, con, verbose = verbose))
    })
  }

  # Stream binary data from dat in R. Reads in chunks until the pipe is
  # exhausted, so large attachments are not truncated at a fixed size.
  dat_read_bin <- function(args) {
    with_dat_pipe(args, "rb", function(con) {
      chunks <- list()
      repeat {
        chunk <- readBin(con, raw(), n = 1e7)
        if (length(chunk) == 0L) {
          break
        }
        chunks[[length(chunks) + 1L]] <- chunk
      }
      if (length(chunks) > 0) do.call(c, chunks) else raw(0)
    })
  }

  # Write binary data to dat
  dat_write_bin <- function(data, args) {
    with_dat_pipe(args, "wb", function(con) {
      invisible(writeBin(data, con))
    })
  }

  # Initiate the dat repository: a fresh one, or a clone of `remote`
  if (is.null(remote)) {
    dat_command("init --no-prompt")
  } else {
    dat_command(c("clone", quote_arg(remote), "."))
  }

  # Show dat version
  if (verbose) {
    message("This is dat version ", dat_command("--version"))
  }

  # Control object. Everything defined inside local() becomes a field of the
  # returned object, so helpers must stay outside of it.
  self <- local({

    # Import the rows of a data frame into the dataset
    insert <- function(data) {
      stopifnot(is.data.frame(data))
      invisible(dat_stream_out(data, c("-d", quote_arg(dataset), "import -")))
    }

    # Store a raw vector as attachment `filename`
    write <- function(bin, filename) {
      stopifnot(is.raw(bin))
      invisible(dat_write_bin(
        bin,
        c("write", quote_arg(filename), "-d", quote_arg(dataset), "-")
      ))
    }

    # Read attachment `filename` as a raw vector, optionally at `version`
    read <- function(filename, version = NULL) {
      dat_read_bin(c(
        "read -d", quote_arg(dataset), version_args(version),
        quote_arg(filename)
      ))
    }

    # Export the dataset, optionally at `version`, as a data frame with the
    # row keys in column `key`
    get <- function(version = NULL) {
      out <- dat_stream_in(c(
        "export --full -d", quote_arg(dataset), version_args(version)
      ))
      frame <- out$value
      frame$key <- out$key
      as.data.frame(frame)
    }

    status <- function() {
      jsonlite::fromJSON(dat_command("status --json"))
    }

    checkout <- function(key) {
      invisible(dat_command(c("checkout", quote_arg(key))))
    }

    forks <- function() {
      jsonlite::fromJSON(dat_command("forks --json"))$forks
    }

    diff <- function(version1, version2 = NULL) {
      dat_stream_in(c(
        "diff --json", quote_arg(version1),
        if (!is.null(version2)) quote_arg(version2)
      ))
    }

    log <- function() {
      dat_stream_in("log")
    }

    path <- function() {
      dat_path
    }

    pull <- function() {
      if (is.null(remote)) {
        stop("This repository was not created from a remote.", call. = FALSE)
      }
      jsonlite::fromJSON(dat_command(c("pull --json", quote_arg(remote))))
    }

    push <- function() {
      if (is.null(remote)) {
        stop("This repository was not created from a remote.", call. = FALSE)
      }
      jsonlite::fromJSON(dat_command(c("push --json", quote_arg(remote))))
    }

    environment()
  })

  # Create the object: lock it so fields cannot be added or replaced
  lockEnvironment(self, bindings = TRUE)
  structure(self, class = c("dat", "jeroen", class(self)))
}

# Print a dat repository: its path as title followed by the method
# signatures. Returns `x` invisibly, as print methods are expected to.
#' @export
print.dat <- function(x, ...) {
  print.jeroen(x, title = paste0(" '", x$path(), "'"), ...)
  invisible(x)
}