├── cyl.csv ├── .gitignore ├── LICENSE-MIT.txt ├── mtcars.csv ├── dplyr ├── README.Rmd └── README.md /cyl.csv: -------------------------------------------------------------------------------- 1 | cyl,description 2 | 4,four 3 | 6,six 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | .Rproj.user 3 | *.Rproj 4 | .DS_Store 5 | *.swp 6 | mtcars.csv 7 | -------------------------------------------------------------------------------- /LICENSE-MIT.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 mikefc@coolbutuseless.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /mtcars.csv: -------------------------------------------------------------------------------- 1 | "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /dplyr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | # 5 | # dplyr-cli [mikefc@coolbutuseless.com] 6 | # 7 | # Run chains of dplyr 
# commands in the terminal
#
#  * run any dplyr command of the form "dplyr::verb(.data, code)"
#  * can set input file to be a CSV or RDS file
#  * if reading data from stdin (the default), assume that it is CSV format
#
# History
#   v0.1.0  2020-04-20 Initial release
#   v0.1.1  2020-04-21 Switch to Rscript executable
#   v0.1.2  2020-04-21 Support for joins
#   v0.1.3  2020-04-22 More robust tmpdir handling
#   v0.1.4  2022-01-23 Fix for newer read_csv handling
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

suppressMessages({
  library(docopt)
  library(dplyr)
})



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# configuration for docopt
#
# The positional arguments <command> and <code> have to appear in the
# usage pattern: the rest of this script reads them back as
# opt$command (the dplyr verb) and opt$code (the verb's arguments).
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
doc <- "dplyr-cli

Usage:
    dplyr <command> [--file=fn] [--csv | -c] [--verbose | -v] [<code>...]
    dplyr -h | --help

Options:
    -h --help            show this help text
    -f FILE --file=FILE  input CSV or RDS filename. If reading from stdin, assumes CSV [default: stdin]
    -c --csv             write output to stdout in CSV format (instead of default RDS file)
    -v --verbose         be verbose"

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse the command line. docopt prints the help text itself when
# -h / --help is given.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
opt <- docopt(doc)


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Helper function to read data from a file based upon its extension
#
# input   path to an existing file; the extension is matched
#         case-insensitively
# returns the result of readr::read_csv() for 'csv', or of readRDS()
#         for 'rds'
# stops   if the file does not exist or the extension is neither
#         'csv' nor 'rds'
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
read_data <- function(input) {
  if (!file.exists(input)) {
    stop("[input]: not found: ", input, call. = FALSE)
  }
  ext <- tolower(tools::file_ext(input))
  switch(
    ext,
    csv = readr::read_csv(input, col_types = readr::cols()),
    rds = readRDS(input),
    stop("Unknown file extension: ", ext, call. = FALSE)
  )
}


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Helper function: return 'y' when 'x' is NULL, otherwise 'x'.
# (Not referenced elsewhere in this script; kept for compatibility.)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"%||%" <- function(x, y) {
  if (is.null(x)) {
    y
  } else {
    x
  }
}


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Work out where the input data comes from.
# Three cases handled:
#  - '--file' names an RDS file: load it directly. An RDS file is
#    binary, so it must never be pushed through readLines().
#  - the input (stdin, or the text file given via '--file') is only a
#    single line: assume that line is a filename. This is how a
#    previous 'dplyr' call in a shell pipe hands over its RDS file.
#       - only reading of RDS and CSV files currently supported.
#         (easy to add more)
#  - otherwise assume that the input is the contents of a CSV file
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (opt$verbose) message("------------------------------------------")

file_is_rds <- !identical(opt$file, "stdin") &&
  tolower(tools::file_ext(opt$file)) == "rds"

if (file_is_rds) {
  if (opt$verbose) message("[input] reading RDS file: ", opt$file)
  .data <- read_data(opt$file)
} else {
  # With the default value "stdin", readLines() reads the process's
  # standard input; any other value is read as a text file.
  input <- readLines(opt$file)

  if (length(input) == 1) {
    input <- trimws(input)
    if (opt$verbose) message("[input] looks like an existing file: ", input)
    .data <- read_data(input)
  } else {
    if (opt$verbose) message("[input] reading CSV from stdin")
    # I() marks the character vector as literal CSV data, not a path
    .data <- readr::read_csv(I(input), col_types = readr::cols())
  }
}


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the command + code
# If the user is demanding CSV or kable output, then
# set the result to be the initial data and print it out
# later
#
# NOTE(security): the call is assembled as text from the raw
# command line arguments and then evaluated as R code. That is the
# purpose of this tool, but it means the arguments must only ever
# come from a trusted user.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
join_verbs <- c(
  'left_join', 'right_join', 'full_join',
  'anti_join', 'semi_join', 'inner_join'
)

if (opt$command %in% c('csv', 'kable')) {
  opt$csv <- TRUE
  res     <- .data
} else if (opt$command %in% join_verbs) {
  # The first argument after a join verb is the file holding the
  # right-hand table; fail early with a clear message if it is missing.
  if (length(opt$code) == 0) {
    stop(
      "[", opt$command, "]: expected a CSV or RDS filename as first argument",
      call. = FALSE
    )
  }
  rhs_filename <- opt$code[1]
  .rhs         <- read_data(rhs_filename)
  code         <- paste(opt$code[-1], collapse = " ")
  command      <- sprintf("dplyr::%s(.data, .rhs, %s)", opt$command, code)
  if (opt$verbose) message("command: ", command)

  # Avoid printing stuff about the join "by"
  suppressMessages({
    res <- eval(parse(text = command))
  })
} else {
  # Join unquoted arguments with a space (as the join branch does) so that
  # separate shell words such as `if (x) 1 else 2` are not fused together.
  code    <- paste(opt$code, collapse = " ")
  command <- sprintf("dplyr::%s(.data, %s)", opt$command, code)
  if (opt$verbose) message("command: ", command)

  res <- eval(parse(text = command))
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Output options:
#   - if command == 'kable' then use knitr::kable to output
#     (the only place the 'knitr' package is needed)
#   - if command == 'csv' or opt$csv is true, then dump
#     CSV strings to the terminal.  User can redirect how
#     they want
#   - otherwise save to an RDS file and echo its filename to stdout
#     such that another command can use it.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (opt$command == 'kable') {
  # 'res' is the unmodified input data for the 'kable' command
  cat(knitr::kable(res), sep = "\n")
} else if (opt$csv) {
  if (opt$verbose) message(">>>> output CSV to stdout")
  write.csv(res, row.names = FALSE)
} else {
  # Explicitly set a known, unchanging tempdir.
  # A temporary directory created via 'tempdir()' is valid only for the
  # current rsession and is destroyed at the session's close.
  # This does not work for the multiple rsessions being
  # used with the shell pipe.  So try and determine as best I can
  # where the best temporary directory is.
  tmpdir <- c(Sys.getenv(c('TMPDIR', 'TMP', 'TEMP')), "/tmp/")
  tmpdir <- tmpdir[tmpdir != '']   # drop environment variables that are unset
  tmpdir <- tmpdir[1]              # first one set wins; "/tmp/" is the fallback
  tmp    <- tempfile(pattern = "dplyr-cli-", tmpdir = tmpdir, fileext = ".rds")
  saveRDS(res, tmp)
  if (opt$verbose) message(">>>> output to RDS: ", tmp)
  # The next 'dplyr' call in the pipe reads this single line as a filename
  cat(tmp, "\n")
}



-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = FALSE, 10 | comment = "# " 11 | ) 12 | ``` 13 | 14 | 15 | # dplyr-cli 16 | 17 | 18 | ![](https://img.shields.io/badge/cool-useless-green.svg) 19 | 20 | 21 | `dplyr-cli` uses the `Rscript` executable to 22 | run dplyr commands on CSV files in the terminal. 23 | 24 | `dplyr-cli` makes use of the terminal pipe `|` instead of the magrittr pipe (`%>%`) 25 | to run sequences of commands.
26 | 27 | ``` 28 | cat mtcars.csv | group_by cyl | summarise "mpg = mean(mpg)" | kable 29 | #> | cyl| mpg| 30 | #> |---:|--------:| 31 | #> | 4| 26.66364| 32 | #> | 6| 19.74286| 33 | #> | 8| 15.10000| 34 | ``` 35 | 36 | ## Motivation 37 | 38 | I wanted to be able to do quick hacks on CSV files on the command line using 39 | dplyr syntax, but without actually starting a proper R session. 40 | 41 | 42 | ## What dplyr commands are supported? 43 | 44 | Any command of the form: 45 | 46 | * `dplyr::verb(.data, code)` 47 | * `dplyr::*_join(.data, .rhs)` 48 | 49 | Currently two extra commands are supported which are not part of `dplyr`. 50 | 51 | * `csv` performs no dplyr command, but only outputs the input data as CSV to stdout 52 | * `kable` performs no dplyr command, but only outputs the input data as a 53 | `knitr::kable()` formatted string to stdout 54 | 55 | 56 | ## Limitations 57 | 58 | * Only tested under 'bash' on OSX. YMMV. 59 | * Every command runs in a separate R session. 60 | * When using special shell characters such as `()`, you'll have to quote 61 | your code arguments. Some shells will require more quoting than others. 62 | * "joins" (such as `left_join`) do not currently let you specify the `by` argument, 63 | so there must be columns in common to both datasets 64 | 65 | ## Usage 66 | 67 | ```{sh} 68 | dplyr --help 69 | ``` 70 | 71 | ## History 72 | 73 | 74 | #### v0.1.0 2020-04-20 75 | 76 | * Initial release 77 | 78 | #### v0.1.1 2020-04-21 79 | 80 | * Switch to 'Rscript' for easier install for users 81 | * rename 'dplyr.sh' to just 'dplyr' 82 | 83 | #### v0.1.2 2020-04-21 84 | 85 | * Support for joins e.g. `left_join` 86 | 87 | #### v0.1.3 2020-04-22 88 | 89 | * More robust tmpdir handling 90 | 91 | #### v0.1.4 2022-01-23 92 | 93 | * Fix handling for latest `read_csv()`.
Fixes #9 94 | 95 | 96 | ## Contributors 97 | 98 | * [aborruso](https://github.com/aborruso) - documentation 99 | 100 | 101 | ## Installation 102 | 103 | Because this script straddles a great divide between R and the shell, you need 104 | to ensure both are set up correctly for this to work. 105 | 106 | 1. Install R packages 107 | 2. Clone this repo and put `dplyr` in your path 108 | 109 | 110 | #### Install R packages - within R 111 | `dplyr-cli` is run from the shell but at every invocation is starting a new 112 | rsession where the following packages are expected to be installed: 113 | 114 | 115 | ```{r eval=FALSE} 116 | install.packages('readr') # read in CSV data 117 | install.packages('dplyr') # data manipulation 118 | install.packages('docopt') # CLI description language 119 | ``` 120 | 121 | <details>
122 | <summary> Click to reveal instructions for installing packages on the command line </summary> 123 | 124 | To do it from the cli on a linux-ish system, install `r-base` (`sudo apt -y install r-base`) and then run 125 | 126 | ```bash 127 | sudo su - -c "R -e \"install.packages('readr', repos='http://cran.rstudio.com/')\"" 128 | sudo su - -c "R -e \"install.packages('dplyr', repos='http://cran.rstudio.com/')\"" 129 | sudo su - -c "R -e \"install.packages('docopt', repos='http://cran.rstudio.com/')\"" 130 | ``` 131 | 132 | </details>
133 | 134 | 135 | #### Clone this repo and put `dplyr` in your path 136 | 137 | 138 | You'll then need to download the shell script from this repository and put `dplyr` 139 | somewhere in your path. 140 | 141 | ``` 142 | git clone https://github.com/coolbutuseless/dplyr-cli 143 | cp dplyr-cli/dplyr ./somewhere/in/your/search/path 144 | ``` 145 | 146 | 147 | # Example data 148 | 149 | Put an example CSV file on the filesystem. Note: This CSV file is now included as 150 | `mtcars.csv` as part of this git repository, as is a second CSV file for 151 | demonstrating joins - `cyl.csv` 152 | 153 | ```{r} 154 | write.csv(mtcars, "mtcars.csv", row.names = FALSE) 155 | ``` 156 | 157 | # Example 1 - Basic Usage 158 | 159 | 160 | ```{sh} 161 | # cat contents of input CSV into dplyr-cli. 162 | # Use '-c' to output CSV if this is the final step 163 | cat mtcars.csv | dplyr filter -c "mpg == 21" 164 | ``` 165 | 166 | 167 | ```{sh} 168 | # Put quotes around any commands which contain special characters like <>() 169 | cat mtcars.csv | dplyr filter -c "mpg < 11" 170 | ``` 171 | 172 | 173 | ```{sh} 174 | # Combine dplyr commands with shell 'head' command 175 | dplyr select --file mtcars.csv -c cyl | head -n 6 176 | ``` 177 | 178 | 179 | # Example 2 - Simple piping of commands (with shell pipe, not magrittr pipe) 180 | 181 | ```{sh} 182 | cat mtcars.csv | \ 183 | dplyr mutate "cyl2 = 2 * cyl" | \ 184 | dplyr filter "cyl == 8" | \ 185 | dplyr kable 186 | ``` 187 | 188 | 189 | # Example 3 - set up some aliases for convenience 190 | 191 | 192 | ```{sh} 193 | alias mutate="dplyr mutate" 194 | alias filter="dplyr filter" 195 | alias select="dplyr select" 196 | alias summarise="dplyr summarise" 197 | alias group_by="dplyr group_by" 198 | alias ungroup="dplyr ungroup" 199 | alias count="dplyr count" 200 | alias arrange="dplyr arrange" 201 | alias kable="dplyr kable" 202 | 203 | 204 | cat mtcars.csv | group_by cyl | summarise "mpg = mean(mpg)" | kable 205 | ``` 206 | 207 | 208 | # Example 4 
- joins 209 | 210 | Limitations: 211 | 212 | * first argument after a join command must be an existing file (either CSV or RDS) 213 | * You can't yet specify a `by` argument for a join, so there must be a column in 214 | common to join by 215 | 216 | 217 | ```{sh} 218 | cat cyl.csv 219 | ``` 220 | 221 | 222 | ```{sh} 223 | cat mtcars.csv | dplyr inner_join cyl.csv | dplyr kable 224 | ``` 225 | 226 | 227 | 228 | ## Security warning 229 | 230 | `dplyr-cli` uses `eval(parse(text = ...))` on user input. Do not expose this 231 | program to the internet or random users under any circumstances. 232 | 233 | 234 | ## Inspirations 235 | 236 | * [xsv](https://github.com/BurntSushi/xsv) - a fast CSV command line toolkit 237 | written in Rust 238 | * [jq](https://stedolan.github.io/jq/) - a command line JSON processor. 239 | * [miller](http://johnkerl.org/miller/doc/) 240 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # dplyr-cli 5 | 6 | 7 | 8 | ![](https://img.shields.io/badge/cool-useless-green.svg) 9 | 10 | 11 | `dplyr-cli` uses the `Rscript` executable to run dplyr commands on CSV 12 | files in the terminal. 13 | 14 | `dplyr-cli` makes use of the terminal pipe `|` instead of the magrittr 15 | pipe (`%>%`) to run sequences of commands. 16 | 17 | cat mtcars.csv | group_by cyl | summarise "mpg = mean(mpg)" | kable 18 | #> | cyl| mpg| 19 | #> |---:|--------:| 20 | #> | 4| 26.66364| 21 | #> | 6| 19.74286| 22 | #> | 8| 15.10000| 23 | 24 | ## Motivation 25 | 26 | I wanted to be able to do quick hacks on CSV files on the command line 27 | using dplyr syntax, but without actually starting a proper R session. 28 | 29 | ## What dplyr commands are supported? 
30 | 31 | Any command of the form: 32 | 33 | - `dplyr::verb(.data, code)` 34 | - `dplyr::*_join(.data, .rhs)` 35 | 36 | Currently two extra commands are supported which are not part of 37 | `dplyr`. 38 | 39 | - `csv` performs no dplyr command, but only outputs the input data as 40 | CSV to stdout 41 | - `kable` performs no dplyr command, but only outputs the input data 42 | as a `knitr::kable()` formatted string to stdout 43 | 44 | ## Limitations 45 | 46 | - Only tested under ‘bash’ on OSX. YMMV. 47 | - Every command runs in a separate R session. 48 | - When using special shell characters such as `()`, you’ll have to 49 | quote your code arguments. Some shells will require more quoting 50 | than others. 51 | - “joins” (such as `left_join`) do not currently let you specify the 52 | `by` argument, so there must be columns in common to both datasets 53 | 54 | ## Usage 55 | 56 | ``` sh 57 | dplyr --help 58 | ``` 59 | 60 | # dplyr-cli 61 | # 62 | # Usage: 63 | # dplyr <command> [--file=fn] [--csv | -c] [--verbose | -v] [<code>...] 64 | # dplyr -h | --help 65 | # 66 | # Options: 67 | # -h --help show this help text 68 | # -f FILE --file=FILE input CSV or RDS filename. If reading from stdin, assumes CSV [default: stdin] 69 | # -c --csv write output to stdout in CSV format (instead of default RDS file) 70 | # -v --verbose be verbose 71 | 72 | ## History 73 | 74 | #### v0.1.0 2020-04-20 75 | 76 | - Initial release 77 | 78 | #### v0.1.1 2020-04-21 79 | 80 | - Switch to ‘Rscript’ for easier install for users 81 | - rename ‘dplyr.sh’ to just ‘dplyr’ 82 | 83 | #### v0.1.2 2020-04-21 84 | 85 | - Support for joins e.g. `left_join` 86 | 87 | #### v0.1.3 2020-04-22 88 | 89 | - More robust tmpdir handling 90 | 91 | #### v0.1.4 2022-01-23 92 | 93 | - Fix handling for latest `read_csv()`.
Fixes #9 94 | 95 | ## Contributors 96 | 97 | - [aborruso](https://github.com/aborruso) - documentation 98 | 99 | ## Installation 100 | 101 | Because this script straddles a great divide between R and the shell, 102 | you need to ensure both are set up correctly for this to work. 103 | 104 | 1. Install R packages 105 | 2. Clone this repo and put `dplyr` in your path 106 | 107 | #### Install R packages - within R 108 | 109 | `dplyr-cli` is run from the shell but at every invocation is starting a 110 | new rsession where the following packages are expected to be installed: 111 | 112 | ``` r 113 | install.packages('readr') # read in CSV data 114 | install.packages('dplyr') # data manipulation 115 | install.packages('docopt') # CLI description language 116 | ``` 117 | 118 | <details>
119 | <summary> 120 | Click to reveal instructions for installing packages on the command line 121 | </summary> 122 | 123 | To do it from the cli on a linux-ish system, install `r-base` 124 | (`sudo apt -y install r-base`) and then run 125 | 126 | ``` bash 127 | sudo su - -c "R -e \"install.packages('readr', repos='http://cran.rstudio.com/')\"" 128 | sudo su - -c "R -e \"install.packages('dplyr', repos='http://cran.rstudio.com/')\"" 129 | sudo su - -c "R -e \"install.packages('docopt', repos='http://cran.rstudio.com/')\"" 130 | ``` 131 | 132 | </details>
133 | 134 | #### Clone this repo and put `dplyr` in your path 135 | 136 | You’ll then need to download the shell script from this repository and 137 | put `dplyr` somewhere in your path. 138 | 139 | git clone https://github.com/coolbutuseless/dplyr-cli 140 | cp dplyr-cli/dplyr ./somewhere/in/your/search/path 141 | 142 | # Example data 143 | 144 | Put an example CSV file on the filesystem. Note: This CSV file is now 145 | included as `mtcars.csv` as part of this git repository, as is a second 146 | CSV file for demonstrating joins - `cyl.csv` 147 | 148 | ``` r 149 | write.csv(mtcars, "mtcars.csv", row.names = FALSE) 150 | ``` 151 | 152 | # Example 1 - Basic Usage 153 | 154 | ``` sh 155 | # cat contents of input CSV into dplyr-cli. 156 | # Use '-c' to output CSV if this is the final step 157 | cat mtcars.csv | dplyr filter -c "mpg == 21" 158 | ``` 159 | 160 | # "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 161 | # 21,6,160,110,3.9,2.62,16.46,0,1,4,4 162 | # 21,6,160,110,3.9,2.875,17.02,0,1,4,4 163 | 164 | ``` sh 165 | # Put quotes around any commands which contain special characters like <>() 166 | cat mtcars.csv | dplyr filter -c "mpg < 11" 167 | ``` 168 | 169 | # "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 170 | # 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 171 | # 10.4,8,460,215,3,5.424,17.82,0,0,3,4 172 | 173 | ``` sh 174 | # Combine dplyr commands with shell 'head' command 175 | dplyr select --file mtcars.csv -c cyl | head -n 6 176 | ``` 177 | 178 | # "cyl" 179 | # 6 180 | # 6 181 | # 4 182 | # 6 183 | # 8 184 | 185 | # Example 2 - Simple piping of commands (with shell pipe, not magrittr pipe) 186 | 187 | ``` sh 188 | cat mtcars.csv | \ 189 | dplyr mutate "cyl2 = 2 * cyl" | \ 190 | dplyr filter "cyl == 8" | \ 191 | dplyr kable 192 | ``` 193 | 194 | # | mpg| cyl| disp| hp| drat| wt| qsec| vs| am| gear| carb| cyl2| 195 | # |----:|---:|-----:|---:|----:|-----:|-----:|--:|--:|----:|----:|----:| 196 | # | 18.7| 8| 360.0| 175| 
3.15| 3.440| 17.02| 0| 0| 3| 2| 16| 197 | # | 14.3| 8| 360.0| 245| 3.21| 3.570| 15.84| 0| 0| 3| 4| 16| 198 | # | 16.4| 8| 275.8| 180| 3.07| 4.070| 17.40| 0| 0| 3| 3| 16| 199 | # | 17.3| 8| 275.8| 180| 3.07| 3.730| 17.60| 0| 0| 3| 3| 16| 200 | # | 15.2| 8| 275.8| 180| 3.07| 3.780| 18.00| 0| 0| 3| 3| 16| 201 | # | 10.4| 8| 472.0| 205| 2.93| 5.250| 17.98| 0| 0| 3| 4| 16| 202 | # | 10.4| 8| 460.0| 215| 3.00| 5.424| 17.82| 0| 0| 3| 4| 16| 203 | # | 14.7| 8| 440.0| 230| 3.23| 5.345| 17.42| 0| 0| 3| 4| 16| 204 | # | 15.5| 8| 318.0| 150| 2.76| 3.520| 16.87| 0| 0| 3| 2| 16| 205 | # | 15.2| 8| 304.0| 150| 3.15| 3.435| 17.30| 0| 0| 3| 2| 16| 206 | # | 13.3| 8| 350.0| 245| 3.73| 3.840| 15.41| 0| 0| 3| 4| 16| 207 | # | 19.2| 8| 400.0| 175| 3.08| 3.845| 17.05| 0| 0| 3| 2| 16| 208 | # | 15.8| 8| 351.0| 264| 4.22| 3.170| 14.50| 0| 1| 5| 4| 16| 209 | # | 15.0| 8| 301.0| 335| 3.54| 3.570| 14.60| 0| 1| 5| 8| 16| 210 | 211 | # Example 3 - set up some aliases for convenience 212 | 213 | ``` sh 214 | alias mutate="dplyr mutate" 215 | alias filter="dplyr filter" 216 | alias select="dplyr select" 217 | alias summarise="dplyr summarise" 218 | alias group_by="dplyr group_by" 219 | alias ungroup="dplyr ungroup" 220 | alias count="dplyr count" 221 | alias arrange="dplyr arrange" 222 | alias kable="dplyr kable" 223 | 224 | 225 | cat mtcars.csv | group_by cyl | summarise "mpg = mean(mpg)" | kable 226 | ``` 227 | 228 | # | cyl| mpg| 229 | # |---:|--------:| 230 | # | 4| 26.66364| 231 | # | 6| 19.74286| 232 | # | 8| 15.10000| 233 | 234 | # Example 4 - joins 235 | 236 | Limitations: 237 | 238 | - first argument after a join command must be an existing file (either 239 | CSV or RDS) 240 | - You can’t yet specify a `by` argument for a join, so there must be a 241 | column in common to join by 242 | 243 | ``` sh 244 | cat cyl.csv 245 | ``` 246 | 247 | # cyl,description 248 | # 4,four 249 | # 6,six 250 | 251 | ``` sh 252 | cat mtcars.csv | dplyr inner_join cyl.csv | dplyr kable 253 | ``` 254 | 255 | # 
| mpg| cyl| disp| hp| drat| wt| qsec| vs| am| gear| carb|description | 256 | # |----:|---:|-----:|---:|----:|-----:|-----:|--:|--:|----:|----:|:-----------| 257 | # | 21.0| 6| 160.0| 110| 3.90| 2.620| 16.46| 0| 1| 4| 4|six | 258 | # | 21.0| 6| 160.0| 110| 3.90| 2.875| 17.02| 0| 1| 4| 4|six | 259 | # | 22.8| 4| 108.0| 93| 3.85| 2.320| 18.61| 1| 1| 4| 1|four | 260 | # | 21.4| 6| 258.0| 110| 3.08| 3.215| 19.44| 1| 0| 3| 1|six | 261 | # | 18.1| 6| 225.0| 105| 2.76| 3.460| 20.22| 1| 0| 3| 1|six | 262 | # | 24.4| 4| 146.7| 62| 3.69| 3.190| 20.00| 1| 0| 4| 2|four | 263 | # | 22.8| 4| 140.8| 95| 3.92| 3.150| 22.90| 1| 0| 4| 2|four | 264 | # | 19.2| 6| 167.6| 123| 3.92| 3.440| 18.30| 1| 0| 4| 4|six | 265 | # | 17.8| 6| 167.6| 123| 3.92| 3.440| 18.90| 1| 0| 4| 4|six | 266 | # | 32.4| 4| 78.7| 66| 4.08| 2.200| 19.47| 1| 1| 4| 1|four | 267 | # | 30.4| 4| 75.7| 52| 4.93| 1.615| 18.52| 1| 1| 4| 2|four | 268 | # | 33.9| 4| 71.1| 65| 4.22| 1.835| 19.90| 1| 1| 4| 1|four | 269 | # | 21.5| 4| 120.1| 97| 3.70| 2.465| 20.01| 1| 0| 3| 1|four | 270 | # | 27.3| 4| 79.0| 66| 4.08| 1.935| 18.90| 1| 1| 4| 1|four | 271 | # | 26.0| 4| 120.3| 91| 4.43| 2.140| 16.70| 0| 1| 5| 2|four | 272 | # | 30.4| 4| 95.1| 113| 3.77| 1.513| 16.90| 1| 1| 5| 2|four | 273 | # | 19.7| 6| 145.0| 175| 3.62| 2.770| 15.50| 0| 1| 5| 6|six | 274 | # | 21.4| 4| 121.0| 109| 4.11| 2.780| 18.60| 1| 1| 4| 2|four | 275 | 276 | ## Security warning 277 | 278 | `dplyr-cli` uses `eval(parse(text = ...))` on user input. Do not expose 279 | this program to the internet or random users under any circumstances. 280 | 281 | ## Inspirations 282 | 283 | - [xsv](https://github.com/BurntSushi/xsv) - a fast CSV command line 284 | toolkit written in Rust 285 | - [jq](https://stedolan.github.io/jq/) - a command line JSON 286 | processor. 287 | - [miller](http://johnkerl.org/miller/doc/) 288 | --------------------------------------------------------------------------------