├── NAMESPACE ├── .Rbuildignore ├── LICENSE ├── .gitignore ├── tests ├── testthat.R └── testthat │ └── test_dataframe_stats_generation.R ├── R ├── Rbutchland.R ├── df_boxstat.R └── df_columnstat.R ├── DESCRIPTION ├── Rbutchland.Rproj ├── README.Rmd └── README.md /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Butch Landingin 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *.html 5 | man 6 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(Rbutchland) 3 | 4 | test_check("Rbutchland") 5 | -------------------------------------------------------------------------------- /R/Rbutchland.R: -------------------------------------------------------------------------------- 1 | #' @title My Package of Useful and (hopefully) Reusable Routines 2 | #' @name Rbutchland 3 | #' @description A bunch of useful and (hopefully) reusable routines 4 | #' to make every data scientist's life a little bit easier 5 | #' @docType package 6 | #' @aliases Rbutchland 7 | NULL 8 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Rbutchland 2 | Type: Package 3 | Title: My Personal Package of Useful and (hopefully) Reuseable Functions 4 | Version: 0.1.1 5 | Date: 2015-08-25 6 | Author: Butch Landingin 7 | Maintainer: Butch Landingin 8 | Description: This package collects a bunch of functions that I find useful 9 | License: MIT + file LICENSE 10 | Suggests: testthat 11 | -------------------------------------------------------------------------------- /R/df_boxstat.R: -------------------------------------------------------------------------------- 1 | #' Generate box stats (nrows,ncols) for a dataframe. 2 | #' 3 | #' @param df A dataframe 4 | #' @return The dataframe containing stats(nrows,ncols) 5 | #' @examples 6 | #' df_boxstat(data.frame(x=c(1,2,3),y=c("a","b","c"))) 7 | #' df_boxstat(mtcars) 8 | df_boxstat <- function(df) { 9 | rslt <- rbind(data.frame(dimension="nrows",value=nrow(df)), 10 | data.frame(dimension="ncols",value=ncol(df))) 11 | rslt$dimension <- as.character(rslt$dimension) 12 | rslt 13 | } 14 | -------------------------------------------------------------------------------- /Rbutchland.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /R/df_columnstat.R: -------------------------------------------------------------------------------- 1 | #' Generate column stats (column name, type(class), count(NA)) 2 | #' for a dataframe. 3 | #' 4 | #' @param df A dataframe 5 | #' @return The dataframe containing column stats 6 | #' @examples 7 | #' df_columnstat(data.frame(x=c(1,2,3),y=c("a","b",NA))) 8 | #' df_columnstat(mtcars) 9 | df_columnstat <- function(df) { 10 | columns <- colnames(df) 11 | types <- rep("",length(columns)) 12 | nas <- rep(0,length(columns)) 13 | for(i in 1:length(columns)) { 14 | types[i] = class(df[[columns[i]]])[1] # take 1st class 15 | nas[i] = sum(is.na(df[[columns[i]]])) 16 | } 17 | rslt <- data.frame(column=columns,type=types,na=nas) 18 | rslt$column = as.character(rslt$column) 19 | rslt$type = as.character(rslt$type) 20 | rslt 21 | } 22 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Rbutchland" 3 | author: "Butch Landingin" 4 | date: "Tuesday, 25 August, 2015" 5 | output: html_document 6 | --- 7 | 8 | Rbutchland: A suite of useful and (hopefully) reusable routines 9 | ===================== 10 | 11 | This R package contains a suite of useful routines that I hope 12 | to turn into a reusable package. Right now, it doesn't contain 13 | much. My goal is to build a suite of useful routines 14 | for data exploration and manipulation. 15 | 16 | Installation and Documentation 17 | ------------ 18 | 19 | The Rbutchland package is available on github and can be installed using 20 | [devtools](https://github.com/hadley/devtools): 21 | 22 | ``` 23 | library(devtools) 24 | install_github("butchland/Rbutchland") 25 | ``` 26 | ```{r results='hide',echo=FALSE} 27 | library(Rbutchland) 28 | ``` 29 | 30 | 31 | Dataframe stats 32 | ----------------- 33 | 34 | This package provides some simple methods for extracting information 35 | about a dataframe 36 | 37 | * `df_boxstat`: constructs a data frame that summarizes the dataframes's 38 | size (nrows and ncols) 39 | 40 | ```{r} 41 | df_boxstat(mtcars) 42 | ``` 43 | * `df_columnstat`: constructs a data frame that summarizes the dataframes's 44 | columns (column names, type and count(NAs)) 45 | ```{r} 46 | df <- data.frame(x=c(1,2,3),y=c("a","b",NA),z=c("a","b",NA)) 47 | df$z = as.character(df$z) 48 | df_columnstat(df) 49 | ``` 50 | 51 | -------------------------------------------------------------------------------- /tests/testthat/test_dataframe_stats_generation.R: -------------------------------------------------------------------------------- 1 | library(Rbutchland) 2 | 3 | context("dataframe stats") 4 | 5 | test_that("summarize dataframe with df_boxstat", { 6 | df <- data.frame(x=c(1,2,3),y=c("a","b","c")) 7 | df_box <- df_boxstat(df) 8 | expect_equal(df_box$dimension, c("nrows","ncols")) 9 | expect_equal(df_box$value, c(3,2)) 10 | }) 11 | 12 | test_that("summarize dataframe with df_columnstat", { 13 | df <- data.frame(x=c(1,2,3),y=c("a","b",NA),z=c("a","b",NA)) 14 | df$z <- as.character(df$z) 15 | df_col <- df_columnstat(df) 16 | expect_equal(df_col$column, c("x","y","z")) 17 | expect_equal(df_col$type, c("numeric","factor","character")) 18 | expect_equal(df_col$na, c(0,1,1)) 19 | }) 20 | 21 | test_that("df_columnstat should work with ordered factors with no warnings", { 22 | df <- data.frame(x=c(1,2,3),y=c("a","b",NA),z=c("a","b",NA)) 23 | df$z <- factor(df$z, ordered=T) 24 | errhandler <- function(c) fail("should'nt throw warning") 25 | df_col = NULL 26 | tryCatch({df_col = df_columnstat(df)}, warning=errhandler) 27 | expect_false(is.null(df_col)) 28 | }) 29 | test_that("df_columnstat should work with ordered factors with no warnings", { 30 | df <- data.frame(x=c(1,2,3),y=c("a","b",NA),z=c("a","b",NA)) 31 | df$z <- factor(df$z, ordered=T) 32 | df_col = df_columnstat(df) 33 | expect_false(is.null(df_col)) 34 | expect_equal(df_col$column, c("x","y","z")) 35 | expect_equal(df_col$type, c("numeric","factor","ordered")) 36 | expect_equal(df_col$na, c(0,1,1)) 37 | 38 | }) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Rbutchland" 3 | author: "Butch Landingin" 4 | date: "Tuesday, 25 August, 2015" 5 | output: html_document 6 | --- 7 | 8 | Rbutchland: A suite of useful and (hopefully) reusable routines 9 | ===================== 10 | 11 | This R package contains a suite of useful routines that I hope 12 | to turn into a reusable package. Right now, it doesn't contain 13 | much. My goal is to build a suite of useful routines 14 | for data exploration and manipulation. 15 | 16 | Installation and Documentation 17 | ------------ 18 | 19 | The Rbutchland package is available on github and can be installed using 20 | [devtools](https://github.com/hadley/devtools): 21 | 22 | ``` 23 | library(devtools) 24 | install_github("butchland/Rbutchland") 25 | ``` 26 | 27 | 28 | 29 | Dataframe stats 30 | ----------------- 31 | 32 | This package provides some simple methods for extracting information 33 | about a dataframe 34 | 35 | * `df_boxstat`: constructs a data frame that summarizes the dataframes's 36 | size (nrows and ncols) 37 | 38 | 39 | ```r 40 | df_boxstat(mtcars) 41 | ``` 42 | 43 | ``` 44 | ## dimension value 45 | ## 1 nrows 32 46 | ## 2 ncols 11 47 | ``` 48 | * `df_columnstat`: constructs a data frame that summarizes the dataframes's 49 | columns (column names, type and count(NAs)) 50 | 51 | ```r 52 | df <- data.frame(x=c(1,2,3),y=c("a","b",NA),z=c("a","b",NA)) 53 | df$z = as.character(df$z) 54 | df_columnstat(df) 55 | ``` 56 | 57 | ``` 58 | ## column type na 59 | ## 1 x numeric 0 60 | ## 2 y factor 1 61 | ## 3 z character 1 62 | ``` 63 | 64 | --------------------------------------------------------------------------------