├── Inst └── skeleton │ ├── data │ └── .gitkeep │ └── assets │ ├── css │ ├── .gitkeep │ └── ribbons.css │ ├── img │ └── .gitkeep │ ├── js │ └── .gitkeep │ ├── layouts │ └── .gitkeep │ ├── features.Rmd │ ├── featuresWithTarget.Rmd │ └── body.Rmd ├── .Rproj.user └── 4314C178 │ ├── cpp-definition-cache │ ├── sdb │ └── prop │ │ ├── 7588389C │ │ ├── A62AC356 │ │ ├── 4FEEED25 │ │ ├── 6DB52A19 │ │ ├── C4167D17 │ │ └── INDEX │ ├── saved_source_markers │ └── pcs │ ├── source-pane.pper │ ├── workbench-pane.pper │ ├── files-pane.pper │ ├── packages-pane.pper │ └── windowlayoutstate.pper ├── .Rbuildignore ├── .gitignore ├── new.png ├── NAMESPACE ├── man ├── hello.Rd ├── knitrNa.Rd ├── knitrUniq.Rd ├── knitrFeature.Rd ├── featureReport.Rd ├── uniqueReport.Rd ├── naReport.Rd └── preReport.Rd ├── R ├── test.R ├── hello.R ├── knitrFeature.R ├── knitrUniq.R ├── knitrNa.R ├── utils.R ├── preReport.R ├── uniqueReport.R ├── naReport.R └── featureReport.R ├── readme.md ├── preReport.Rproj ├── DESCRIPTION └── example └── preReport.Rmd /Inst/skeleton/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/css/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/img/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/js/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/layouts/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/cpp-definition-cache: -------------------------------------------------------------------------------- 1 | [ 2 | ] -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/7588389C: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/A62AC356: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/saved_source_markers: -------------------------------------------------------------------------------- 1 | {"active_set":"","sets":[]} -------------------------------------------------------------------------------- /.Rproj.user/4314C178/pcs/source-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "activeTab" : 4 3 | } -------------------------------------------------------------------------------- /new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lchiffon/preReport/master/new.png -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/4FEEED25: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled2" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/6DB52A19: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/C4167D17: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | export(CountLines) 3 | import(htmltools) 4 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/pcs/workbench-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "TabSet1" : 2, 3 | "TabSet2" : 0 4 | } -------------------------------------------------------------------------------- /man/hello.Rd: -------------------------------------------------------------------------------- 1 | \name{hello} 2 | \alias{hello} 3 | \title{Hello, World!} 4 | \usage{ 5 | hello() 6 | } 7 | \description{ 8 | Prints 'Hello, world!'. 9 | } 10 | \examples{ 11 | hello() 12 | } 13 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/features.Rmd: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | 4 | ### Report of Variable: %s 5 | 6 | ```{r results="asis"} 7 | feaReport = featureReport(data[,%s], '%s') 8 | knitrFeature(feaReport) 9 | ``` 10 | 11 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/pcs/files-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "path" : "E:/git/preReport", 3 | "sortOrder" : [ 4 | { 5 | "ascending" : true, 6 | "columnIndex" : 2 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /Inst/skeleton/assets/featuresWithTarget.Rmd: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | 4 | ### Report of Variable: %s 5 | 6 | ```{r results="asis"} 7 | feaReport = featureReport(data[, %s], '%s', data[, %s]) 8 | knitrFeature(feaReport) 9 | ``` 10 | 11 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/pcs/packages-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "installOptions" : { 3 | "installDependencies" : true, 4 | "installFromRepository" : false, 5 | "libraryPath" : "E:/Program Files/R/R-3.2.2/library" 6 | } 7 | } -------------------------------------------------------------------------------- /R/test.R: -------------------------------------------------------------------------------- 1 | testData = data.frame( 2 | V1 = sample(c(NA,1), 100, replace=T, c(0.1,0.9)), 3 | V2 = sample(c(NA,1), 100, replace=T, c(0.2,0.8)), 4 | V3 = sample(c(NA,1), 100, replace=T, c(0.3,0.7)), 5 | V4 = sample(c(NA,1), 100, replace=T, c(0.5,0.5)) 6 | ) 7 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ``` 4 | devtools::install_github("lchiffon/preReport") 5 | library(preReport) 6 | preReport(iris) 7 | ``` 8 | 9 | ## TODO: 10 | 11 | ### Version 0.1 12 | - Raw report 13 | 14 | ### Version 0.2 15 | - Raw report with CSS 16 | - HTML report 17 | 18 | ### Version 0.3 19 | - HTML report with Echarts! 20 | - Word report 21 | 22 | ### Vertion 0.4 23 | - Other features 24 | -------------------------------------------------------------------------------- /.Rproj.user/4314C178/pcs/windowlayoutstate.pper: -------------------------------------------------------------------------------- 1 | { 2 | "left" : { 3 | "panelheight" : 967, 4 | "splitterpos" : 402, 5 | "topwindowstate" : "NORMAL", 6 | "windowheight" : 1005 7 | }, 8 | "right" : { 9 | "panelheight" : 967, 10 | "splitterpos" : 604, 11 | "topwindowstate" : "NORMAL", 12 | "windowheight" : 1005 13 | } 14 | } -------------------------------------------------------------------------------- /preReport.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | -------------------------------------------------------------------------------- /man/knitrNa.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/knitrNa.R 3 | \name{knitrNa} 4 | \alias{knitrNa} 5 | \title{Function for Knitring NA Report Function} 6 | \usage{ 7 | knitrNa(naR) 8 | } 9 | \arguments{ 10 | \item{naR}{naReport created by naReport function.} 11 | } 12 | \description{ 13 | Function for Knitring NA Report Function 14 | } 15 | \examples{ 16 | report = naReport(testData) 17 | knitrNa(report) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /man/knitrUniq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/knitrUniq.R 3 | \name{knitrUniq} 4 | \alias{knitrUniq} 5 | \title{Function for Knitring Unique Report Function} 6 | \usage{ 7 | knitrUniq(uniqR) 8 | } 9 | \arguments{ 10 | \item{uniqR}{uniqReport created by uniqueReport function.} 11 | } 12 | \description{ 13 | Function for Knitring Unique Report Function 14 | } 15 | \examples{ 16 | report = uniqueReport(testData) 17 | knitrUniq(report) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/hello.R: -------------------------------------------------------------------------------- 1 | # Hello, world! 2 | # 3 | # This is an example function named 'hello' 4 | # which prints 'Hello, world!'. 5 | # 6 | # You can learn more about package authoring with RStudio at: 7 | # 8 | # http://r-pkgs.had.co.nz/ 9 | # 10 | # Some useful keyboard shortcuts for package authoring: 11 | # 12 | # Build and Reload Package: 'Ctrl + Shift + B' 13 | # Check Package: 'Ctrl + Shift + E' 14 | # Test Package: 'Ctrl + Shift + T' 15 | 16 | hello <- function() { 17 | print("Hello, world!") 18 | } 19 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: preReport 2 | Type: Package 3 | Title: Create a Data Report before Modeling 4 | Version: 0.1 5 | Date: 2015-12-04 6 | Author: "Dawei Lang [aut, cre]" 7 | Description: This package will create a html report to show the quality of data 8 | before modeling. Including NA, class, distribution,... 9 | License: MIT + file LICENSE 10 | LazyData: TRUE 11 | Depends:R (>= 3.1.0) 12 | Imports: 13 | knitr (>= 1.11), 14 | ggplot2 (>= 1.0.1), 15 | rmarkdown (>= 0.7), 16 | htmltools (>= 0.2) 17 | Suggests: 18 | -------------------------------------------------------------------------------- /man/knitrFeature.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/knitrFeature.R 3 | \name{knitrFeature} 4 | \alias{knitrFeature} 5 | \title{Function for Knitring Feature Report Function} 6 | \usage{ 7 | knitrFeature(feaReport) 8 | } 9 | \arguments{ 10 | \item{feaReport}{featureReport created by featureReport function.} 11 | } 12 | \description{ 13 | Function for Knitring Feature Report Function 14 | } 15 | \examples{ 16 | feaReport = featureReport(iris[,1]) 17 | knitrFeature(feaReport) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /man/featureReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/featureReport.R 3 | \name{featureReport} 4 | \alias{featureReport} 5 | \title{Create a Features Report of a Feature in the Data.frame} 6 | \usage{ 7 | featureReport(vector, vecName, ...) 8 | } 9 | \arguments{ 10 | \item{vector}{A vector.} 11 | 12 | \item{vecName}{The Name of the vector.} 13 | } 14 | \description{ 15 | featureReport will create a list of information of the feature 16 | } 17 | \examples{ 18 | featureReport(iris[,1]) 19 | } 20 | 21 | -------------------------------------------------------------------------------- /R/knitrFeature.R: -------------------------------------------------------------------------------- 1 | knitrFeature = function(feaReport){ 2 | if(feaReport$class == "numeric"){ 3 | print(p(feaReport$Info)) 4 | print(feaReport$plot) 5 | opts_current$set(results = "asis") 6 | 7 | print(knitr::kable(as.data.frame(feaReport$table), row.names = T)) 8 | 9 | print(knitr::kable(as.data.frame(feaReport$summary), row.names = T)) 10 | } 11 | 12 | if(feaReport$class == "character"){ 13 | print(feaReport$plot) 14 | 15 | print(p(feaReport$tableInfo)) 16 | 17 | print(knitr::kable(as.data.frame(feaReport$table), row.names = T)) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /man/uniqueReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/uniqueReport.R 3 | \name{uniqueReport} 4 | \alias{uniqueReport} 5 | \title{Create a uniqueReport of a data.frame} 6 | \usage{ 7 | uniqueReport (inputData,range = c(0, 0.4)) 8 | } 9 | \arguments{ 10 | \item{inputData}{The data frame to create report before modeling.} 11 | 12 | \item{range}{A vector of length 2 for the range of NA proportion. 13 | Variables with NA proportion less than range[1] will do nothing, 14 | Variables with NA proportion more than range[2] will be suggested to delete. 15 | Others will be suggested to remove the observations.} 16 | } 17 | \description{ 18 | uniqueReport will create a unique Report from a data.frame. 19 | } 20 | \examples{ 21 | uniqueReport(testData) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /man/naReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/naReport.R 3 | \name{naReport} 4 | \alias{naReport} 5 | \title{Create a naReport of a data.frame} 6 | \usage{ 7 | naReport(inputData,range = c(0, 0.4)) 8 | } 9 | \arguments{ 10 | \item{inputData}{The data frame to create report before modeling.} 11 | 12 | \item{range}{A vector of length 2 for the range of NA proportion. 13 | Variables with NA proportion less than range[1] will do nothing, 14 | Variables with NA proportion more than range[2] will be suggested to delete. 15 | Others will be suggested to remove the observations.} 16 | } 17 | \description{ 18 | naReport will detect the NAs i around the variables in the 19 | dataframe and show the codes users may need for omit. 20 | } 21 | \examples{ 22 | naReport(testData) 23 | } 24 | 25 | -------------------------------------------------------------------------------- /R/knitrUniq.R: -------------------------------------------------------------------------------- 1 | knitrUniq = function(uniqR){ 2 | if(is.null(uniqR$unique2)){ 3 | div1 = tagList(p(strong(uniqR$unique1[[1]])) 4 | ) 5 | }else{ 6 | div1 = tagList(p(uniqR$unique2[[1]]), 7 | p(uniqR$unique2[[2]]), 8 | pre(class = 'r', code(class = 'r', uniqR$unique2[[3]])) 9 | ) 10 | } 11 | 12 | if(is.null(uniqR$unique4)){ 13 | div2 = tagList(p(strong(uniqR$unique3[[1]])) 14 | ) 15 | }else{ 16 | div2 = tagList(p(uniqR$unique4[[1]]), 17 | p(uniqR$unique4[[2]]), 18 | pre(class = 'r', code(class = 'r', uniqR$unique4[[3]]), 19 | code(class = 'r', uniqR$unique4[[4]])) 20 | ) 21 | } 22 | doc <- tagList(div1, div2) 23 | doc 24 | } 25 | -------------------------------------------------------------------------------- /man/preReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/preReport.R 3 | \name{preReport} 4 | \alias{preReport} 5 | \title{Create a preReport of a data.frame} 6 | \usage{ 7 | preReport(iris) 8 | 9 | preReport(inputData, 10 | path, 11 | scaffold = system.file("skeleton", package = "preReport"), 12 | open_rmd = T, 13 | useBytes = T, ...) 14 | } 15 | \arguments{ 16 | \item{inputData}{The data frame to create report before modeling.} 17 | 18 | \item{path}{A vector of length 2 for the range of NA proportion.} 19 | 20 | \item{scaffold}{Path for the file used in the html file.} 21 | 22 | \item{open_rmd}{Whether open the Rmd file or not.} 23 | 24 | \item{useBytes}{If useByte when writing the Rmd file.} 25 | 26 | \item{...}{Other parameters put into writeLines function.} 27 | } 28 | \description{ 29 | preReport will create a Rmd Report from a data.frame. 30 | } 31 | \examples{ 32 | preReport(iris) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/body.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Report of %s" 3 | date: "%s" 4 | output: html_document 5 | --- 6 | 7 | ```{r echo=F} 8 | library(knitr) 9 | opts_chunk$set(echo=F, message = F, warning = F, fig.pos = "center") 10 | ``` 11 | ## Overall 12 | 13 | ```{r } 14 | library(preReport) 15 | load("data/data.Rdata") 16 | ``` 17 | 18 | The data including `r dim(data)[1]` rows and `r dim(data)[2]` columns. 19 | 20 | 21 | ```{r} 22 | # number = 1:dim(data)[2] 23 | features = names(data) 24 | class = sapply(data,class) 25 | NAs = sapply(data,function(x) sum(is.na(x))) 26 | 27 | outData = data.frame(features, 28 | class, 29 | NAs) 30 | row.names(outData) = as.character(1:dim(data)[2]) 31 | ## TODO: Consider Change Names here 32 | ### 33 | ### 34 | 35 | 36 | knitr::kable(outData, row.names=T) 37 | ``` 38 | 39 | 40 | ### NAs 41 | 42 | ```{r} 43 | naR = naReport(data) 44 | knitrNa(naR) 45 | ``` 46 | 47 | ### Unique observations 48 | 49 | 50 | ```{r } 51 | uniqR = uniqueReport(data) 52 | knitrUniq(uniqR) 53 | ``` 54 | 55 | 56 | ## Variable Report 57 | -------------------------------------------------------------------------------- /example/preReport.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Report of input" 3 | date: "sys.date()" 4 | output: html_document 5 | --- 6 | 7 | ```{r echo=F} 8 | library(knitr) 9 | opts_chunk$set(echo=F, message = F, warning = F, fig.pos = "center") 10 | ``` 11 | ## Overall 12 | 13 | ```{r } 14 | # load("data/data.Rdata") 15 | library(preReport) 16 | data = iris 17 | ``` 18 | 19 | The data including `r dim(data)[1]` rows and `r dim(data)[2]` columns. 20 | 21 | 22 | ```{r} 23 | # number = 1:dim(data)[2] 24 | features = names(data) 25 | class = sapply(data,class) 26 | NAs = sapply(data,function(x) sum(is.na(x))) 27 | 28 | outData = data.frame(features, 29 | class, 30 | NAs) 31 | row.names(outData) = as.character(1:dim(data)[2]) 32 | ## TODO: Consider Change Names here 33 | ### 34 | ### 35 | 36 | 37 | knitr::kable(outData, row.names=T) 38 | ``` 39 | 40 | 41 | ### NAs 42 | 43 | ```{r} 44 | naR = naReport(data) 45 | knitrNa(naR) 46 | ``` 47 | 48 | ### Unique observations 49 | 50 | 51 | ```{r } 52 | uniqR = uniqueReport(data) 53 | knitrUniq(uniqR) 54 | ``` 55 | 56 | 57 | ## Variable Report 58 | 59 | --- 60 | 61 | ### Report of Variable: %s 62 | 63 | ```{r results="asis"} 64 | 65 | feaReport = featureReport(data[,1], '%s') 66 | knitrFeature(feaReport) 67 | 68 | ``` 69 | 70 | 71 | 72 | 73 | --- 74 | 75 | ### Report of Variable: %s 76 | ```{r results="asis"} 77 | 78 | feaReport = featureReport(data[,5], '%s') 79 | knitrFeature(feaReport) 80 | 81 | ``` 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /R/knitrNa.R: -------------------------------------------------------------------------------- 1 | knitrNa = function(naR){ 2 | 3 | if(is.null(naR$noNA)){ 4 | div1 = tagList(p(" ")) 5 | }else{ 6 | div1 = tagList( p(strong(naR$noNA)), 7 | br() 8 | ) 9 | } 10 | 11 | if(is.null(naR$Delete)){ 12 | div2 = tagList(p(" ")) 13 | }else{ 14 | div2 = tagList( p(naR$Delete[[1]]), 15 | br(), 16 | p(naR$Delete[[2]]), 17 | br(), 18 | p(naR$Delete[[3]]), 19 | br(), 20 | pre(class='r', code(class = 'r', naR$Delete[[4]]), 21 | code(class = 'r', naR$Delete[[5]]), 22 | code(class = 'r', naR$Delete[[6]])) 23 | ) 24 | } 25 | 26 | if(is.null(naR$Replace)){ 27 | div3 = tagList(p(" ")) 28 | }else{ 29 | div3 = tagList( br(), 30 | p(naR$Replace[[1]]), 31 | br(), 32 | p(naR$Replace[[2]]), 33 | br(), 34 | p(naR$Replace[[3]]), 35 | br(), 36 | pre(class='r', code(class = 'r', naR$Replace[[4]])) 37 | ) 38 | } 39 | doc <- tagList(div1, div2, div3) 40 | doc 41 | } 42 | -------------------------------------------------------------------------------- /Inst/skeleton/assets/css/ribbons.css: -------------------------------------------------------------------------------- 1 | /*Github Ribbon Test*/ 2 | /* Source: https://github.com/dciccale/css3-github-ribbon */ 3 | /* Define classes for example, definition, problem etc. */ 4 | /* Choose meaningful colors for background and text */ 5 | 6 | .example { 7 | background-color: #121621; 8 | top: 1.2em; 9 | right: -3.2em; 10 | -webkit-transform: rotate(45deg); 11 | -moz-transform: rotate(45deg); 12 | transform: rotate(45deg); 13 | -webkit-box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 14 | -moz-box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 15 | box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 16 | color: #FF0; 17 | display: block; 18 | padding: .6em 3.5em; 19 | position: absolute; 20 | font: bold .82em sans-serif; 21 | text-align: center; 22 | text-decoration: none; 23 | text-shadow: 1px -1px 8px rgba(0,0,0,0.60); 24 | -webkit-user-select: none; 25 | -moz-user-select: none; 26 | user-select: none; 27 | } 28 | 29 | .definition { 30 | background-color: #a00; 31 | top: 1.2em; 32 | right: -3.2em; 33 | -webkit-transform: rotate(45deg); 34 | -moz-transform: rotate(45deg); 35 | transform: rotate(45deg); 36 | -webkit-box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 37 | -moz-box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 38 | box-shadow: 0 0 0 1px #1d212e inset,0 0 2px 1px #fff inset,0 0 1em #888; 39 | color: #FFF; 40 | display: block; 41 | padding: .6em 3.5em; 42 | position: absolute; 43 | font: bold .82em sans-serif; 44 | text-align: center; 45 | text-decoration: none; 46 | text-shadow: 1px -1px 8px rgba(0,0,0,0.60); 47 | -webkit-user-select: none; 48 | -moz-user-select: none; 49 | user-select: none; 50 | } -------------------------------------------------------------------------------- /.Rproj.user/4314C178/sdb/prop/INDEX: -------------------------------------------------------------------------------- 1 | D%3A%2FCtrip%20task%2F%5B2015.12%5DautoRoom%2Fscrapy%2FautoOpen.R="5CF7A473" 2 | E%3A%2Fgit%2FAuto.Reservoir%2FZeusRcode%2FRscripts%2FunlimitAuto.R="DCEEB6FE" 3 | E%3A%2Fgit%2FAuto.Reservoir%2Ftrain%2FunlimitAuto.R="DD566FC9" 4 | E%3A%2Fgit%2FpreReport%2FDESCRIPTION="7588389C" 5 | E%3A%2Fgit%2FpreReport%2FR%2FfeatureReport.R="57A43C04" 6 | E%3A%2Fgit%2FpreReport%2FR%2Fhello.R="A62AC356" 7 | E%3A%2Fgit%2FpreReport%2FR%2FnaReport.R="C4167D17" 8 | E%3A%2Fgit%2FpreReport%2FR%2FpreReport.R="8F1369B8" 9 | E%3A%2Fgit%2FpreReport%2FR%2FuniqueReport.R="D372BB46" 10 | E%3A%2Fgit%2FpreReport%2FR%2Futils.R="4FEEED25" 11 | E%3A%2Fgit%2FpreReport%2Fexample%2FpreReport.Rmd="4C9A706A" 12 | E%3A%2Fgit%2FpreReport%2Fskeleton%2Fassets%2Fbody.Rmd="C485246D" 13 | E%3A%2Fgit%2FpreReport%2Fskeleton%2Fassets%2Ffeatures.Rmd="35C49C60" 14 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmp06yJ5H%2FCurrent%2Findex.Rmd="3491C169" 15 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmp2V3p16%2FCurrent%2Findex.Rmd="536CD38D" 16 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpMj6A51%2FCurrent%2Findex.Rmd="18D09842" 17 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpSwtrfR%2FCurrent%2Findex.Rmd="2E434AE4" 18 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpW8ohzq%2FCurrent%2Findex.Rmd="4C134612" 19 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpWAJvwX%2FCurrent%2Findex.Rmd="EC8EB368" 20 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpaO1VnE%2FCurrent%2Findex.Rmd="EF7F4EDA" 21 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpeGwSi7%2FCurrent%2Findex.Rmd="1AFE9160" 22 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpgDGz4X%2FCurrent%2Findex.Rmd="A5EF7E5F" 23 | d%3A%2FUsers%2Flangdw%2FAppData%2FLocal%2FTemp%2FRtmpsNPzRd%2FCurrent%2Findex.Rmd="D66F94EB" 24 | d%3A%2FUsers%2Flangdw%2FDesktop%2FpreReport.Rmd="6DB52A19" 25 | ~%2Fz%E3%80%90%E4%BC%A0%E9%80%81%E3%81%AE%E9%97%A8%E3%80%91%2Fcombine.R="36A5B913" 26 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | ## Check the class of inputData 3 | dataFrameCheck = function(inputData){ 4 | 5 | ## Support Data frame, tbl(from dplyr) and data.table 6 | if(any(class(inputData) == "data.frame")){ 7 | inputData = as.data.frame(inputData) 8 | }else{ 9 | stop("inputData is not a data frame!") 10 | } 11 | 12 | if(dim(inputData)[1] < 2){ 13 | warning("Ther is only one row in the data frame! ") 14 | } 15 | 16 | return(inputData) 17 | } 18 | 19 | ## Test 20 | # dataFrameCheck(iris) 21 | 22 | 23 | 24 | ## Check the vector 25 | 26 | vectorCheck = function(vector, 27 | class = "numeric", 28 | lengthVector, 29 | name = "It"){ 30 | 31 | if(!is.vector(vector)){ 32 | stop(sprintf("%s is not a vector!",name)) 33 | } 34 | 35 | if(missing(lengthVector)){ 36 | lengthVector = length(vector) 37 | }else if(length(vector) != lengthVector){ 38 | warning(sprintf("The length of %s is not correct!",name)) 39 | } 40 | 41 | if(class(vector) != class){ 42 | stop(sprintf("%s is not a %s object!",name,class)) 43 | } 44 | 45 | } 46 | 47 | ## Test 48 | # vectorCheck(iris,name = "Range") 49 | # 50 | # vectorCheck(1:10/10,lengthVector = 3,name = "Range") 51 | 52 | # Getname 53 | 54 | returnName = function(a){ 55 | cc = as.character(substitute(a)) 56 | cc 57 | } 58 | # all.equal("iris", returnName(iris)) 59 | 60 | naNum = function(vec){ 61 | sum(is.na(vec)) 62 | } 63 | # naNum(rep(c(NA,1),5)) 64 | 65 | tableProb = function(vec){ 66 | Count = table(vec, useNA="always") 67 | Prob = round(Count / length(vec) * 100) 68 | Prob = paste0(Prob, "%") 69 | output = as.data.frame(rbind(Count, Prob)) 70 | output 71 | } 72 | 73 | CountLines = function(){ 74 | a = paste0("R/",dir("R")) 75 | n = 0 76 | for (file in a){ 77 | n = n + length(readLines(file)) 78 | } 79 | n 80 | } 81 | 82 | 83 | copy_dir <- function(from, to){ 84 | if (!(file.exists(to))){ 85 | dir.create(to, recursive = TRUE) 86 | message('Copying files to ', to, '...') 87 | file.copy(list.files(from, full.names = T), to, recursive = TRUE) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /R/preReport.R: -------------------------------------------------------------------------------- 1 | preReport = function(inputData, path, 2 | scaffold = system.file("skeleton", package = "preReport"), 3 | open_rmd = T, useBytes = T, target = NULL, ...){ 4 | 5 | if(missing(inputData)){ 6 | stop("You should input a data.frame.") 7 | } 8 | ## Check data class, convert to data.frame 9 | datName = as.character(substitute(inputData)) 10 | inputData = dataFrameCheck(inputData) 11 | 12 | if(missing(path)){ 13 | setwd(tempdir()) 14 | path = "Current" 15 | } 16 | 17 | if(!is.null(target)){ 18 | if(!target %in% names(inputData)) 19 | target = NULL 20 | } 21 | 22 | message("Creating report directory at ", path, "...") 23 | copy_dir(scaffold, path) 24 | message("Finished creating report directory...") 25 | message("Switching to report directory...") 26 | setwd(path) 27 | 28 | data = inputData 29 | save(data, file = "data/data.Rdata") 30 | body = readLines("assets/body.Rmd", ...) 31 | body[2] = sprintf(body[2], datName) 32 | body[3] = sprintf(body[3], Sys.Date()) 33 | 34 | if(is.null(target)){ 35 | featureLayout = readLines("assets/features.Rmd", ...) 36 | }else{ 37 | featureLayout = readLines("assets/featuresWithTarget.Rmd", ...) 38 | j = which(names(inputData)==target) 39 | } 40 | 41 | featureIndex = which(grepl("%s", featureLayout)) 42 | featureOutput = c() 43 | 44 | 45 | ## Create dirction 46 | if(is.null(target)){ 47 | for( i in 1:(dim(data)[2])){ 48 | feature = featureLayout 49 | feature[[featureIndex[1]]] = sprintf(feature[[featureIndex[1]]], names(data)[i]) 50 | feature[[featureIndex[2]]] = sprintf(feature[[featureIndex[2]]], i, names(data)[i]) 51 | featureOutput = c(featureOutput, feature) 52 | } 53 | }else{ 54 | for( i in 1:(dim(data)[2])){ 55 | feature = featureLayout 56 | feature[[featureIndex[1]]] = sprintf(feature[[featureIndex[1]]], names(data)[i]) 57 | feature[[featureIndex[2]]] = sprintf(feature[[featureIndex[2]]], i, names(data)[i], j) 58 | featureOutput = c(featureOutput, feature) 59 | } 60 | } 61 | 62 | 63 | out = c(body, featureOutput) 64 | ## Write Files 65 | writeLines(out, "index.Rmd", useBytes = useBytes) 66 | 67 | ## knitrtohtml 68 | rmarkdown::render("index.Rmd","html_document") 69 | # 70 | # 71 | # knitr::knit('index.Rmd', 'index.md') # creates md file 72 | # markdown::markdownToHTML('index.md', 'index2.html', 73 | # options=c('use_xhtml', 'base64_images','fragment_only')) # creates html file 74 | 75 | ## openRmd 76 | if (open_rmd) { 77 | message("Opening slide deck...") 78 | file.edit("index.Rmd") 79 | } 80 | 81 | browseURL("index.html") 82 | 83 | 84 | 85 | } 86 | -------------------------------------------------------------------------------- /R/uniqueReport.R: -------------------------------------------------------------------------------- 1 | ##' Create a uniqueReport of a data.frame 2 | ##' 3 | ##' naReport will detect the NAs i around the variables in the 4 | ##' dataframe and show the codes users may need for omit. 5 | ##' 6 | ##' @usage 7 | ##' uniqueReport (inputData,range = c(0, 0.4)) 8 | ##' 9 | ##' 10 | ##' @param inputData The data frame to create report before modeling. 11 | ##' @param range A vector of length 2 for the range of NA proportion. 12 | ##' Variables with NA proportion less than range[1] will do nothing, 13 | ##' Variables with NA proportion more than range[2] will be suggested to delete. 14 | ##' Others will be suggested to remove the observations. 15 | ##' @examples 16 | ##' naReport(testData) 17 | 18 | 19 | 20 | 21 | uniqueReport = function(inputData, naRange = c(0, 0.4)){ 22 | if(missing(inputData)){ 23 | stop("You should input a data.frame.") 24 | } 25 | ## Check data class, convert to data.frame 26 | datName = as.character(substitute(inputData)) 27 | inputData = dataFrameCheck(inputData) 28 | 29 | n = dim(inputData)[1] 30 | uniqN = dim(unique(inputData))[1] 31 | 32 | output = list() 33 | inputData_new = naProcess(inputData, naRange) 34 | 35 | n1 = dim(inputData_new)[1] 36 | uniqN1 = dim(unique(inputData_new))[1] 37 | 38 | if(n == uniqN){ 39 | # unique[[1]] 40 | # "There's no repeat data in original data frame." 41 | output$unique1 = list("There's no repeat data in original data frame.") 42 | }else{ 43 | output$unique2 = list( 44 | # unique[[2]] 45 | # "There are %s%%(%s/%s) repeat observations in the original data frame" 46 | sprintf("There are %s%%(%s/%s) repeat observations in the original data frame", 47 | round((1 - uniqN / n) * 100), 48 | n - uniqN, 49 | n), 50 | # unique[[3]] 51 | # "Use the following codes to replace them:" 52 | "Use the following codes to replace them:", 53 | sprintf("%s_uniq = unique(%s)", datName, datName) 54 | ) 55 | } 56 | 57 | if(n1 == uniqN1){ 58 | # unique[[4]] 59 | # "There's no repeat data in the data frame after removing NA variables." 60 | output$unique3 = list("There's no repeat data in the data frame after removing NA variables.") 61 | }else{ 62 | output$unique4 = list( 63 | # unique[[5]] 64 | # "There are %s%%(%s/%s) repeat observations in the data frame after removing NA variables." 65 | sprintf("There are %s%%(%s/%s) repeat observations in the data frame after removing NA variables.", 66 | round((1 - uniqN1 / n1) * 100), 67 | n1 - uniqN1, 68 | n1), 69 | # unique[[6]] 70 | # "Use the following codes to replace them:" 71 | "Use the following codes to replace them:", 72 | sprintf("%s_new = naProcess(%s)", datName, 73 | datName), 74 | sprintf("%s_new = unique(%s_new)", 75 | datName, 76 | datName) 77 | ) 78 | } 79 | 80 | return(output) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /R/naReport.R: -------------------------------------------------------------------------------- 1 | ##' Create a naReport of a data.frame 2 | ##' 3 | ##' naReport will detect the NAs i around the variables in the 4 | ##' dataframe and show the codes users may need for omit. 5 | ##' 6 | ##' @usage 7 | ##' naReport(inputData,range = c(0, 0.4)) 8 | ##' 9 | ##' 10 | ##' @param inputData The data frame to create report before modeling. 11 | ##' @param range A vector of length 2 for the range of NA proportion. 12 | ##' Variables with NA proportion less than range[1] will do nothing, 13 | ##' Variables with NA proportion more than range[2] will be suggested to delete. 14 | ##' Others will be suggested to remove the observations. 15 | ##' @examples 16 | ##' naReport(testData) 17 | 18 | 19 | 20 | naReport = function(inputData, 21 | range = c(0, 0.4) 22 | ){ 23 | if(missing(inputData)){ 24 | stop("You should input a data.frame.") 25 | } 26 | ## Check data class, convert to data.frame 27 | datName = as.character(substitute(inputData)) 28 | inputData = dataFrameCheck(inputData) 29 | 30 | ## Check range 31 | vectorCheck(range,"numeric",2,"Range") 32 | 33 | if(max(range) > 1 | min(range) < 0){ 34 | warning("Wrong range input!") 35 | range = c(0, 0.4) 36 | } 37 | 38 | nas = sapply(inputData, function(x) sum(is.na(x))) 39 | naPor = nas / dim(inputData)[1] 40 | output = list() 41 | 42 | 43 | if(all(naPor <= range[1])){ 44 | # na[[1]] 45 | # "There's no NA in any variables!" 46 | output$noNA = ("There's no NA in any variables!") 47 | } 48 | 49 | if(any(naPor >= range[2])){ 50 | output$Delete = list( 51 | # na[[2]] 52 | # "There are more than %s%% NAs in these variables:" 53 | sprintf("There are more than %s%% NAs in these variables:", 54 | round(range[2]*100)), 55 | paste0(names(inputData)[which(naPor >= range[2])], 56 | collapse=", "), 57 | # na[[3]] 58 | # "Use the following codes to delete them:" 59 | "Use the following codes to delete them:", 60 | sprintf("nas = sapply(%s, function(x) sum(is.na(x)))", datName), 61 | sprintf("naPor = nas / dim(%s)[1]", datName), 62 | sprintf("%s_new = %s[, - which(naPor >= %s)]", 63 | datName, 64 | datName, 65 | range[2])) 66 | } 67 | 68 | if(any(naPor > range[1] & naPor range[1] & naPor %s & naPor<%s)] = -1", 84 | # datName, 85 | # datName, 86 | # datName, 87 | # datName, 88 | # range[1], 89 | # range[2]) 90 | ) 91 | } 92 | 93 | return(output) 94 | 95 | } 96 | 97 | 98 | naProcess = function(inputData, 99 | range = c(0,0.4)){ 100 | 101 | 102 | 103 | nas = sapply(inputData, function(x) sum(is.na(x))) 104 | naPor = nas / dim(inputData)[1] 105 | 106 | 107 | 108 | if(all(naPor <= range[1])){ 109 | return(inputData) 110 | } 111 | 112 | if(any(naPor >= range[2])){ 113 | inputData_new = inputData[,-which(naPor >= range[2])] 114 | }else{ 115 | inputData_new = inputData 116 | } 117 | 118 | if(any(naPor > range[1] & naPor