├── LICENSE ├── .DS_Store ├── data ├── seasonAVG.rda ├── Batting2016.rda ├── Fielding2016.rda └── Pitching2016.rda ├── tests ├── testthat.R └── testthat │ ├── check_urls.R │ └── test-check_urls.R ├── man ├── figures │ └── baseballDBR_hex.png ├── pipe.Rd ├── compound_pipe.Rd ├── urlExists.Rd ├── baseballDBR.Rd ├── Ch.Rd ├── Fld_pct.Rd ├── IP.Rd ├── H_9.Rd ├── HR_9.Rd ├── K_9.Rd ├── BB_9.Rd ├── LOB_pct.Rd ├── WHIP.Rd ├── TBs.Rd ├── CTpct.Rd ├── ISO.Rd ├── OBP.Rd ├── SLG.Rd ├── PA.Rd ├── HRpct.Rd ├── Kpct.Rd ├── BA.Rd ├── Fielding2016.Rd ├── OPS.Rd ├── XBperH.Rd ├── XBHpct.Rd ├── get_bbdb.Rd ├── BABIP.Rd ├── RCbasic.Rd ├── seasonAVG.Rd ├── BBpct.Rd ├── Batting2016.Rd ├── RCtech.Rd ├── RC2002.Rd ├── fip_values.Rd ├── Pitching2016.Rd ├── FIP.Rd ├── wOBA_values.Rd ├── wOBA.Rd ├── wRC.Rd └── wRAA.Rd ├── .Rbuildignore ├── R ├── baseballDBR.R ├── utils.R ├── fielding2016.R ├── seasonAVG.R ├── batting2016.R ├── pitching2016.R ├── fieldingStats.R ├── fip.R ├── get_bbdb.R ├── pitchingStats.R ├── woba_values.R └── battingStats.R ├── .travis.yml ├── baseballDBR.Rproj ├── .gitignore ├── NEWS.md ├── DESCRIPTION ├── NAMESPACE ├── data-raw └── data_prep.R ├── vignettes ├── FIP.Rmd ├── wRAA_wRC.Rmd ├── Database_Tools.Rmd └── wOBA.Rmd ├── README.Rmd └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Kris Eberwein 3 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/.DS_Store -------------------------------------------------------------------------------- /data/seasonAVG.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/seasonAVG.rda -------------------------------------------------------------------------------- 
/data/Batting2016.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Batting2016.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(baseballDBR) 3 | 4 | test_check("baseballDBR") 5 | -------------------------------------------------------------------------------- /data/Fielding2016.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Fielding2016.rda -------------------------------------------------------------------------------- /data/Pitching2016.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Pitching2016.rda -------------------------------------------------------------------------------- /man/figures/baseballDBR_hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/man/figures/baseballDBR_hex.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^data-raw$ 4 | ^\.travis\.yml$ 5 | ^README\.Rmd$ 6 | ^NEWS\.Rmd$ 7 | ^cran-comments\.md$ 8 | ^appveyor\.yml$ 9 | -------------------------------------------------------------------------------- /R/baseballDBR.R: -------------------------------------------------------------------------------- 1 | 2 | #' baseballDBR: A package for working with data from the Baseball Databank/Lahman Database. 
3 | #' @name baseballDBR 4 | NULL 5 | # Global variables 6 | PitchingTable=HBP=HR=BB=SO=BB=SO=H=SF=SO=IBB=woba_scale=PitchingTable=NULL 7 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | Pipe operator 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/compound_pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{\%<>\%} 4 | \alias{\%<>\%} 5 | \title{Compound_pipe} 6 | \usage{ 7 | lhs \%<>\% rhs 8 | } 9 | \description{ 10 | Compound_pipe 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/urlExists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{urlExists} 4 | \alias{urlExists} 5 | \title{urlExists} 6 | \usage{ 7 | urlExists(target) 8 | } 9 | \arguments{ 10 | \item{target}{url} 11 | } 12 | \description{ 13 | A utility function to run a tryCatch on a URL. 
14 | } 15 | -------------------------------------------------------------------------------- /man/baseballDBR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/baseballDBR.R 3 | \name{baseballDBR} 4 | \alias{baseballDBR} 5 | \title{baseballDBR: A package for working with data from the Baseball Databank/Lahman Database.} 6 | \description{ 7 | baseballDBR: A package for working with data from the Baseball Databank/Lahman Database. 8 | } 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: r 4 | warnings_are_errors: false 5 | sudo: false 6 | 7 | env: 8 | global: 9 | - NOT_CRAN=true 10 | before_install: echo "options(repos = c(CRAN='http://cran.rstudio.com'))" > ~/.Rprofile 11 | 12 | notifications: 13 | email: 14 | on_success: change 15 | on_failure: change -------------------------------------------------------------------------------- /baseballDBR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | 
inst/doc 5 | README.Rmd 6 | NEWS.Rmd 7 | # Example code in package build process 8 | *-Ex.R 9 | # Output files from R CMD build 10 | /*.tar.gz 11 | # Output files from R CMD check 12 | /*.Rcheck/ 13 | # RStudio files 14 | .Rproj.user/ 15 | # produced vignettes 16 | vignettes/*.html 17 | vignettes/*.pdf 18 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 19 | .httr-oauth 20 | # knitr and R markdown default cache directories 21 | /*_cache/ 22 | /cache/ 23 | # Temporary files created by R markdown 24 | *.utf8.md 25 | *.knit.md 26 | cran-comments.md 27 | .DS_Store 28 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # baseballDBR 0.1.4 2 | 3 | ## Bug fixes 4 | 5 | * Added a warning regarding name changes when user tries to download "Master" table. 6 | 7 | # baseballDBR 0.1.3 8 | 9 | ## Bug Fixes 10 | 11 | * Updated URLs in Description 12 | 13 | # baseballDBR 0.1.2 14 | 15 | ## New Features 16 | 17 | * Added internal data sets `Batting2016`, `Pitching2016`, and `Fielding2016`. 18 | 19 | ## Enhancements 20 | 21 | * Added more testing for functions on package build. 22 | 23 | # baseballDBR 0.1.1 24 | 25 | ## New features 26 | 27 | * Added `urlExists()` function to perform tryCatch on URLs. 28 | 29 | * Added backup URLs for the `get_bbdb()` function. 30 | 31 | * Added `downloadZip` argument to the `get_bbdb()` function. 32 | 33 | * Added vignettes. 34 | 35 | * Added `get_bbdb()` function. 
36 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: baseballDBR 2 | Type: Package 3 | Title: Sabermetrics and Advanced Baseball Statistics 4 | Version: 0.1.2.000009 5 | Authors@R: person("Kris", "Eberwein", email = "eberwein@knights.ucf.edu", 6 | role = c("aut", "cre")) 7 | Description: A tool for gathering and analyzing data from the Baseball Databank , which includes player performance statistics from major league baseball in the United States beginning in the year 1871. 8 | Depends: 9 | R (>= 3.3.3) 10 | Imports: 11 | rvest, 12 | xml2, 13 | magrittr, 14 | dplyr 15 | Suggests: 16 | testthat, 17 | rmarkdown, 18 | knitr 19 | License: MIT + file LICENSE 20 | URL: https://github.com/keberwein/baseballDBR 21 | BugReports: https://github.com/keberwein/baseballDBR/issues 22 | LazyData: true 23 | RoxygenNote: 6.1.0 24 | VignetteBuilder: knitr 25 | -------------------------------------------------------------------------------- /man/Ch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fieldingStats.R 3 | \name{Ch} 4 | \alias{Ch} 5 | \title{Fielding: Calculate defensive chances} 6 | \usage{ 7 | Ch(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | The number of chances a player had to make a defensive play. 15 | Required fields from the Fielding table are; "PO", "A", and "E." 
16 | } 17 | \examples{ 18 | 19 | data("Fielding2016") 20 | head(Fielding2016) 21 | 22 | Fielding2016$Ch <- Ch(Fielding2016) 23 | 24 | } 25 | \seealso{ 26 | Other Fielding functions: \code{\link{Fld_pct}} 27 | } 28 | \concept{Fielding functions} 29 | \keyword{Ch} 30 | \keyword{Chances} 31 | \keyword{Defensive} 32 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | #' Pipe operator 3 | #' 4 | #' @name %>% 5 | #' @rdname pipe 6 | #' @keywords internal 7 | #' @importFrom magrittr %>% 8 | #' @usage lhs \%>\% rhs 9 | #' @export 10 | NULL 11 | 12 | #' Compound_pipe 13 | #' 14 | #' @name %<>% 15 | #' @rdname compound_pipe 16 | #' @keywords internal 17 | #' @importFrom magrittr %<>% 18 | #' @usage lhs \%<>\% rhs 19 | #' @export 20 | NULL 21 | 22 | 23 | #' @title urlExists 24 | #' @description A utility function to run a tryCatch on a URL. 25 | #' @param target url 26 | #' @export 27 | urlExists <- function(target) { 28 | tryCatch({ 29 | con <- url(target) 30 | a <- capture.output(suppressWarnings(readLines(con))) 31 | close(con) 32 | TRUE; 33 | }, 34 | error = function(err) { 35 | occur <- grep("cannot open the connection", capture.output(err)); 36 | if(length(occur) > 0) FALSE; 37 | } 38 | ) 39 | } 40 | -------------------------------------------------------------------------------- /man/Fld_pct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fieldingStats.R 3 | \name{Fld_pct} 4 | \alias{Fld_pct} 5 | \title{Fielding: Calculate fielding percentage} 6 | \usage{ 7 | Fld_pct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. 
The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the fielding percentage for fielders. 15 | Required fields from the Fielding table are; "PO", "A", and "E." 16 | } 17 | \examples{ 18 | 19 | data("Fielding2016") 20 | head(Fielding2016) 21 | 22 | Fielding2016$Fld_pct <- Fld_pct(Fielding2016) 23 | 24 | } 25 | \seealso{ 26 | Other Fielding functions: \code{\link{Ch}} 27 | } 28 | \concept{Fielding functions} 29 | \keyword{Fld_pct} 30 | \keyword{fielding} 31 | \keyword{percentage} 32 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%<>%") 4 | export("%>%") 5 | export(BA) 6 | export(BABIP) 7 | export(BB_9) 8 | export(BBpct) 9 | export(CTpct) 10 | export(Ch) 11 | export(FIP) 12 | export(Fld_pct) 13 | export(HR_9) 14 | export(HRpct) 15 | export(H_9) 16 | export(IP) 17 | export(ISO) 18 | export(K_9) 19 | export(Kpct) 20 | export(LOB_pct) 21 | export(OBP) 22 | export(OPS) 23 | export(PA) 24 | export(RC2002) 25 | export(RCbasic) 26 | export(RCtech) 27 | export(SLG) 28 | export(TBs) 29 | export(WHIP) 30 | export(XBHpct) 31 | export(XBperH) 32 | export(fip_values) 33 | export(get_bbdb) 34 | export(urlExists) 35 | export(wOBA) 36 | export(wOBA_values) 37 | export(wRAA) 38 | export(wRC) 39 | import(dplyr) 40 | import(utils) 41 | importFrom(magrittr,"%<>%") 42 | importFrom(magrittr,"%>%") 43 | importFrom(rvest,html_node) 44 | importFrom(rvest,html_table) 45 | importFrom(stats,setNames) 46 | importFrom(xml2,read_html) 47 | -------------------------------------------------------------------------------- /man/IP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/pitchingStats.R 3 | \name{IP} 4 | \alias{IP} 5 | \title{Pitching: Calculate the innings pitched} 6 | \usage{ 7 | IP(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the number of innings a player has pitched for a season. 15 | Required fields from the Pitching table are; "IPouts." 16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$IP <- IP(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}}, 28 | \code{\link{K_9}}, \code{\link{LOB_pct}}, 29 | \code{\link{WHIP}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{innings} 33 | \keyword{pitched} 34 | -------------------------------------------------------------------------------- /tests/testthat/check_urls.R: -------------------------------------------------------------------------------- 1 | # Check external urls to make sure they have not changed. 2 | 3 | library(testthat) 4 | 5 | # Set up function to do a tryCatch on URL. 6 | urlExists <- function(target) { 7 | tryCatch({ 8 | con <- url(target) 9 | a <- capture.output(suppressWarnings(readLines(con))) 10 | close(con) 11 | TRUE; 12 | }, 13 | error = function(err) { 14 | occur <- grep("cannot open the connection", capture.output(err)); 15 | if(length(occur) > 0) FALSE; 16 | } 17 | ) 18 | } 19 | 20 | # Check Chadwick Bureau Git repo. 21 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/AllstarFull.csv")) 22 | # Check my personal fork, backup. 23 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/keberwein/baseballdatabank/master/core/AllstarFull.csv")) 24 | # Check Fangraph guts page. 
25 | testthat::expect_true(urlExists("https://www.fangraphs.com/guts.aspx?type=cn")) 26 | 27 | 28 | -------------------------------------------------------------------------------- /tests/testthat/test-check_urls.R: -------------------------------------------------------------------------------- 1 | # Check external urls to make sure they have not changed. 2 | 3 | library(testthat) 4 | 5 | # Set up function to do a tryCatch on URL. 6 | urlExists <- function(target) { 7 | tryCatch({ 8 | con <- url(target) 9 | a <- capture.output(suppressWarnings(readLines(con))) 10 | close(con) 11 | TRUE; 12 | }, 13 | error = function(err) { 14 | occur <- grep("cannot open the connection", capture.output(err)); 15 | if(length(occur) > 0) FALSE; 16 | } 17 | ) 18 | } 19 | 20 | # Check Chadwick Bureau Git repo. 21 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/AllstarFull.csv")) 22 | # Check my personal fork, backup. 23 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/keberwein/baseballdatabank/master/core/AllstarFull.csv")) 24 | # Check Fangraph guts page. 25 | testthat::expect_true(urlExists("https://www.fangraphs.com/guts.aspx?type=cn")) 26 | 27 | 28 | -------------------------------------------------------------------------------- /man/H_9.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{H_9} 4 | \alias{H_9} 5 | \title{Pitching: Calculate Hits per Nine innings} 6 | \usage{ 7 | H_9(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the number of hits a pitcher allows per nine innings pitched. 
15 | Required fields from the Pitching table are; "H", "BB", and "IPouts." 16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$H_9 <- H_9(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{IP}}, 28 | \code{\link{K_9}}, \code{\link{LOB_pct}}, 29 | \code{\link{WHIP}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{hits} 33 | \keyword{innings} 34 | \keyword{nine} 35 | \keyword{per} 36 | -------------------------------------------------------------------------------- /man/HR_9.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{HR_9} 4 | \alias{HR_9} 5 | \title{Pitching: Calculate Home Runs per Nine innings} 6 | \usage{ 7 | HR_9(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the number of home runs a pitcher allows per nine innings pitched. 15 | Required fields from the Pitching table are; "H" and "IPouts." 
16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$HR_9 <- HR_9(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{H_9}}, \code{\link{IP}}, 28 | \code{\link{K_9}}, \code{\link{LOB_pct}}, 29 | \code{\link{WHIP}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{hits} 33 | \keyword{innings} 34 | \keyword{nine} 35 | \keyword{per} 36 | -------------------------------------------------------------------------------- /man/K_9.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{K_9} 4 | \alias{K_9} 5 | \title{Pitching: Calculate Strikeouts per Nine innings} 6 | \usage{ 7 | K_9(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the number of strikeouts a pitcher records per nine innings pitched. 15 | Required fields from the Pitching table are; "H", "BB", "IPouts", and "SO." 
16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$K_9 <- K_9(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}}, 28 | \code{\link{IP}}, \code{\link{LOB_pct}}, 29 | \code{\link{WHIP}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{innings} 33 | \keyword{nine} 34 | \keyword{per} 35 | \keyword{strikes} 36 | -------------------------------------------------------------------------------- /man/BB_9.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{BB_9} 4 | \alias{BB_9} 5 | \title{Pitching: Calculate walks per nine innings} 6 | \usage{ 7 | BB_9(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find walks per nine innings for pitchers with one or more innings pitched. 15 | Required fields from the Pitching table are; "IPouts", and "BB." 
16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$BB_9 <- BB_9(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{FIP}}, 27 | \code{\link{HR_9}}, \code{\link{H_9}}, \code{\link{IP}}, 28 | \code{\link{K_9}}, \code{\link{LOB_pct}}, 29 | \code{\link{WHIP}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{BB} 33 | \keyword{BB9} 34 | \keyword{BB_9} 35 | \keyword{bb/9} 36 | -------------------------------------------------------------------------------- /man/LOB_pct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{LOB_pct} 4 | \alias{LOB_pct} 5 | \title{Pitching: Calculate the left on base percentage} 6 | \usage{ 7 | LOB_pct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the percentage of base runners that a pitcher leaves on base over the course of a season. 15 | Required fields from the Pitching table are; "H", "BB", "HBP", "R", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$LOB_pct <- LOB_pct(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}}, 28 | \code{\link{IP}}, \code{\link{K_9}}, \code{\link{WHIP}} 29 | } 30 | \concept{Pitching functions} 31 | \keyword{LOB} 32 | \keyword{LOB_pct} 33 | \keyword{percentage} 34 | -------------------------------------------------------------------------------- /man/WHIP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{WHIP} 4 | \alias{WHIP} 5 | \title{Pitching: Calculate Walks plus Hits per Innings Pitched} 6 | \usage{ 7 | WHIP(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the number of walks plus hits a pitcher allows per inning pitched. 15 | Required fields from the Pitching table are; "H", "BB", and "IPouts." 
16 | } 17 | \examples{ 18 | 19 | data("Pitching2016") 20 | head(Pitching2016) 21 | 22 | Pitching2016$WHIP <- WHIP(Pitching2016) 23 | 24 | } 25 | \seealso{ 26 | Other Pitching functions: \code{\link{BB_9}}, 27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}}, 28 | \code{\link{IP}}, \code{\link{K_9}}, 29 | \code{\link{LOB_pct}} 30 | } 31 | \concept{Pitching functions} 32 | \keyword{Hits} 33 | \keyword{Innings} 34 | \keyword{Pitched} 35 | \keyword{WHIP} 36 | \keyword{Walks} 37 | \keyword{per} 38 | \keyword{plus} 39 | -------------------------------------------------------------------------------- /R/fielding2016.R: -------------------------------------------------------------------------------- 1 | #' A sample subset of the Fielding table from the Baseball Databank for the year 2016. 2 | #' 3 | #' A dataset containing Fielding statistics in 2016. 4 | #' 5 | #' @format A data frame with 1953 rows and 18 variables: 6 | #' \describe{ 7 | #' \item{playerID}{database key for unique player} 8 | #' \item{yearID}{year} 9 | #' \item{stint}{number of times played on team in a single year} 10 | #' \item{teamID}{database key for unique team} 11 | #' \item{lgID}{database key for unique league} 12 | #' \item{POS}{primary position} 13 | #' \item{G}{number of games played} 14 | #' \item{GS}{number of games started} 15 | #' \item{InnOuts}{number of outs played in field} 16 | #' \item{PO}{number of putouts} 17 | #' \item{A}{number of assists} 18 | #' \item{E}{number of home errors} 19 | #' \item{DP}{number of double plays} 20 | #' \item{PB}{number of passed balls by catchers} 21 | #' \item{WP}{number of wild pitches by catchers} 22 | #' \item{SB}{opponent stolen bases by catchers} 23 | #' \item{CS}{opponents caught stealing by catchers} 24 | #' \item{ZR}{zone rating} 25 | 26 | #' } 27 | #' @docType data 28 | #' @keywords internal 29 | #' @usage data(Fielding2016) 30 | #' @note Last updated 2016-06-15 31 | "Fielding2016" 32 | 
-------------------------------------------------------------------------------- /man/TBs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{TBs} 4 | \alias{TBs} 5 | \title{Batting: Calculate a batter's total bases} 6 | \usage{ 7 | TBs(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find total bases. 15 | Required fields from the batting table are "AB","H", "X2B", "X3B" and "HR." 16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$TBs <- TBs(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{PA}}, 31 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 32 | \code{\link{RCtech}}, \code{\link{SLG}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{TBs} 38 | \keyword{bases} 39 | \keyword{total} 40 | -------------------------------------------------------------------------------- /man/CTpct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{CTpct} 4 | \alias{CTpct} 5 | \title{Batting: Calculate a batter's contact rate} 6 | \usage{ 7 | CTpct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. 
The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the contact rate for batters. 15 | Required fields from the batting table are "AB" and "SO." 16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$CTpct <- CTpct(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{HRpct}}, \code{\link{ISO}}, 29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{CTpct} 38 | \keyword{contact} 39 | \keyword{rate} 40 | -------------------------------------------------------------------------------- /man/ISO.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{ISO} 4 | \alias{ISO} 5 | \title{Batting: Calculate ISO for batters} 6 | \usage{ 7 | ISO(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find isolated power (ISO) for batters with more than zero at bats. 15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$ISO <- ISO(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{ISO} 38 | \keyword{isolated} 39 | \keyword{power} 40 | -------------------------------------------------------------------------------- /man/OBP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{OBP} 4 | \alias{OBP} 5 | \title{Batting: Calculate on base percentage (OBP)} 6 | \usage{ 7 | OBP(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the OBP for batters with more than zero hits. 15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$OBP <- OBP(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{OBP} 38 | \keyword{base} 39 | \keyword{on} 40 | \keyword{percentage} 41 | -------------------------------------------------------------------------------- /man/SLG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{SLG} 4 | \alias{SLG} 5 | \title{Batting: Calculate slugging percentage (SLG)} 6 | \usage{ 7 | SLG(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the SLG for batters with more than zero hits. 15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$SLG <- SLG(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{PA}}, 31 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 32 | \code{\link{RCtech}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{SLG} 38 | \keyword{base} 39 | \keyword{on} 40 | \keyword{percentage} 41 | -------------------------------------------------------------------------------- /man/PA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{PA} 4 | \alias{PA} 5 | \title{Batting: Calculate plate appearances for batters} 6 | \usage{ 7 | PA(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the plate appearances (PA) for batters. 15 | Required fields from the batting table are "AB", "BB", "HBP", "SH", and "SF." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$PA <- PA(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{PA} 38 | \keyword{base} 39 | \keyword{on} 40 | \keyword{percentage} 41 | -------------------------------------------------------------------------------- /man/HRpct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{HRpct} 4 | \alias{HRpct} 5 | \title{Batting: Calculate home run percentage} 6 | \usage{ 7 | HRpct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find home run percentage for batters with more than zero at bats. 15 | Required fields from the Batting table are "AB" and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$HRpct <- HRpct(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{ISO}}, 29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{HRpct} 38 | \keyword{home} 39 | \keyword{percentage} 40 | \keyword{run} 41 | -------------------------------------------------------------------------------- /man/Kpct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{Kpct} 4 | \alias{Kpct} 5 | \title{Batting: Calculate strikeout percentage} 6 | \usage{ 7 | Kpct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find strikeout percentage for batters with more than zero at bats. 15 | Required fields from the Batting table are; "AB", "SO", "BB", "HBP", "SF", and "SH." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$Kpct <- Kpct(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{Kpct} 38 | \keyword{percentage} 39 | \keyword{strikeout} 40 | -------------------------------------------------------------------------------- /man/BA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{BA} 4 | \alias{BA} 5 | \title{Batting: Calculate batting average} 6 | \usage{ 7 | BA(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find batting average for batters with more than zero at bats. 15 | Required fields from the Batting table are; "AB", and "H." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$BA <- BA(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BBpct}}, \code{\link{CTpct}}, 28 | \code{\link{HRpct}}, \code{\link{ISO}}, 29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{BA} 38 | \keyword{ball} 39 | \keyword{base} 40 | \keyword{bb} 41 | \keyword{on} 42 | \keyword{percentage} 43 | -------------------------------------------------------------------------------- /man/Fielding2016.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fielding2016.R 3 | \docType{data} 4 | \name{Fielding2016} 5 | \alias{Fielding2016} 6 | \title{A sample subset of the Fielding table from the Baseball Databank for the year 2016.} 7 | \format{A data frame with 1953 rows and 18 variables: 8 | \describe{ 9 | \item{playerID}{database key for unique player} 10 | \item{yearID}{year} 11 | \item{stint}{number of times played on team in a single year} 12 | \item{teamID}{database key for unique team} 13 | \item{lgID}{database key for unique league} 14 | \item{POS}{primary position} 15 | \item{G}{number of games played} 16 | \item{GS}{number of games started} 17 | \item{InnOuts}{number of outs played in field} 18 | \item{PO}{number of putouts} 19 | \item{A}{number of assists} 20 | \item{E}{number of home errors} 21 | \item{DP}{number of double plays} 22 | \item{PB}{number of passed balls by catchers} 23 | \item{WP}{number of wild pitches by catchers} 24 | \item{SB}{opponent stolen bases by catchers} 25 | 
\item{CS}{opponents caught stealing by catchers} 26 | \item{ZR}{zone rating} 27 | }} 28 | \usage{ 29 | data(Fielding2016) 30 | } 31 | \description{ 32 | A dataset containing Fielding statistics in 2016. 33 | } 34 | \note{ 35 | Last updated 2016-06-15 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/OPS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{OPS} 4 | \alias{OPS} 5 | \title{Batting: Calculate on base percentage plus slugging (OPS)} 6 | \usage{ 7 | OPS(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the OPS for batters with more than zero hits. 15 | Required fields from the batting table are "H", "X2B", "X3B", "HR", "BB", "HBP", "AB" and "SF." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$OPS <- OPS(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{OPS} 38 | \keyword{base} 39 | \keyword{on} 40 | \keyword{percentage} 41 | -------------------------------------------------------------------------------- /man/XBperH.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{XBperH} 4 | \alias{XBperH} 5 | \title{Batting: Calculate extra base per hit} 6 | \usage{ 7 | XBperH(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the average extra bases per hit for batters with more than zero hits. 15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$XBperH <- XBperH(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{PA}}, 31 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 32 | \code{\link{RCtech}}, \code{\link{SLG}}, 33 | \code{\link{TBs}}, \code{\link{XBHpct}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{XBperH} 38 | \keyword{base} 39 | \keyword{extra} 40 | \keyword{hit} 41 | \keyword{per} 42 | -------------------------------------------------------------------------------- /R/seasonAVG.R: -------------------------------------------------------------------------------- 1 | #' League averages and aggregates from 1901 to present. 2 | #' 3 | #' A dataset containing combined aggregates and averages for all of the 4 | #' MLB. These stats are primarily used in calculating advanced player stats. 
5 | #' 6 | #' @format A data frame with 115 rows and 20 variables: 7 | #' \describe{ 8 | #' \item{yearID}{year} 9 | #' \item{tot_G}{total games played} 10 | #' \item{tot_PA}{total plate appearances} 11 | #' \item{tot_HR}{total home runs} 12 | #' \item{tot_R}{total runs scored} 13 | #' \item{tot_RBI}{total runs batted in} 14 | #' \item{tot_SB}{total stolen bases} 15 | #' \item{avg_BB}{mean base on ball percentage} 16 | #' \item{avg_K}{mean strikeout percentage} 17 | #' \item{avg_ISO}{mean isolated power} 18 | #' \item{avg_BABIP}{mean batting average on balls in play} 19 | #' \item{avg_BA}{mean batting average} 20 | #' \item{avg_OBP}{mean on base percentage} 21 | #' \item{avg_SLG}{mean slugging percentage} 22 | #' \item{avg_wOBA}{mean weighted on base average} 23 | #' \item{avg_wRC}{mean weighted runs created} 24 | #' \item{avg_BsR}{mean base running average} 25 | #' \item{off}{offense} 26 | #' \item{def}{defense} 27 | #' \item{avg_WAR}{mean wins above replacement} 28 | #' } 29 | #' @docType data 30 | #' @keywords internal 31 | #' @usage data(seasonAVG) 32 | #' @note Last updated 2016-09-21 33 | "seasonAVG" 34 | -------------------------------------------------------------------------------- /man/XBHpct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{XBHpct} 4 | \alias{XBHpct} 5 | \title{Batting: Calculate extra base percentage} 6 | \usage{ 7 | XBHpct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find extra base percentage for batters with more than zero at bats. 15 | Required fields from the batting table are "AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", and "HR." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$XBHpct <- XBHpct(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{PA}}, 31 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 32 | \code{\link{RCtech}}, \code{\link{SLG}}, 33 | \code{\link{TBs}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{XBHpct} 38 | \keyword{base} 39 | \keyword{extra} 40 | \keyword{percentage} 41 | -------------------------------------------------------------------------------- /man/get_bbdb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_bbdb.R 3 | \name{get_bbdb} 4 | \alias{get_bbdb} 5 | \title{Get an up to date copy of the Baseball Databank.} 6 | \usage{ 7 | get_bbdb(table = NULL, downloadZip = FALSE, AllTables = FALSE) 8 | } 9 | \arguments{ 10 | \item{table}{The tables you would like to download. Uses Lahman table names Ex. "Batting", "Master", "AllstarFull", etc... 11 | If this argument is left as NULL, the function will download all twenty-seven tables.} 12 | 13 | \item{downloadZip}{If true, this will download a zip file of all twenty-seven tables in .csv format to your working directory.} 14 | 15 | \item{AllTables}{If true, this will download all the tables in the database. The default is set to false.} 16 | } 17 | \description{ 18 | Download the newest version of the Baseball Databank from the Chadwick Bureau GitHub repository. This is the source of 19 | Sean Lahman's baseball database and is always under development. This function will read the .csv files and return them as data frames. 
20 | There is also an option to download the entire directory. 21 | } 22 | \examples{ 23 | 24 | get_bbdb(table = "Batting") 25 | 26 | \dontrun{ 27 | get_bbdb(table = c("Batting", "Pitching")) 28 | } 29 | 30 | \dontrun{ 31 | get_bbdb(AllTables = TRUE) 32 | } 33 | } 34 | \keyword{data} 35 | \keyword{database,} 36 | \keyword{frame} 37 | -------------------------------------------------------------------------------- /man/BABIP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{BABIP} 4 | \alias{BABIP} 5 | \title{Batting: Calculate batting average on balls in play (BABIP)} 6 | \usage{ 7 | BABIP(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find BABIP for batters with more than zero at bats. 15 | Required fields from the Batting table are; "AB", "BB", "H", "HBP", "SF", "SH", "HR" and "SO." 
16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$BABIP <- BABIP(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BA}}, 27 | \code{\link{BBpct}}, \code{\link{CTpct}}, 28 | \code{\link{HRpct}}, \code{\link{ISO}}, 29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 30 | \code{\link{PA}}, \code{\link{RC2002}}, 31 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{BABIP} 38 | \keyword{ball} 39 | \keyword{base} 40 | \keyword{bb} 41 | \keyword{on} 42 | \keyword{percentage} 43 | -------------------------------------------------------------------------------- /R/batting2016.R: -------------------------------------------------------------------------------- 1 | #' A sample subset of the Batting table from the Baseball Databank for the year 2016. 2 | #' 3 | #' A dataset containing Batting statistics in 2016. 
4 | #' 5 | #' @format A data frame with 1483 rows and 22 variables: 6 | #' \describe{ 7 | #' \item{playerID}{database key for unique player} 8 | #' \item{yearID}{year} 9 | #' \item{stint}{number of times played on team in a single year} 10 | #' \item{teamID}{database key for unique team} 11 | #' \item{lgID}{database key for unique league} 12 | #' \item{G}{number of games played} 13 | #' \item{AB}{number of at bats} 14 | #' \item{R}{number of runs scored} 15 | #' \item{H}{number of hits} 16 | #' \item{X2B}{number of doubles} 17 | #' \item{X3B}{number of triples} 18 | #' \item{HR}{number of home runs} 19 | #' \item{RBI}{number of runs batted in} 20 | #' \item{SB}{number of stolen bases} 21 | #' \item{CS}{number of times caught stealing} 22 | #' \item{BB}{number of base on balls} 23 | #' \item{SO}{number of strike outs} 24 | #' \item{IBB}{number of intentional base on balls} 25 | #' \item{HBP}{number of times hit by pitch} 26 | #' \item{SH}{number of sacrifice hits} 27 | #' \item{SF}{number of sacrifice flys} 28 | #' \item{GIDP}{number of times grounded into a double play} 29 | #' } 30 | #' @docType data 31 | #' @keywords internal 32 | #' @usage data(Batting2016) 33 | #' @note Last updated 2016-06-15 34 | "Batting2016" 35 | -------------------------------------------------------------------------------- /man/RCbasic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{RCbasic} 4 | \alias{RCbasic} 5 | \title{Batting: Calculate Runs Created using the basic formula.} 6 | \usage{ 7 | RCbasic(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find the runs created using the basic formula presented by Bill James in the late 1970s. 
15 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", and "HR." 16 | } 17 | \examples{ 18 | 19 | data("Batting2016") 20 | head(Batting2016) 21 | 22 | Batting2016$RCbasic <- RCbasic(Batting2016) 23 | 24 | } 25 | \seealso{ 26 | Other Batting functions: \code{\link{BABIP}}, 27 | \code{\link{BA}}, \code{\link{BBpct}}, 28 | \code{\link{CTpct}}, \code{\link{HRpct}}, 29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 30 | \code{\link{OPS}}, \code{\link{PA}}, 31 | \code{\link{RC2002}}, \code{\link{RCtech}}, 32 | \code{\link{SLG}}, \code{\link{TBs}}, 33 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 35 | } 36 | \concept{Batting functions} 37 | \keyword{RCbasic} 38 | \keyword{base} 39 | \keyword{extra} 40 | \keyword{hit} 41 | \keyword{per} 42 | -------------------------------------------------------------------------------- /man/seasonAVG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/seasonAVG.R 3 | \docType{data} 4 | \name{seasonAVG} 5 | \alias{seasonAVG} 6 | \title{League averages and aggregates from 1901 to present.} 7 | \format{A data frame with 115 rows and 20 variables: 8 | \describe{ 9 | \item{yearID}{year} 10 | \item{tot_G}{total games played} 11 | \item{tot_PA}{total plate appearances} 12 | \item{tot_HR}{total home runs} 13 | \item{tot_R}{total runs scored} 14 | \item{tot_RBI}{total runs batted in} 15 | \item{tot_SB}{total stolen bases} 16 | \item{avg_BB}{mean base on ball percentage} 17 | \item{avg_K}{mean strikeout percentage} 18 | \item{avg_ISO}{mean isolated power} 19 | \item{avg_BABIP}{mean batting average on balls in play} 20 | \item{avg_BA}{mean batting average} 21 | \item{avg_OBP}{mean on base percentage} 22 | \item{avg_SLG}{mean slugging percentage} 23 | \item{avg_wOBA}{mean weighted on base average} 24 | \item{avg_wRC}{mean weighted 
runs created} 25 | \item{avg_BsR}{mean base running average} 26 | \item{off}{offense} 27 | \item{def}{defense} 28 | \item{avg_WAR}{mean wins above replacement} 29 | }} 30 | \usage{ 31 | data(seasonAVG) 32 | } 33 | \description{ 34 | A dataset containing combined aggregates and averages for all of the 35 | MLB. These stats are primarily used in calculating advanced player stats. 36 | } 37 | \note{ 38 | Last updated 2016-09-21 39 | } 40 | \keyword{internal} 41 | -------------------------------------------------------------------------------- /man/BBpct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{BBpct} 4 | \alias{BBpct} 5 | \title{Batting: Calculate base on ball percentage} 6 | \usage{ 7 | BBpct(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | Find base on ball percentage for batters with more than zero at bats. 15 | Required fields from the Batting table are; "AB", "SO", "BB", "HBP", "SF", and "SH." 16 | Intentional base on balls (IBB) is added for the years that metric is available. 
17 | } 18 | \examples{ 19 | 20 | data("Batting2016") 21 | head(Batting2016) 22 | 23 | Batting2016$BBpct <- BBpct(Batting2016) 24 | 25 | } 26 | \seealso{ 27 | Other Batting functions: \code{\link{BABIP}}, 28 | \code{\link{BA}}, \code{\link{CTpct}}, 29 | \code{\link{HRpct}}, \code{\link{ISO}}, 30 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}}, 31 | \code{\link{PA}}, \code{\link{RC2002}}, 32 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 33 | \code{\link{SLG}}, \code{\link{TBs}}, 34 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 35 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 36 | } 37 | \concept{Batting functions} 38 | \keyword{BBpct} 39 | \keyword{ball} 40 | \keyword{base} 41 | \keyword{bb} 42 | \keyword{on} 43 | \keyword{percentage} 44 | -------------------------------------------------------------------------------- /data-raw/data_prep.R: -------------------------------------------------------------------------------- 1 | # Prep seasonAVG data set. 2 | seasonAVG <- read.csv("~/Downloads/seasonAVG.csv") 3 | # These data come from Fangraphs 4 | # http://www.fangraphs.com/leaders.aspx?pos=all&stats=bat&lg=all&qual=0&type=8&season=2015&month=0&season1=1901&ind=0&team=0,ss&rost=0&age=0&filter=&players=0 5 | 6 | # Check to make sure new columns line up with the old. May need to adjust colnames if they don't match. 7 | colnames(seasonAVG) <- c("yearID", "tot_G", "tot_PA", "tot_HR", "tot_R", "tot_RBI", "tot_SB", "avg_BB", 8 | "avg_K", "avg_ISO", "avg_BABIP", "avg_BA", "avg_OBP", "avg_SLG", "avg_wOBA", 9 | "avg_wRC", "avg_BsR", "off", "def", "avg_WAR") 10 | names(seasonAVG) 11 | devtools::use_data(seasonAVG, overwrite = TRUE) 12 | rm(seasonAVG) 13 | 14 | 15 | # Get example Batting, Pitching, and Fielding tables to be used in package testing to avoid long test times. 
16 | library(baseballDBR) 17 | library(dplyr) 18 | 19 | get_bbdb("Batting") 20 | Batting2016 <- subset(Batting, yearID == "2016") 21 | devtools::use_data(Batting2016, overwrite = TRUE) 22 | rm(Batting2016) 23 | rm(Batting) 24 | 25 | get_bbdb("Pitching") 26 | Pitching2016 <- subset(Pitching, yearID == "2016") 27 | devtools::use_data(Pitching2016, overwrite = TRUE) 28 | rm(Pitching2016) 29 | rm(Pitching) 30 | 31 | get_bbdb("Fielding") 32 | Fielding2016 <- subset(Fielding, yearID == "2016") 33 | devtools::use_data(Fielding2016, overwrite = TRUE) 34 | rm(Fielding2016) 35 | rm(Fielding) 36 | -------------------------------------------------------------------------------- /man/Batting2016.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batting2016.R 3 | \docType{data} 4 | \name{Batting2016} 5 | \alias{Batting2016} 6 | \title{A sample subset of the Batting table from the Baseball Databank for the year 2016.} 7 | \format{A data frame with 1483 rows and 22 variables: 8 | \describe{ 9 | \item{playerID}{database key for unique player} 10 | \item{yearID}{year} 11 | \item{stint}{number of times played on team in a single year} 12 | \item{teamID}{database key for unique team} 13 | \item{lgID}{database key for unique league} 14 | \item{G}{number of games played} 15 | \item{AB}{number of at bats} 16 | \item{R}{number of runs scored} 17 | \item{H}{number of hits} 18 | \item{X2B}{number of doubles} 19 | \item{X3B}{number of triples} 20 | \item{HR}{number of home runs} 21 | \item{RBI}{number of runs batted in} 22 | \item{SB}{number of stolen bases} 23 | \item{CS}{number of times caught stealing} 24 | \item{BB}{number of base on balls} 25 | \item{SO}{number of strike outs} 26 | \item{IBB}{number of intentional base on balls} 27 | \item{HBP}{number of times hit by pitch} 28 | \item{SH}{number of sacrifice hits} 29 | \item{SF}{number of sacrifice flys} 30 | 
\item{GIDP}{number of times grounded into a double play} 31 | }} 32 | \usage{ 33 | data(Batting2016) 34 | } 35 | \description{ 36 | A dataset containing Batting statistics in 2016. 37 | } 38 | \note{ 39 | Last updated 2016-06-15 40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /man/RCtech.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{RCtech} 4 | \alias{RCtech} 5 | \title{Batting: Calculate Runs Created using the technical formula.} 6 | \usage{ 7 | RCtech(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | The "Technical Version" is the most well-known formula for RC. It adds several factors to the 15 | basic formula such as sacrifice hits, stolen bases and intentional base on balls. 16 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS", 17 | "SF", "SH", and "IBB." 
18 | } 19 | \examples{ 20 | 21 | data("Batting2016") 22 | head(Batting2016) 23 | 24 | Batting2016$RCtech <- RCtech(Batting2016) 25 | 26 | } 27 | \seealso{ 28 | Other Batting functions: \code{\link{BABIP}}, 29 | \code{\link{BA}}, \code{\link{BBpct}}, 30 | \code{\link{CTpct}}, \code{\link{HRpct}}, 31 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 32 | \code{\link{OPS}}, \code{\link{PA}}, 33 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 34 | \code{\link{SLG}}, \code{\link{TBs}}, 35 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 36 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 37 | } 38 | \concept{Batting functions} 39 | \keyword{RCtech} 40 | \keyword{base} 41 | \keyword{extra} 42 | \keyword{hit} 43 | \keyword{per} 44 | -------------------------------------------------------------------------------- /man/RC2002.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{RC2002} 4 | \alias{RC2002} 5 | \title{Batting: Calculate Runs Created using the updated 2002 formula.} 6 | \usage{ 7 | RC2002(dat = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 12 | } 13 | \description{ 14 | The "2002 Version" is an updated version of the "Technical Version" by Bill James. 15 | The 2002 RC uses the same counting stats as the Technical Version but applies weights to many of the raw stats. 16 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS", 17 | "SF", "SH", "SO", and "IBB." 
18 | } 19 | \examples{ 20 | 21 | data("Batting2016") 22 | head(Batting2016) 23 | 24 | Batting2016$RC2002 <- RC2002(Batting2016) 25 | 26 | } 27 | \seealso{ 28 | Other Batting functions: \code{\link{BABIP}}, 29 | \code{\link{BA}}, \code{\link{BBpct}}, 30 | \code{\link{CTpct}}, \code{\link{HRpct}}, 31 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 32 | \code{\link{OPS}}, \code{\link{PA}}, 33 | \code{\link{RCbasic}}, \code{\link{RCtech}}, 34 | \code{\link{SLG}}, \code{\link{TBs}}, 35 | \code{\link{XBHpct}}, \code{\link{XBperH}}, 36 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}} 37 | } 38 | \concept{Batting functions} 39 | \keyword{RC2002} 40 | \keyword{base} 41 | \keyword{extra} 42 | \keyword{hit} 43 | \keyword{per} 44 | -------------------------------------------------------------------------------- /man/fip_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fip.R 3 | \name{fip_values} 4 | \alias{fip_values} 5 | \title{Return FIP constants per season} 6 | \usage{ 7 | fip_values(dat = NULL, Sep.Leagues = FALSE, Fangraphs = FALSE) 8 | } 9 | \arguments{ 10 | \item{dat}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 11 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 12 | 13 | \item{Sep.Leagues}{If TRUE, this will split the calculation and return unique FIP constants for the various leagues. This can be 14 | helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.} 15 | 16 | \item{Fangraphs}{If TRUE the function will return the Fangraphs FIP constants. 
This cannot be used in conjunction with the 17 | \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.} 18 | } 19 | \description{ 20 | Get FIP constants for each season. By default the function uses a method adapted from 21 | Tom Tango and used by Fangraphs. The function returns FIP constants based on ERA \code{FIP_ERA} as well as constants based on RA \code{FIP_RA}. 22 | Both the Tango and Fangraphs formulas use ERA for their FIP constants. 23 | } 24 | \examples{ 25 | 26 | data("Pitching2016") 27 | head(Pitching2016) 28 | 29 | fip_df <- fip_values(Pitching2016, Fangraphs=FALSE) 30 | head(fip_df) 31 | 32 | 33 | } 34 | \keyword{average,} 35 | \keyword{base} 36 | \keyword{fangraphs} 37 | \keyword{on} 38 | \keyword{wOBA,} 39 | \keyword{woba,} 40 | -------------------------------------------------------------------------------- /R/pitching2016.R: -------------------------------------------------------------------------------- 1 | #' A sample subset of the Pitching table from the Baseball Databank for the year 2016. 2 | #' 3 | #' A dataset containing Pitching statistics in 2016. 
4 | #' 5 | #' @format A data frame with 824 rows and 30 variables: 6 | #' \describe{ 7 | #' \item{playerID}{database key for unique player} 8 | #' \item{yearID}{year} 9 | #' \item{stint}{number of times played on team in a single year} 10 | #' \item{teamID}{database key for unique team} 11 | #' \item{lgID}{database key for unique league} 12 | #' \item{W}{number of games won} 13 | #' \item{L}{number of games lost} 14 | #' \item{G}{number of games played} 15 | #' \item{GS}{number of games started} 16 | #' \item{CG}{number of complete games} 17 | #' \item{SHO}{number of shutouts} 18 | #' \item{SV}{number of home saves} 19 | #' \item{IPouts}{number of outs pitched} 20 | #' \item{H}{number of hits surrendered} 21 | #' \item{ER}{number of earned runs} 22 | #' \item{HR}{number of home runs surrendered} 23 | #' \item{BB}{number of base on balls} 24 | #' \item{SO}{number of strike outs} 25 | #' \item{BAOpp}{opponent batting average} 26 | #' \item{ERA}{earned run average} 27 | #' \item{IBB}{number of intentional base on balls} 28 | #' \item{WP}{number of wild pitches} 29 | #' \item{HBP}{number of hit batters by pitch} 30 | #' \item{BK}{number of balks} 31 | #' \item{BFP}{batters faced by pitcher} 32 | #' \item{GF}{Games finished} 33 | #' \item{R}{number of runs allowed} 34 | #' \item{SH}{number of sacrifice hits by opposing batters} 35 | #' \item{SF}{number of sacrifice flys by opposing batters} 36 | #' \item{GIDP}{number of grounded into double plays by opposing batters} 37 | #' 38 | #' } 39 | #' @docType data 40 | #' @keywords internal 41 | #' @usage data(Pitching2016) 42 | #' @note Last updated 2016-06-15 43 | "Pitching2016" 44 | -------------------------------------------------------------------------------- /man/Pitching2016.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitching2016.R 3 | \docType{data} 4 | \name{Pitching2016} 5 | 
\alias{Pitching2016} 6 | \title{A sample subset of the Pitching table from the Baseball Databank for the year 2016.} 7 | \format{A data frame with 824 rows and 30 variables: 8 | \describe{ 9 | \item{playerID}{database key for unique player} 10 | \item{yearID}{year} 11 | \item{stint}{number of times played on team in a single year} 12 | \item{teamID}{database key for unique team} 13 | \item{lgID}{database key for unique league} 14 | \item{W}{number of games won} 15 | \item{L}{number of games lost} 16 | \item{G}{number of games played} 17 | \item{GS}{number of games started} 18 | \item{CG}{number of complete games} 19 | \item{SHO}{number of shutouts} 20 | \item{SV}{number of home saves} 21 | \item{IPouts}{number of outs pitched} 22 | \item{H}{number of hits surrendered} 23 | \item{ER}{number of earned runs} 24 | \item{HR}{number of home runs surrendered} 25 | \item{BB}{number of base on balls} 26 | \item{SO}{number of strike outs} 27 | \item{BAOpp}{opponent batting average} 28 | \item{ERA}{earned run average} 29 | \item{IBB}{number of intentional base on balls} 30 | \item{WP}{number of wild pitches} 31 | \item{HBP}{number of hit batters by pitch} 32 | \item{BK}{number of balks} 33 | \item{BFP}{batters faced by pitcher} 34 | \item{GF}{Games finished} 35 | \item{R}{number of runs allowed} 36 | \item{SH}{number of sacrifice hits by opposing batters} 37 | \item{SF}{number of sacrifice flys by opposing batters} 38 | \item{GIDP}{number of grounded into double plays by opposing batters} 39 | 40 | }} 41 | \usage{ 42 | data(Pitching2016) 43 | } 44 | \description{ 45 | A dataset containing Pitching statistics in 2016. 
46 | } 47 | \note{ 48 | Last updated 2016-06-15 49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /R/fieldingStats.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Fielding: Calculate defensive chances 3 | #' @description The number of chances a player had to make a defensive play, computed as assists plus putouts plus errors. 4 | #' Required fields from the Fielding table are "PO", "A", and "E". 5 | #' @param dat A data frame to calculate from. The data frame must have the same column names found in 6 | #' the \code{Lahman} package or the Chadwick Bureau GitHub repository. 7 | #' @keywords Ch Defensive Chances 8 | #' @family Fielding functions 9 | #' @export Ch 10 | #' @return A vector with one value per row of \code{dat}: the sum of the "A", "PO" and "E" columns. 11 | #' @examples 12 | #' data("Fielding2016") 13 | #' head(Fielding2016) 14 | #' 15 | #' Fielding2016$Ch <- Ch(Fielding2016) 16 | #' 17 | Ch <- function (dat=NULL){ 18 | if (is.null(dat)) stop("Please supply a valid data frame.", call. = FALSE) 19 | # Fail fast: with a required column missing, the sum below would silently be zero-length. 20 | if(!all(c("PO", "A", "E") %in% colnames(dat))) { 21 | stop("Not enough data to calculate. Please make sure your data includes 'PO', 'A' and 'E'", call. = FALSE) 22 | } 23 | # Chances = assists + putouts + errors. 24 | dat$A + dat$PO + dat$E 25 | } 26 | 27 | 28 | 29 | #' @title Fielding: Calculate fielding percentage 30 | #' @description Find the fielding percentage, (PO + A) / (PO + A + E), rounded to three decimal places, for players with more than zero defensive chances. 31 | #' Required fields from the Fielding table are "PO", "A", and "E". 32 | #' @param dat A data frame to calculate from. The data frame must have the same column names found in 33 | #' the \code{Lahman} package or the Chadwick Bureau GitHub repository. 
34 | #' @keywords Fld_pct fielding percentage 35 | #' @family Fielding functions 36 | #' @export Fld_pct 37 | #' @return A vector with one fielding percentage per row of \code{dat}; \code{NA} for rows with zero chances. 38 | #' @examples 39 | #' data("Fielding2016") 40 | #' head(Fielding2016) 41 | #' 42 | #' Fielding2016$Fld_pct <- Fld_pct(Fielding2016) 43 | #' 44 | Fld_pct <- function (dat=NULL){ 45 | if (is.null(dat)) stop("Please supply a valid data frame.", call. = FALSE) 46 | # Fail fast: with a required column missing there is nothing to compute. 47 | if(!all(c("PO", "A", "E") %in% colnames(dat))) { 48 | stop("Not enough data to calculate. Please make sure your data includes 'PO', 'A' and 'E'", call. = FALSE) 49 | } 50 | chances <- dat$PO + dat$A + dat$E # zero chances would give 0/0 = NaN, so those rows are reported as NA 51 | ifelse(chances > 0, round((dat$PO + dat$A) / chances, 3), NA) 52 | } 53 | -------------------------------------------------------------------------------- /man/FIP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pitchingStats.R 3 | \name{FIP} 4 | \alias{FIP} 5 | \title{Pitching: Fielding Independent Pitching (FIP)} 6 | \usage{ 7 | FIP(dat = NULL, Fangraphs = FALSE, NA_to_zero = TRUE, 8 | Sep.Leagues = FALSE) 9 | } 10 | \arguments{ 11 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in 12 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.} 13 | 14 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal 15 | formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and 16 | does not require an external download. 
If not specified, the default is set to FALSE.} 17 | 18 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits 19 | were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954. 
20 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice. 21 | If FALSE, the wOBA calculation will return NaN for years with missing data.} 22 | 23 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping 24 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for 25 | park factors between the American and National leagues. The default for this argument is FALSE.} 26 | } 27 | \description{ 28 | Find the FIP for all pitchers with one or strike outs in a particular season. 29 | Required fields from the Pitching table are "BB", "HBP", "SO", and "IPouts." 30 | } 31 | \examples{ 32 | 33 | data("Pitching2016") 34 | head(Pitching2016) 35 | 36 | Pitching2016$FIP <- FIP(Pitching2016, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE) 37 | 38 | } 39 | \seealso{ 40 | Other Pitching functions: \code{\link{BB_9}}, 41 | \code{\link{HR_9}}, \code{\link{H_9}}, \code{\link{IP}}, 42 | \code{\link{K_9}}, \code{\link{LOB_pct}}, 43 | \code{\link{WHIP}} 44 | } 45 | \concept{Pitching functions} 46 | \keyword{FIP} 47 | \keyword{fielding} 48 | \keyword{independent} 49 | \keyword{pitching} 50 | -------------------------------------------------------------------------------- /man/wOBA_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/woba_values.R 3 | \name{wOBA_values} 4 | \alias{wOBA_values} 5 | \title{Return wOBA values per season} 6 | \usage{ 7 | wOBA_values(BattingTable, PitchingTable, FieldingTable, 8 | Sep.Leagues = FALSE, Fangraphs = FALSE) 9 | } 10 | \arguments{ 11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 
12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 13 | 14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 16 | 17 | \item{FieldingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 19 | 20 | \item{Sep.Leagues}{If TRUE, this will split the calculation and return unique wOBA values for the various leagues. This can be 21 | helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.} 22 | 23 | \item{Fangraphs}{if TRUE the function will return the Fangraphs wOBA values. By default the function uses a method adapted from 24 | Tom Tango. These values are often very close to Fangraphs, but are not the same due to Fangraphs using a different algorithm. 25 | This can not be used in conjunction with the \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.} 26 | } 27 | \description{ 28 | Get wOBA values for each year in your database. This calculation requires all fields of 29 | the Pitching, Fielding and Batting tables from the Lahman package, or a comparable data set. The function uses 30 | a version of Tom Tango's wOBA formula by default, but can also return Fangraphs wOBA values. 
31 | } 32 | \examples{ 33 | 34 | data("Batting2016") 35 | head(Batting2016) 36 | data("Pitching2016") 37 | head(Pitching2016) 38 | data("Fielding2016") 39 | head(Fielding2016) 40 | 41 | woba_df <- wOBA_values(Batting2016, Pitching2016, Fielding2016, Sep.Leagues=FALSE, Fangraphs=FALSE) 42 | 43 | } 44 | \keyword{average,} 45 | \keyword{base} 46 | \keyword{fangraphs} 47 | \keyword{on} 48 | \keyword{wOBA,} 49 | \keyword{woba,} 50 | -------------------------------------------------------------------------------- /vignettes/FIP.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fielding Independent Pitching (FIP)" 3 | author: "Kris Eberwein" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fielding Independent Pitching (FIP)} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | The `baseballDBR` package provides two functions for fielding independent pitching (FIP): the `FIP()` function, which calculates the metric for players in the "Pitching" table, and the `fip_values()` function, which calculates the league averages and constants used in the calculation. 13 | 14 | The FIP metric attempts to estimate a pitcher's performance independent of the performance of the other players on the defense. The goal is similar to that of earned run average (ERA), but unlike ERA, FIP removes the variable of a pitcher's "luck" from plays made by other defenders. 15 | 16 | Like wOBA, FIP is a weighted metric that relies on a system of linear weights and league averages. The formula for FIP is: 17 | 18 | $$\frac{13*HR + 3(BB + HBP) - 2*SO}{IP} + FIP constant$$ 19 | 20 | The above formula is based on work by Tom Tango, and is similar to the DIPS metric by Voros McCracken. It looks only at the things a pitcher *can* control, such as hit-by-pitch, base-on-balls, strikeouts, and home runs, and weights them accordingly. 
21 | 22 | ## The FIP Constant 23 | 24 | The FIP constant is used in the FIP calculation solely to bring the number onto the ERA scale, and make it more familiar to users. The wOBA constant works in much the same way to bring wOBA closer to OBP. The formula for the FIP constant is: 25 | 26 | $$lgERA - \frac{13*HR + 3(BB + HBP) - 2*SO}{IP}$$ 27 | 28 | ```{r, eval=FALSE} 29 | library(baseballDBR) 30 | 31 | # Grab the Pitching table from the Baseball Databank 32 | get_bbdb(table = "Pitching") 33 | 34 | Pitching$FIP <- FIP(Pitching, Fangraphs=FALSE, NA_to_zero=FALSE, Sep.Leagues=FALSE) 35 | 36 | 37 | ``` 38 | 39 | 40 | ### Arguments 41 | 42 | Unlike the `wOBA()` functions, which require three data frames, the `FIP()` function only requires the "Pitching" data frame. 43 | 44 | * Fangraphs: Should the function use the package's native algorithm or download the FIP constants from Fangraphs? The default is FALSE, as the results will likely be identical. 45 | 46 | * Sep.Leagues: Should the function determine separate FIP values for the National and American leagues? Standard practice would be to use FIP values that combine both leagues. Note, this option is not available if `Fangraphs=TRUE`, as Fangraphs does not separate leagues. 47 | 48 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Cy Young's intentional base-on-balls `IBB` metric is NA because that statistic wasn't tracked when he played, so his `FIP` should be NA. Note that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players. 49 | 50 | ### FIP Values 51 | 52 | For deeper analysis, the `fip_values()` function allows us to look at the league averages that are the underpinnings of the FIP calculation. 
53 | 54 | ```{r, eval=FALSE} 55 | library(baseballDBR) 56 | 57 | fip_valz <- fip_values(Pitching, Fangraphs = FALSE, Sep.Leagues = FALSE) 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /vignettes/wRAA_wRC.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Weighted Runs Above Average (wRAA) and Weighted Runs Created (wRC)" 3 | author: "Kris Eberwein" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Weighted Runs Above Average (wRAA) and Weighted Runs Created (wRC)} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | # Weighted Runs Above Average (wRAA) 13 | 14 | The wRAA metric attempts to establish an average of runs scored by all the players in the league and rate a single player by how many runs, above or below average, that player scored in a given year. The baseline of the metric is zero, so some players may have a negative wRAA. 15 | 16 | Despite having all the characteristics of a "counting statistic," wRAA is based on wOBA and the wOBA scale, which rely on linear weights, so wRAA could certainly be considered a predictive metric. 17 | 18 | The formula for wRAA is: 19 | 20 | $$\frac{wOBA - leagueWOBA}{wOBA scale} * \underbrace{(AB+BB-IBB+SF+HBP)}_{PA}$$ 21 | 22 | The multiplier to the right is a formula for plate appearances, which differs from at-bats. Note that SHs and IBBs are not counted in the `PA` formula because they are largely regarded as managerial decisions. For more information on wOBA, league wOBA, and wOBA scale, please see the `wOBA` vignette. 23 | 24 | The wRAA metric is also used to calculate wins above replacement (WAR). 25 | 26 | Since wRAA relies on wOBA coefficients, we need three tables to make the calculation: `Batting`, `Pitching`, and `Fielding`. We use all three tables in order to determine a player's primary position. 
More on this can be found in the wOBA vignette. 27 | 28 | ```{r, eval=FALSE} 29 | library(baseballDBR) 30 | # Load data from Baseball Databank 31 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 32 | 33 | Batting$wRAA <- wRAA(Batting, Pitching, Fielding, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE) 34 | ``` 35 | 36 | ### Arguments 37 | 38 | * Fangraphs: Should the function use Fangraphs wOBA values or the package's native Tango method? 39 | 40 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Babe Ruth's sacrifice fly `SF` metric is NA because that statistic wasn't tracked when he played, so his `wRAA` should be NA. Note that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players. 41 | 42 | * Sep.Leagues: Should the function determine separate wOBA values for the National and American leagues? Standard practice would be to use wOBA values that combine both leagues. Note, this option is not available if `Fangraphs=TRUE`. 43 | 44 | # Weighted Runs Created (wRC) 45 | 46 | The wRC metric attempts to quantify a player’s total offensive value and measure it by the runs that player creates. The wRC metric is based on the "Runs Created" metric that was originally used by Bill James. 47 | 48 | wRC requires the same data and accepts the same arguments as the `wRAA()` and `wOBA()` functions. 
49 | 50 | ```{r, eval=FALSE} 51 | library(baseballDBR) 52 | # Load data from Baseball Databank 53 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 54 | 55 | Batting$wRC <- wRC(Batting, Pitching, Fielding, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE) 56 | ``` 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /man/wOBA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{wOBA} 4 | \alias{wOBA} 5 | \title{Batting: Calculate Weighted On-Base Average (wOBA)} 6 | \usage{ 7 | wOBA(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL, 8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE) 9 | } 10 | \arguments{ 11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 13 | 14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 16 | 17 | \item{FieldingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 19 | 20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal 21 | formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and 22 | does not require an external download. 
If not specified, the default is set to FALSE.} 23 | 24 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits 25 | were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954. 26 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice. 27 | If FALSE, the wOBA calculation will return NaN for years with missing data.} 28 | 29 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping 30 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for 31 | park factors between the American and National leagues. The default for this argument is FALSE.} 32 | } 33 | \description{ 34 | Find the wOBA for all players with one or more hits for a particular season. 35 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB." 
36 | } 37 | \examples{ 38 | 39 | data("Batting2016") 40 | head(Batting2016) 41 | data("Pitching2016") 42 | head(Pitching2016) 43 | data("Fielding2016") 44 | head(Fielding2016) 45 | 46 | Batting2016$wOBA <- wOBA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE, 47 | NA_to_zero=TRUE, Sep.Leagues=FALSE) 48 | 49 | } 50 | \seealso{ 51 | Other Batting functions: \code{\link{BABIP}}, 52 | \code{\link{BA}}, \code{\link{BBpct}}, 53 | \code{\link{CTpct}}, \code{\link{HRpct}}, 54 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 55 | \code{\link{OPS}}, \code{\link{PA}}, 56 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 57 | \code{\link{RCtech}}, \code{\link{SLG}}, 58 | \code{\link{TBs}}, \code{\link{XBHpct}}, 59 | \code{\link{XBperH}}, \code{\link{wRAA}}, 60 | \code{\link{wRC}} 61 | } 62 | \concept{Batting functions} 63 | \keyword{Average} 64 | \keyword{On-Base} 65 | \keyword{Weighted} 66 | \keyword{wOBA} 67 | -------------------------------------------------------------------------------- /man/wRC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{wRC} 4 | \alias{wRC} 5 | \title{Batting: Calculate Weighted Runs Created (wRC)} 6 | \usage{ 7 | wRC(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL, 8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE) 9 | } 10 | \arguments{ 11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 13 | 14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 15 | Any subsetting or removal of players will affect your results. 
All players for each year are recommended.} 16 | 17 | \item{FieldingTable}{A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 19 | 20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRC 21 | calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula. 22 | This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRC should be expected. 23 | The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.} 24 | 25 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits 26 | were not a counted statistic until 1954, therefore we are technically unable to calculate wRC for any player prior to 1954. 27 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice. 28 | If FALSE, the wRC calculation will return NaN for years with missing data.} 29 | 30 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping 31 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for 32 | park factors between the American and National leagues. The default for this argument is FALSE.} 33 | } 34 | \description{ 35 | Find the wRC for all players with one or more hits for a particular season. 36 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB." 
37 | } 38 | \examples{ 39 | 40 | data("Batting2016") 41 | head(Batting2016) 42 | data("Pitching2016") 43 | head(Pitching2016) 44 | data("Fielding2016") 45 | head(Fielding2016) 46 | 47 | Batting2016$wRC <- wRC(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE, 48 | NA_to_zero=TRUE, Sep.Leagues=FALSE) 49 | 50 | } 51 | \seealso{ 52 | Other Batting functions: \code{\link{BABIP}}, 53 | \code{\link{BA}}, \code{\link{BBpct}}, 54 | \code{\link{CTpct}}, \code{\link{HRpct}}, 55 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 56 | \code{\link{OPS}}, \code{\link{PA}}, 57 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 58 | \code{\link{RCtech}}, \code{\link{SLG}}, 59 | \code{\link{TBs}}, \code{\link{XBHpct}}, 60 | \code{\link{XBperH}}, \code{\link{wOBA}}, 61 | \code{\link{wRAA}} 62 | } 63 | \concept{Batting functions} 64 | \keyword{Above} 65 | \keyword{Average} 66 | \keyword{Runs} 67 | \keyword{Weighted} 68 | \keyword{wRC} 69 | -------------------------------------------------------------------------------- /man/wRAA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/battingStats.R 3 | \name{wRAA} 4 | \alias{wRAA} 5 | \title{Batting: Calculate Weighted Runs Above Average (wRAA)} 6 | \usage{ 7 | wRAA(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL, 8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE) 9 | } 10 | \arguments{ 11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 13 | 14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 15 | Any subsetting or removal of players will affect your results. 
All players for each year are recommended.} 16 | 17 | \item{FieldingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.} 19 | 20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRAA 21 | calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula. 22 | This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRAA should be expected. 23 | The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.} 24 | 25 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits 26 | were not a counted statistic until 1954, therefore we are technically unable to calculate wRAA for any player prior to 1954. 27 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice. 28 | If FALSE, the wRAA calculation will return NaN for years with missing data.} 29 | 30 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping 31 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for 32 | park factors between the American and National leagues. The default for this argument is FALSE.} 33 | } 34 | \description{ 35 | Find the wRAA for all players with one or more hits for a particular season. 36 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB." 
37 | } 38 | \examples{ 39 | 40 | data("Batting2016") 41 | head(Batting2016) 42 | data("Pitching2016") 43 | head(Pitching2016) 44 | data("Fielding2016") 45 | head(Fielding2016) 46 | 47 | Batting2016$wRAA <- wRAA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE, 48 | NA_to_zero=TRUE, Sep.Leagues=FALSE) 49 | 50 | } 51 | \seealso{ 52 | Other Batting functions: \code{\link{BABIP}}, 53 | \code{\link{BA}}, \code{\link{BBpct}}, 54 | \code{\link{CTpct}}, \code{\link{HRpct}}, 55 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}}, 56 | \code{\link{OPS}}, \code{\link{PA}}, 57 | \code{\link{RC2002}}, \code{\link{RCbasic}}, 58 | \code{\link{RCtech}}, \code{\link{SLG}}, 59 | \code{\link{TBs}}, \code{\link{XBHpct}}, 60 | \code{\link{XBperH}}, \code{\link{wOBA}}, 61 | \code{\link{wRC}} 62 | } 63 | \concept{Batting functions} 64 | \keyword{Above} 65 | \keyword{Average} 66 | \keyword{Runs} 67 | \keyword{Weighted} 68 | \keyword{wRAA} 69 | -------------------------------------------------------------------------------- /R/fip.R: -------------------------------------------------------------------------------- 1 | #' @title Return FIP constants per season 2 | #' @description Get FIP constants for each season. By default the function uses a method adapted from 3 | #' Tom Tango and used by Fangraphs. The function returns FIP constants based on ERA \code{cFIP} as well as constants based on RA \code{cRA}. 4 | #' Both the Tango and Fangraphs formulas use ERA for their FIP constants. 5 | #' @param dat A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository. 6 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended. 7 | #' @param Sep.Leagues If TRUE, this will split the calculation and return unique FIP constants for the various leagues. This can be 8 | #' helpful in handling Designated Hitters and National League pitchers. 
It also isolates the park factors to their respective leagues. 9 | #' @param Fangraphs If TRUE the function will return the Fangraphs FIP constants. This can not be used in conjunction with the 10 | #' \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league. 11 | #' @keywords FIP fielding independent pitching constants fangraphs 12 | #' @importFrom rvest html_node 13 | #' @importFrom xml2 read_html 14 | #' @importFrom stats setNames 15 | #' @import dplyr 16 | #' @export fip_values 17 | #' @return A data frame of summed pitching totals per season (per season and league when \code{Sep.Leagues} is TRUE) with the added columns \code{IP}, \code{lgERA}, \code{lgRA}, \code{cFIP} and \code{cRA}; when \code{Fangraphs} is TRUE, the table read from the Fangraphs site instead. 18 | #' @examples 19 | #' data("Pitching2016") 20 | #' head(Pitching2016) 21 | #' 22 | #' fip_df <- fip_values(Pitching2016, Fangraphs=FALSE) 23 | #' head(fip_df) 24 | #' 25 | #' 26 | 27 | fip_values <- function(dat=NULL, Sep.Leagues=FALSE, Fangraphs=FALSE){ 28 | # Declare values for Rcheck so it won't throw a note. 29 | yearID=lgID=G=IPouts=H=HR=BB=SO=IBB=HBP=R=SF=W=L=GS=CG=SHO=SV=ER=WP=BK=BFP=GF=SH=GIDP=IP=lgERA=lgRA=NULL 30 | if(isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){ 31 | message("The Fangraphs Guts table does not separate FIP constants by league. Ignoring Sep.Leagues and returning the Fangraphs values...") 32 | } 33 | 34 | if(isTRUE(Fangraphs)){ 35 | # If user wants to use Fangraphs, grab it from the website. 
36 | dat <- xml2::read_html("http://www.fangraphs.com/guts.aspx?type=cn") %>% 37 | rvest::html_node(xpath = '//*[(@id = "GutsBoard1_dg1_ctl00")]') %>% 38 | rvest::html_table() %>% 39 | stats::setNames(c("yearID", "lg_woba", "woba_scale", "wBB", "wHBP", "w1B", "w2B", 40 | "w3B", "wHR", "runSB", "runCS", "lg_r_pa", "lg_r_w", "cFIP")) 41 | } 42 | if(!isTRUE(Fangraphs) && is.null(dat)) stop("Please supply a valid data frame.", call. = FALSE) 43 | if(!isTRUE(Fangraphs)){ 44 | dat <- dat[, !names(dat) %in% c("playerID", "teamID", "stint", "BAOpp", "ERA")] 45 | # Replace NA with 0, otherwise the league sums below will be NA. 
46 | dat[is.na(dat)] <- 0 47 | 48 | if(isTRUE(Sep.Leagues)){ 49 | dat %<>% dplyr::group_by(yearID, lgID) 50 | } else { 51 | dat %<>% dplyr::group_by(yearID) 52 | } 53 | 54 | dat %<>% 55 | # Totals are summed within the grouping chosen above; the walk term is BB + HBP (IBB was added and subtracted, so it cancels). 56 | dplyr::summarise(W=sum(W), L=sum(L), G=sum(G), GS=sum(GS), CG=sum(CG), SHO=sum(SHO), SV=sum(SV), 57 | IPouts=sum(IPouts), H=sum(H), ER=sum(ER), HR=sum(HR), BB=sum(BB), SO=sum(SO), IBB=sum(IBB), 58 | WP=sum(WP), HBP=sum(HBP), BK=sum(BK), BFP=sum(BFP), GF=sum(GF), R=sum(R), SH=sum(SH), 59 | SF=sum(SF), GIDP=sum(GIDP)) %>% 60 | dplyr::mutate(IP=IPouts/3, lgERA=(ER / IP)*9, lgRA=(R / IP)*9, 61 | cFIP=lgERA - ((HR*13) + ((BB + HBP)*3) - (SO*2)) / IP, 62 | cRA=lgRA - ((HR*13) + ((BB + HBP)*3) - (SO*2)) / IP) 63 | } 64 | return(dat) 65 | } 66 | 67 | -------------------------------------------------------------------------------- /R/get_bbdb.R: -------------------------------------------------------------------------------- 1 | #' @title Get an up to date copy of the Baseball Databank. 2 | #' @description Download the newest version of the Baseball Databank from the Chadwick Bureau GitHub repository. This is the source of 3 | #' Sean Lahman's baseball database and is always under development. This function will read the .csv files and return them as data frames. 4 | #' There is also an option to download the entire directory. 5 | #' @param table The tables you would like to download. Uses Lahman table names Ex. "Batting", "Master", "AllstarFull", etc... 6 | #' If this argument is left as NULL, the function will download all twenty-seven tables. 7 | #' @param downloadZip If true, this will download a zip file of all twenty-seven tables in .csv format to your working directory. 8 | #' @param AllTables If true, this will download all the tables in the database. The default is set to false. 
#' @return Called for its side effects. Requested tables are assigned as data frames in the global environment, and
#' \code{master.zip} is left in the working directory when \code{downloadZip} is TRUE. Returns \code{NULL} invisibly.
#' @keywords database, data frame
#' @import utils
#' @export get_bbdb
#' @examples
#'
#' get_bbdb(table = "Batting")
#'
#' \dontrun{
#' get_bbdb(table = c("Batting", "Pitching"))
#'}
#'
#'\dontrun{
#' get_bbdb(AllTables = TRUE)
#'}

get_bbdb <- function(table=NULL, downloadZip=FALSE, AllTables=FALSE){
    chadwick_zip  <- "https://github.com/chadwickbureau/baseballdatabank/archive/master.zip"
    backup_zip    <- "https://github.com/keberwein/baseballdatabank/archive/master.zip"
    chadwick_core <- "https://github.com/chadwickbureau/baseballdatabank/tree/master/core"
    chadwick_raw  <- "https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/"
    backup_raw    <- "https://raw.githubusercontent.com/keberwein/baseballdatabank/master/core/"

    # Try to ping the Chadwick Bureau repository. If that fails to connect, announce the switch to the backup repo.
    chadwick_up <- function(url){
        reachable <- isTRUE(baseballDBR::urlExists(url))
        if (!reachable) {
            message("Chadwick Bureau failed to connect, trying backup.")
        }
        reachable
    }

    # Download the archive to master.zip from whichever repository is reachable.
    fetch_zip <- function(probe_url){
        zip_url <- if (chadwick_up(probe_url)) chadwick_zip else backup_zip
        utils::download.file(zip_url, "master.zip")
    }

    # Read each csv and assign it in the global environment under its table name.
    load_tables <- function(paths, table_names){
        sources <- stats::setNames(as.list(paths), make.names(sub("\\.csv$", "", table_names)))
        list2env(lapply(sources, utils::read.csv, stringsAsFactors=FALSE), envir = .GlobalEnv)
    }

    zip_on_disk <- FALSE
    if (isTRUE(downloadZip)) {
        fetch_zip(chadwick_zip)
        zip_on_disk <- TRUE
    }

    if (!is.null(table)) {
        if("Master" %in% table) {
            warning('The "Master" table is referred to as "People" in the Baseball Data Bank. Downloading the "People table."')
            table <- gsub("Master", "People", table)
        }

        # The backup is used whenever the primary repository does not respond, so baseURL is always defined.
        baseURL <- if (chadwick_up(chadwick_core)) chadwick_raw else backup_raw
        csv_urls <- vapply(table, function(tbl) paste0(baseURL, tbl, ".csv"), character(1), USE.NAMES = FALSE)
        load_tables(csv_urls, table)
    }

    if (is.null(table) && isTRUE(AllTables)) {
        # Reuse the archive if downloadZip already fetched it during this call.
        if (!zip_on_disk) {
            fetch_zip(chadwick_core)
        }

        utils::unzip("master.zip")
        csv_files <- list.files(path = "baseballdatabank-master/core", pattern = "\\.csv$", full.names = TRUE)
        load_tables(csv_files, basename(csv_files))
        if (!isTRUE(downloadZip)) {
            unlink("master.zip")
        }
        unlink("baseballdatabank-master", recursive=TRUE)
    }

    invisible(NULL)
}

-------------------------------------------------------------------------------- /vignettes/Database_Tools.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Database Loading and Extracting Baseball Data" 3 | author: "Kris Eberwein" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Database Loading and Extracting Baseball Data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | The data contained in the Baseball Databank / Lahman Database are small enough to fit directly into memory using R. However, many may find it preferable to store the data in a local database to be accessed at a later time. The following examples use a local PostgreSQL database and the `RPostgreSQL` package. Many of the following methods would also work with other database connections such as the `RMySQL` and `DBI` packages. 13 | 14 | ## Loading Data 15 | 16 | The goal here is to extract the data from source, do the transformations in R, and load them into the database. With the `baseballDBR` and `RPostgreSQL` packages, this is a simple task. The following assumes we are starting with an empty PostgreSQL instance named "lahman." 17 | 18 | ```{r, eval=FALSE} 19 | library(baseballDBR) 20 | library(RPostgreSQL) 21 | library(dplyr) 22 | 23 | get_bbdb(AllTables = TRUE) 24 | 25 | # Make a list of all data frames. 26 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame))) 27 | 28 | # Load data base drivers and load all data frames in a loop. 29 | drv <- dbDriver("PostgreSQL") 30 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD") 31 | 32 | # Do some transformations on our selected dataframes.
33 | Batting <- mutate(Batting, BA = BA(Batting), PA = PA(Batting), OBP = OBP(Batting), OPS = OPS(Batting)) 34 | 35 | # Create a loop to write all of our data frames to our Postgres instance. 36 | for (i in 1:length(dbTables)) { 37 | if (dbExistsTable(con, dbTables[i])) { 38 | dbRemoveTable(con, dbTables[i]) 39 | } 40 | dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), row.names = FALSE) 41 | } 42 | 43 | # Make sure everything wrote correctly. 44 | test <- dbGetQuery(con, "SELECT * FROM Batting LIMIT 5") 45 | test 46 | rm(test) 47 | 48 | # Close the database connection. 49 | dbDisconnect(con) 50 | dbUnloadDriver(drv) 51 | ``` 52 | 53 | ## Accessing Data 54 | 55 | Once our data are loaded into a local database instance, working with the data becomes less cumbersome because we have the choice to only load into R the data that we need, or the choice to leverage `dplyr` or `SQL` backends to work with the data without loading into R at all. 56 | 57 | ### Using dplyr Backends 58 | 59 | The `dplyr` package allows us to work with remote or on-disk data stored in databases using a familiar syntax and `dplyr` verbs. For more on database backends, see the dplyr [Databases vignette](https://cran.r-project.org/package=dplyr). A current limitation of this method is that functions from the `baseballDBR` package will not work on a remote table, so the method is limited to `dplyr` verbs. 60 | 61 | ```{r, eval=FALSE} 62 | library(dplyr) 63 | library(baseballDBR) 64 | 65 | # Use an "src" connection to access the database. This requires the same arguments that RPostgreSQL uses to connect to the database. 66 | # Note: There are also src functions for connecting to various types of databases. 67 | pgcon <- src_postgres(host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD") 68 | 69 | # We're not loading the batting table, rather we're loading a method to access it. 70 | Batting_tbl <- tbl(pgcon, "Batting") 71 | 72 | # Use familiar dplyr verbs to access our data.
73 | Batting_tbl %>% select(playerID, yearID, H, AB) %>% filter(yearID >= 1900) 74 | 75 | # We can pull our data into R to perform some modifications. 76 | Batting <- collect(Batting_tbl, n = Inf) %>% mutate(BABIP = BABIP(Batting)) 77 | 78 | # Write the modified table back to the database. 79 | db_insert_into( con = pgcon$con, table = "Batting", values = Batting) 80 | 81 | ``` 82 | 83 | ### Using SQL Backends 84 | 85 | For those familiar with SQL, the same process can be achieved in our relational database's native language. 86 | 87 | ```{r, eval=FALSE} 88 | library(RPostgreSQL) 89 | library(baseballDBR) 90 | 91 | # Load data base drivers and load all data frames in a loop. 92 | drv <- dbDriver("PostgreSQL") 93 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD") 94 | 95 | # Load the Batting table into R. 96 | Batting <- dbGetQuery(con, "SELECT * FROM Batting") 97 | 98 | # Perform some modifications. 99 | Batting <- mutate(BABIP = BABIP(Batting)) 100 | 101 | # Write modified table back to the database. 102 | dbWriteTable("Batting", Batting) 103 | 104 | # Close the database connection.
105 | dbDisconnect(con) 106 | dbUnloadDriver(drv) 107 | 108 | ``` 109 | 110 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "README-", 12 | warning = FALSE, 13 | message=FALSE 14 | ) 15 | library(baseballDBR) 16 | ``` 17 | 18 | # BaseballDBR 19 | 20 | [![Build Status](https://travis-ci.org/keberwein/baseballDBR.png?branch=master)](https://travis-ci.org/keberwein/baseballDBR) 21 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/baseballDBR)](http://www.r-pkg.org/badges/version/baseballDBR) 22 | [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 23 | 24 | # Install 25 | 26 | * Install from CRAN 27 | 28 | ```{r eval=FALSE} 29 | install.packages("baseballDBR") 30 | ``` 31 | 32 | * Or, install the latest development version from GitHub: 33 | 34 | ```{r eval=FALSE} 35 | devtools::install_github("keberwein/baseballDBR") 36 | ``` 37 | 38 | # Gathering Data 39 | 40 | The `baseballDBR` package requires data that is formatted similar to the [Baseball Databank](https://github.com/chadwickbureau/baseballdatabank) or Sean Lahman's [Baseball Database](http://www.seanlahman.com/baseball-archive/statistics/). The package also contains the `get_bbdb()` function, which allows us to download the most up-to-date tables directly from the Chadwick Bureau's GitHub repository. For example, we can easily load the "Batting" table into our R environment. 
41 | 42 | ```{r} 43 | library(baseballDBR) 44 | 45 | get_bbdb(table = "Batting") 46 | head(Batting) 47 | ``` 48 | 49 | ### Use with the Lahman Package 50 | 51 | ```{r} 52 | library(Lahman) 53 | library(baseballDBR) 54 | 55 | Batting <- Lahman::Batting 56 | head(Batting) 57 | ``` 58 | 59 | 60 | # Adding Basic Metrics 61 | 62 | Simple batting metrics can be easily added to any batting data frame. For example, we can add slugging percentage, on-base percentage and on-base plus slugging. Note that OPS and OBP appears as "NA" for the years before IBB was tracked. 63 | 64 | ```{r} 65 | library(baseballDBR) 66 | 67 | Batting$SLG <- SLG(Batting) 68 | 69 | Batting$OBP <- OBP(Batting) 70 | 71 | head(Batting, 3) 72 | ``` 73 | 74 | # Advanced Metrics 75 | 76 | The package includes a suite of advanced metrics such as wOBA, RAA, and FIP, among others. Many of the advanced metrics require multiple tables. For example, the wOBA metric requires the Batting, Pitching, and Fielding tables in order to establish a player's regular defensive position. 77 | 78 | ```{r} 79 | library(baseballDBR) 80 | 81 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 82 | 83 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T) 84 | head(Batting, 3) 85 | ``` 86 | 87 | The code above uses [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn) wOBA values. The default behavior is to uses Tom Tango's adapted [SQL formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/). Other options include `Sep.Leagues`, which may act as a buffer to any bias created by the designated hitter. 88 | 89 | ```{r} 90 | library(baseballDBR) 91 | 92 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 93 | 94 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F, Sep.Leagues = T) 95 | head(Batting, 3) 96 | ``` 97 | 98 | We can also produce a data frame that only shows the wOBA multipliers. 
Notice the Fangraphs wOBA multipliers slightly differ from the Tango multipliers. 99 | 100 | ```{r} 101 | library(baseballDBR) 102 | 103 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 104 | 105 | fangraphs_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=T) 106 | head(fangraphs_woba, 3) 107 | 108 | tango_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=F) 109 | head(tango_woba, 3) 110 | 111 | 112 | ``` 113 | 114 | # Create Local Database 115 | 116 | A relational database is not needed to work with these data. However, we may want to store the data to be called more quickly at a later time. We can download all of the tables at once with the `get_bbdb()` function and then write them to an empty schema in our favorite database. The example uses a newly created PostgreSQL instance, but other database tools can be used assuming an appropriate R package exists. 117 | 118 | ```{r, eval=F} 119 | library(baseballDBR) 120 | library(RPostgreSQL) 121 | 122 | # Load all tables into the Global Environment. 123 | get_bbdb(AllTables = TRUE) 124 | 125 | # Make a list of all data frames. 126 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame))) 127 | 128 | # Load data base drivers and load all data frames in a loop. 129 | drv <- dbDriver("PostgreSQL") 130 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD") 131 | 132 | for (i in 1:length(dbTables)) { 133 | dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), overwrite = TRUE) 134 | } 135 | 136 | # Disconnect from database. 
137 | dbDisconnect(con) 138 | rm(con, drv) 139 | ``` 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /vignettes/wOBA.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Weighted On-base Average (wOBA)" 3 | author: "Kris Eberwein" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Weighted On-base Average (wOBA)} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | The `baseballDBR` package provides several variations of the wOBA calculation. There are two primary functions that provide the data and calculations. The `wOBA()` function provides the final calculation, while the `wOBA_values()` function provides the season average data that drive the higher level calculation. 13 | 14 | ## Quick Start 15 | 16 | ```{r, eval=FALSE} 17 | library(baseballDBR) 18 | # Load data from Baseball Databank 19 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 20 | 21 | Batting <- wOBA(Batting, Pitching, Fielding, Fangraphs = T) 22 | head(Batting, 3) 23 | ``` 24 | 25 | 26 | ## Understanding wOBA 27 | 28 | Weighted on-base average is a statistic first used by sabermetrician Tom Tango and published in [The Book](http://www.insidethebook.com/). The wOBA metric has been shown to strongly correlate with the number of runs scored. The basic formula is: 29 | 30 | $$\frac{wBB*BB + wHBP*HBP + wX1B*X1B + wX2B*X2B + wX3B*X3B + wHR*HR}{(AB+BB-IBB+SF+SH+HBP)=PA}$$ 31 | 32 | The basic formula is simple enough, but first we must find the `w` values, or weighted values. Calculating the weighted values is not as straightforward and is done by applying a system of linear weights to yearly league averages in order to create a "run scoring environment" for the year.
The `baseballDBR` package uses [Tom Tango's formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/) to calculate weighted values. Tango's `SQL` has been ported to `R` for our use. The wOBA functions also offer a "Fangraphs" argument, which uses the weights provided by [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn). The Fangraphs algorithm and Tango algorithm produce similar wOBA values, but they can be slightly different. 33 | 34 | ### Fangraphs wOBA vs Tango wOBA 35 | 36 | As we discussed above, the modifiers that Fangraphs produces are slightly different than the modifiers that the Tango algorithm produces, therefore the two produce slightly different wOBA values. The wOBA values are normally within one one-thousandth of one percent. 37 | 38 | **Why are they different?** 39 | 40 | The data from the Baseball Databank does not specify a player's position. Therefore, "fuzzy logic" is used to determine a player's primary position. This may cause instances where a player's statistics are weighted according to a position other than their primary position. 41 | 42 | ```{r, eval=FALSE} 43 | library(baseballDBR) 44 | library(dplyr) 45 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 46 | 47 | Batting$f_wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T) 48 | 49 | Batting$t_wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F) 50 | 51 | # Going to subset for players who had at least 100 at-bats and played in at least eighty games. 52 | # This should eliminate most of the pitchers and minor league call-ups. 53 | Batting_2016 <- subset(Batting, yearID >= 2016 & AB >= 100 & G >= 80) %>% 54 | arrange(desc(t_wOBA)) 55 | 56 | head(Batting_2016) 57 | 58 | ``` 59 | 60 | ### Arguments 61 | 62 | The `wOBA()` and `wOBA_values()` functions require three data frames. They also accept the following arguments: 63 | 64 | * Fangraphs: Should the function use Fangraphs wOBA values or the package's native Tango method?
65 | 66 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Babe Ruth's sacrifice fly `SF` metric is NA because that statistic wasn't tracked when he played, so his `wOBA` should be NA. Note that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players. 67 | 68 | * Sep.Leagues: Should the function determine separate wOBA values for the National and American leagues? Standard practice would be to use wOBA values that combine both leagues. Note, this option is not available if `Fangraphs=TRUE`. 69 | 70 | Even though wOBA is a batting metric, the Pitching and Fielding tables are used to determine a player's primary position. The tables should be full tables of entire years, and not a subset, because the wOBA calculation depends on yearly league average values. 71 | 72 | ### The wOBA_values Function 73 | 74 | The higher-level `wOBA()` function relies on `wOBA_values()`. It is not necessary to call the `wOBA_values()` function to use the `wOBA()` function, but the function is exported from the package to give users the opportunity for deeper analysis. Arguments include: 75 | 76 | * Sep.Leagues - If `TRUE`, this will calculate separate wOBA values for the American and National leagues. The default setting is `FALSE` because league separation is not typically performed in wOBA calculations. The advantage of separating the leagues is that the resulting wOBA values will naturally account for the DH and batting pitchers. 77 | 78 | * Fangraphs - If `TRUE` the function will use wOBA values provided by Fangraphs. The default is to use a ported version of Tom Tango's algorithm as applied to the Baseball Databank. The two algorithms produce similar but slightly different results.
The advantage to using the Tango algorithm is, it can be used in conjunction with `Sep.Leagues=TRUE`, whereas the Fangraphs data only provide for the combined leagues. 79 | 80 | ```{r, eval=FALSE} 81 | library(baseballDBR) 82 | # Load data from Baseball Databank 83 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 84 | 85 | # Run wOBA values for seperate leagues 86 | w_vals <- wOBA_values(BattingTable = Batting, FieldingTable = Fielding, PitchingTable = Pitching, Sep.Leagues = TRUE) 87 | 88 | ``` 89 | 90 | If we look at the data, we notice that the years 1871 to 1875 produce several NAs. This is due to incomplete or untracked data during that time period. We also notice there was only one league in existence during those years. Otherwise, the data are complete. The "league wOBA" for the two leagues is often close, but varies depending on the quality of play across various years. 91 | 92 | ```{r, eval=FALSE} 93 | head(w_vals) 94 | ``` 95 | 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BaseballDBR 2 | ======================================================================= 3 | 4 | [![Build Status](https://travis-ci.org/keberwein/baseballDBR.png?branch=master)](https://travis-ci.org/keberwein/baseballDBR) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/baseballDBR)](http://www.r-pkg.org/badges/version/baseballDBR) [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 5 | 6 | Install 7 | ======= 8 | 9 | - Install from CRAN 10 | 11 | ``` r 12 | install.packages("baseballDBR") 13 | ``` 14 | 15 | - Or, install the latest development version from GitHub: 16 | 17 | ``` r 18 | devtools::install_github("keberwein/baseballDBR") 19 | ``` 20 | 21 | Gathering Data 22 | ============== 23 
| 24 | The `baseballDBR` package requires data that is formatted similar to the [Baseball Databank](https://github.com/chadwickbureau/baseballdatabank) or Sean Lahman's [Baseball Database](http://www.seanlahman.com/baseball-archive/statistics/). The package also contains the `get_bbdb()` function, which allows us to download the most up-to-date tables directly from the Chadwick Bureau's GitHub repository. For example, we can easily load the "Batting" table into our R environment. 25 | 26 | ``` r 27 | library(baseballDBR) 28 | 29 | get_bbdb(table = "Batting") 30 | head(Batting) 31 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB 32 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0 33 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4 34 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2 35 | #> 4 allisdo01 1871 1 WS3 27 133 28 44 10 2 2 27 1 1 0 36 | #> 5 ansonca01 1871 1 RC1 25 120 29 39 11 3 0 16 6 2 2 37 | #> 6 armstbo01 1871 1 FW1 12 49 9 11 2 1 0 5 0 1 0 38 | #> SO IBB HBP SH SF GIDP 39 | #> 1 0 NA NA NA NA NA 40 | #> 2 0 NA NA NA NA NA 41 | #> 3 5 NA NA NA NA NA 42 | #> 4 2 NA NA NA NA NA 43 | #> 5 1 NA NA NA NA NA 44 | #> 6 1 NA NA NA NA NA 45 | ``` 46 | 47 | ### Use with the Lahman Package 48 | 49 | ``` r 50 | library(Lahman) 51 | library(baseballDBR) 52 | 53 | Batting <- Lahman::Batting 54 | head(Batting) 55 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB 56 | #> 1 abercda01 1871 1 TRO NA 1 4 0 0 0 0 0 0 0 0 0 57 | #> 2 addybo01 1871 1 RC1 NA 25 118 30 32 6 0 0 13 8 1 4 58 | #> 3 allisar01 1871 1 CL1 NA 29 137 28 40 4 5 0 19 3 1 2 59 | #> 4 allisdo01 1871 1 WS3 NA 27 133 28 44 10 2 2 27 1 1 0 60 | #> 5 ansonca01 1871 1 RC1 NA 25 120 29 39 11 3 0 16 6 2 2 61 | #> 6 armstbo01 1871 1 FW1 NA 12 49 9 11 2 1 0 5 0 1 0 62 | #> SO IBB HBP SH SF GIDP 63 | #> 1 0 NA NA NA NA NA 64 | #> 2 0 NA NA NA NA NA 65 | #> 3 5 NA NA NA NA NA 66 | #> 4 2 NA NA NA NA NA 67 | #> 5 1 NA NA NA NA NA 68 | #> 6 1 NA NA NA NA NA 
69 | ``` 70 | 71 | Adding Basic Metrics 72 | ==================== 73 | 74 | Simple batting metrics can be easily added to any batting data frame. For example, we can add slugging percentage, on-base percentage and on-base plus slugging. Note that OPS and OBP appears as "NA" for the years before IBB was tracked. 75 | 76 | ``` r 77 | library(baseballDBR) 78 | 79 | Batting$SLG <- SLG(Batting) 80 | 81 | Batting$OBP <- OBP(Batting) 82 | 83 | head(Batting, 3) 84 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB 85 | #> 1 abercda01 1871 1 TRO NA 1 4 0 0 0 0 0 0 0 0 0 86 | #> 2 addybo01 1871 1 RC1 NA 25 118 30 32 6 0 0 13 8 1 4 87 | #> 3 allisar01 1871 1 CL1 NA 29 137 28 40 4 5 0 19 3 1 2 88 | #> SO IBB HBP SH SF GIDP SLG OBP 89 | #> 1 0 NA NA NA NA NA 0.000 NA 90 | #> 2 0 NA NA NA NA NA 0.322 NA 91 | #> 3 5 NA NA NA NA NA 0.394 NA 92 | ``` 93 | 94 | Advanced Metrics 95 | ================ 96 | 97 | The package includes a suite of advanced metrics such as wOBA, RAA, and FIP, among others. Many of the advanced metrics require multiple tables. For example, the wOBA metric requires the Batting, Pitching, and Fielding tables in order to establish a player's regular defensive position. 98 | 99 | ``` r 100 | library(baseballDBR) 101 | 102 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 103 | 104 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T) 105 | head(Batting, 3) 106 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB 107 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0 108 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4 109 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2 110 | #> SO IBB HBP SH SF GIDP wOBA 111 | #> 1 0 NA NA NA NA NA 0.0000000 112 | #> 2 0 NA NA NA NA NA 0.2855902 113 | #> 3 5 NA NA NA NA NA 0.3078849 114 | ``` 115 | 116 | The code above uses [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn) wOBA values. 
The default behavior is to uses Tom Tango's adapted [SQL formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/). Other options include `Sep.Leagues`, which may act as a buffer to any bias created by the designated hitter. 117 | 118 | ``` r 119 | library(baseballDBR) 120 | 121 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 122 | 123 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F, Sep.Leagues = T) 124 | head(Batting, 3) 125 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB 126 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0 127 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4 128 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2 129 | #> SO IBB HBP SH SF GIDP wOBA 130 | #> 1 0 NA NA NA NA NA NA 131 | #> 2 0 NA NA NA NA NA NA 132 | #> 3 5 NA NA NA NA NA NA 133 | ``` 134 | 135 | We can also produce a data frame that only shows the wOBA multipliers. Notice the Fangraphs wOBA multipliers slightly differ from the Tango multipliers. 
136 | 137 | ``` r 138 | library(baseballDBR) 139 | 140 | get_bbdb(table = c("Batting", "Pitching", "Fielding")) 141 | 142 | fangraphs_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=T) 143 | head(fangraphs_woba, 3) 144 | #> yearID lg_woba woba_scale wBB wHBP w1B w2B w3B wHR runSB 145 | #> 1 2017 0.320 1.192 0.693 0.723 0.878 1.236 1.558 1.989 0.2 146 | #> 2 2016 0.318 1.212 0.691 0.721 0.878 1.242 1.569 2.015 0.2 147 | #> 3 2015 0.313 1.251 0.687 0.718 0.881 1.256 1.594 2.065 0.2 148 | #> runCS lg_r_pa lg_r_w cFIP 149 | #> 1 -0.421 0.121 10.007 3.126 150 | #> 2 -0.410 0.118 9.778 3.147 151 | #> 3 -0.392 0.113 9.421 3.134 152 | 153 | tango_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=F) 154 | head(tango_woba, 3) 155 | #> # A tibble: 3 x 35 156 | #> # Groups: yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, 157 | #> # runSB, runCS [3] 158 | #> yearID AB R H X2B X3B HR SB CS BB SO IBB 159 | #> 160 | #> 1 1871 23179 5659 6616 950 495 101 948 270 817 371 0 161 | #> 2 1872 34755 7487 10003 1212 293 88 536 264 477 532 0 162 | #> 3 1873 40346 8487 11832 1308 472 102 395 253 747 552 0 163 | #> # ... with 23 more variables: HBP , SF , RperOut , 164 | #> # runBB , runHBP , run1B , run2B , run3B , 165 | #> # runHR , runSB , runCS , runMinus , runPlus , 166 | #> # lg_woba , woba_scale , wBB , wHBP , w1B , 167 | #> # w2B , w3B , wHR , wSB , wCS 168 | ``` 169 | 170 | Create Local Database 171 | ===================== 172 | 173 | A relational database is not needed to work with these data. However, we may want to store the data to be called more quickly at a later time. We can download all of the tables at once with the `get_bbdb()` function and then write them to an empty schema in our favorite database. The example uses a newly created PostgreSQL instance, but other database tools can be used assuming an appropriate R package exists. 
174 | 175 | ``` r 176 | library(baseballDBR) 177 | library(RPostgreSQL) 178 | 179 | # Load all tables into the Global Environment. 180 | get_bbdb(AllTables = TRUE) 181 | 182 | # Make a list of all data frames. 183 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame))) 184 | 185 | # Load data base drivers and load all data frames in a loop. 186 | drv <- dbDriver("PostgreSQL") 187 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD") 188 | 189 | for (i in 1:length(dbTables)) { 190 | dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), overwrite = TRUE) 191 | } 192 | 193 | # Disconnect from database. 194 | dbDisconnect(con) 195 | rm(con, drv) 196 | ``` 197 | -------------------------------------------------------------------------------- /R/pitchingStats.R: --------------------------------------------------------------------------------

#' @title Pitching: Calculate walks per nine innings
#' @description Find the walks per nine innings for pitchers with one or more innings pitched.
#' Required fields from the Pitching table are; "IPouts", and "BB."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one element per row of \code{dat}: walks per nine innings rounded to three decimals, or
#' \code{NA} for rows with fewer than three outs recorded (\code{IPouts} of 2 or less).
#' @keywords BB BB_9 BB9 bb/9
#' @family Pitching functions
#' @export BB_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$BB_9 <- BB_9(Pitching2016)
#'
BB_9 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }

    if (!all(c("BB", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data inclueds 'BB', and 'IPouts'", call. = FALSE)
    }

    # Keep the ifelse() result itself: assigning inside the `yes` argument stored the
    # unfiltered rates, so pitchers with less than one inning were never set to NA.
    ifelse(dat$IPouts > 2, round((dat$BB*9 / (dat$IPouts / 3)), 3), NA)
}

#' @title Pitching: Fielding Independent Pitching (FIP)
#' @description Find the FIP for all pitchers with one or strike outs in a particular season.
#' Required fields from the Pitching table are "BB", "HBP", "SO", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
#' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
#' park factors between the American and National leagues. The default for this argument is FALSE.
#' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
#' were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954.
#' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
#' If FALSE, the wOBA calculation will return NaN for years with missing data.
#' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal
#' formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and
#' does not require an external download. If not specified, the default is set to FALSE.
#' @return A numeric vector with one FIP value per row of \code{dat}. Rows with no strike outs or no
#' recorded outs are \code{NA}.
#' @keywords FIP fielding independent pitching
#' @family Pitching functions
#' @export FIP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$FIP <- FIP(Pitching2016, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE)
#'
FIP <- function(dat = NULL, Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }

  # HR and yearID (and lgID when leagues are split) are used below, so check
  # them up front instead of failing later inside the join or the formula.
  required <- c("yearID", "HR", "BB", "HBP", "SO", "IPouts")
  if (isTRUE(Sep.Leagues)) {
    required <- c(required, "lgID")
  }
  if (!all(required %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes ",
         paste0("'", required, "'", collapse = ", "), call. = FALSE)
  }

  # Both flags are scalars, so use the short-circuit operator.
  if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)) {
    print("The Fangraphs Guts table does not sperate FIP by league. Applying the default calculation...")
    Fangraphs <- FALSE
  }

  # Season (or season/league) FIP constants.
  constants <- fip_values(dat = dat, Fangraphs = Fangraphs, Sep.Leagues = Sep.Leagues)

  if (isTRUE(NA_to_zero)) {
    dat$HBP[is.na(dat$HBP)] <- 0
  }

  join_keys <- if (isTRUE(Sep.Leagues)) c("yearID", "lgID") else "yearID"
  constants <- constants[, c(join_keys, "cFIP")]
  dat <- dplyr::left_join(dat, constants, by = join_keys)

  innings <- dat$IPouts / 3
  # The previous formula added and then subtracted IBB. That changed nothing
  # numerically but turned the result into NA whenever IBB was missing.
  fip <- (13 * dat$HR + 3 * (dat$BB + dat$HBP) - 2 * dat$SO) / innings + dat$cFIP

  # Return the masked vector. The old code discarded the mask and could return
  # the constants data frame when no pitcher had a strike out.
  ifelse(dat$SO > 0 & dat$IPouts > 0, fip, NA_real_)
}


#' @title Pitching: Calculate Hits per Nine innings
#' @description Find the number of hits a pitcher throws per nine innings pitched.
#' Required fields from the Pitching table are; "H", "BB", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with fewer
#' than three recorded outs are \code{NA}.
#' @keywords hits per nine innings
#' @family Pitching functions
#' @export H_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$H_9 <- H_9(Pitching2016)
#'
H_9 <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # The message now lists every column that is actually checked.
  if (!all(c("H", "BB", "IPouts") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'H', 'BB', and 'IPouts'",
         call. = FALSE)
  }

  innings <- dat$IPouts / 3
  # Return the ifelse() result so pitchers with less than one inning become NA.
  ifelse(dat$IPouts > 2, round((dat$H * 9) / innings, 3), NA_real_)
}

#' @title Pitching: Calculate Home Runs per Nine innings
#' @description Find the number of home runs a pitcher allows per nine innings pitched.
#' Required fields from the Pitching table are; "HR" and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with fewer
#' than three recorded outs are \code{NA}.
#' @keywords home runs per nine innings
#' @family Pitching functions
#' @export HR_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$HR_9 <- HR_9(Pitching2016)
#'
HR_9 <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("HR", "IPouts") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'HR', and 'IPouts'",
         call. = FALSE)
  }

  innings <- dat$IPouts / 3
  # Return the ifelse() result so pitchers with less than one inning become NA.
  ifelse(dat$IPouts > 2, round((dat$HR * 9) / innings, 3), NA_real_)
}

#' @title Pitching: Calculate the innings pitched
#' @description Find the number of innings a player has pitched for a season.
#' Required fields from the Pitching table are; "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with the innings pitched (\code{IPouts / 3}) for every row of \code{dat},
#' rounded to three decimals.
#' @keywords innings pitched
#' @family Pitching functions
#' @export IP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$IP <- IP(Pitching2016)
#'
IP <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("IPouts") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'IPouts'",
         call. = FALSE)
  }

  # Innings pitched is a plain count and is defined for any number of outs, so
  # no rows are masked. The previous `IPouts > 2` guard never masked anything;
  # its only effect was to return this function when no pitcher had a full inning.
  round(dat$IPouts / 3, 3)
}

#' @title Pitching: Calculate Strikeouts per Nine innings
#' @description Find the number of strikeouts a pitcher records per nine innings pitched.
#' Required fields from the Pitching table are; "H", "BB", "IPouts", and "SO."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with fewer
#' than three recorded outs are \code{NA}.
#' @keywords strikeouts per nine innings
#' @family Pitching functions
#' @export K_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$K_9 <- K_9(Pitching2016)
#'
K_9 <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("H", "BB", "IPouts", "SO") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'H', 'BB', 'SO', and 'IPouts'",
         call. = FALSE)
  }

  innings <- dat$IPouts / 3
  # Return the ifelse() result so pitchers with less than one inning become NA.
  ifelse(dat$IPouts > 2, round((dat$SO * 9) / innings, 3), NA_real_)
}

#' @title Pitching: Calculate the left on base percentage
#' @description Find the percentage of base runners that a pitcher leaves on base over the course of a season.
#' Required fields from the Pitching table are; "H", "BB", "HBP", "R", "HR", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with fewer
#' than three recorded outs are \code{NA}.
#' @keywords LOB_pct LOB LOB percentage
#' @family Pitching functions
#' @export LOB_pct
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$LOB_pct <- LOB_pct(Pitching2016)
#'
LOB_pct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # IPouts drives the mask below, so it is checked too. The old message listed
  # batting columns that this function never reads.
  if (!all(c("H", "BB", "HBP", "R", "HR", "IPouts") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'H', 'BB', 'HBP', 'R', 'HR', and 'IPouts'",
         call. = FALSE)
  }

  runners <- dat$H + dat$BB + dat$HBP
  lob <- (runners - dat$R) / (runners - (1.4 * dat$HR))
  # Return the ifelse() result so pitchers with less than one inning become NA.
  ifelse(dat$IPouts > 2, round(lob, 3), NA_real_)
}

#' @title Pitching: Calculate Walks plus Hits per Innings Pitched
#' @description Find the number of walks plus hits a pitcher allows per inning pitched.
#' Required fields from the Pitching table are; "H", "BB", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with fewer
#' than three recorded outs are \code{NA}.
#' @keywords Walks plus Hits per Innings Pitched WHIP
#' @family Pitching functions
#' @export WHIP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$WHIP <- WHIP(Pitching2016)
#'
WHIP <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("H", "BB", "IPouts") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'H', 'BB', and 'IPouts'",
         call. = FALSE)
  }

  innings <- dat$IPouts / 3
  # Return the ifelse() result so the NA mask is applied. Assigning inside
  # ifelse() threw the mask away and returned this function when no row qualified.
  ifelse(dat$IPouts > 2, round((dat$BB + dat$H) / innings, 3), NA_real_)
}




--------------------------------------------------------------------------------
/R/woba_values.R:
--------------------------------------------------------------------------------
#' @title Return wOBA values per season
#' @description Get wOBA values for each year in your database. This calculation requires all fields of
#' the Pitching, Fielding and Batting tables from the Lahman package, or a comparable data set. The function uses
#' a version of Tom Tango's wOBA formula by default, but can also return Fangraphs wOBA values.
#' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param Sep.Leagues If TRUE, this will split the calculation and return unique wOBA values for the various leagues. This can be
#' helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.
#' @param Fangraphs if TRUE the function will return the Fangraphs wOBA values. By default the function uses a method adapted from
#' Tom Tango. These values are often very close to Fangraphs, but are not the same due to Fangraphs using a different algorithm.
#' This can not be used in conjunction with the \code{Sep.Leagues} argument because the Fangraphs table is not split by league;
#' when both are TRUE a notice is printed and the internal calculation is used instead.
#' @return A data frame with one row per season (per season and league when \code{Sep.Leagues} is TRUE). With
#' \code{Fangraphs = TRUE} it is the scraped Fangraphs table renamed to \code{yearID}, \code{lg_woba}, \code{woba_scale},
#' the event weights, \code{runSB}, \code{runCS}, \code{lg_r_pa}, \code{lg_r_w} and \code{cFIP}. Otherwise it holds the
#' yearly batting totals, the run values, \code{runMinus}, \code{runPlus}, \code{lg_woba}, \code{woba_scale} and the
#' \code{w*} event weights, and is still a grouped data frame.
#' @keywords woba, wOBA, on base average, fangraphs
#' @importFrom rvest html_node html_table
#' @importFrom xml2 read_html
#' @importFrom stats setNames
#' @import dplyr
#' @export wOBA_values
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#' data("Pitching2016")
#' head(Pitching2016)
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' woba_df <- wOBA_values(Batting2016, Pitching2016, Fielding2016, Sep.Leagues=FALSE, Fangraphs=FALSE)
#'

wOBA_values <- function(BattingTable, PitchingTable, FieldingTable, Sep.Leagues=FALSE, Fangraphs=FALSE){
    # Declare values for Rcheck so it won't throw a note.
    POS=yearID=postf=playerID=teamID=lgID=G=IPouts=R=RperOut=runBB=run1B=run2B=runHBP=run3B=
        runHR=runSB=runCS=AB=H=X2B=X3B=HR=SB=CS=BB=SO=IBB=HBP=SF=runPlus=runMinus=wOBAscale=NULL
    # Make sure users don't contradict themselves: the league split wins and the
    # Fangraphs download is switched off.
    if(isTRUE(Sep.Leagues) & isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs=FALSE
    }

    if(isTRUE(Fangraphs)){
        # If user wants to use Fangraphs, grab it from the website (requires network access).
        # NOTE(review): setNames() supplies 14 names, so this presumably relies on the
        # scraped table having exactly 14 columns in this order -- confirm against the live page.
        runsBatting <- xml2::read_html("https://www.fangraphs.com/guts.aspx?type=cn") %>%
            html_node(xpath = '//*[(@id = "GutsBoard1_dg1_ctl00")]') %>%
            html_table() %>%
            setNames(c("yearID", "lg_woba", "woba_scale", "wBB", "wHBP", "w1B", "w2B",
                       "w3B", "wHR", "runSB", "runCS", "lg_r_pa", "lg_r_w", "cFIP"))
    }

    if(!isTRUE(Fangraphs)){
        # Find primary positions
        fielding <- FieldingTable
        # "postf" flags rows with POS == "OF" after 1995; those rows are dropped and games
        # are then summed per player/year/team/league/position.
        # The original note says this filters out Natl. League players who may have
        # played as DH in inter-league games and may have multiple entries at diff. positions.
        PrimPos <- dplyr::mutate(fielding, postf=ifelse(POS=="OF" & yearID>1995, 1,0)) %>%
            subset(postf==0,
                   select=c("playerID", "yearID", "teamID", "lgID","G", "POS")) %>%
            group_by(playerID, yearID, teamID, lgID, POS) %>%
            summarise(G = sum(G))

        # Find a run environment for each season, including pitchers.
        pitching <- PitchingTable %>% subset(select=c("yearID", "playerID", "lgID","R", "IPouts"))

        # Keep only fielding rows at pitcher and attach their runs allowed and outs recorded.
        pitchersPOS <- subset(PrimPos, POS=="P")

        pitchingLRPO <- inner_join(pitchersPOS, pitching, by=c("yearID", "playerID", "lgID"))


        LeagueRunsPerOut <- subset(pitchingLRPO, select=c("yearID", "lgID", "R", "IPouts")) %>%
            # Set NA to 0 so the sums will work.
            # NOTE(review): only IPouts is cleaned; an NA in R would still make sum(R) NA -- confirm intent.
            mutate(IPouts=ifelse(is.na(IPouts),0,IPouts))

        # Check to see if user wants the AL and NL split.
        if(isTRUE(Sep.Leagues)){
            # Runs per out for every season/league combination.
            LeagueRunsPerOut <- group_by(LeagueRunsPerOut, yearID, lgID) %>%
                summarise(R=sum(R), IPouts=sum(IPouts)) %>%
                mutate(RperOut=R/IPouts) %>%
                rename(totR=R, totOuts=IPouts)

            # Same linear weights as the single-league branch below, keyed by year and league.
            RunValues <- subset(LeagueRunsPerOut, select=c("yearID", "lgID", "RperOut")) %>%
                group_by(yearID, lgID) %>%
                mutate(runBB=RperOut+0.14, runHBP=runBB+0.025, run1B=runBB+0.155, run2B=run1B+0.3,
                       run3B=run2B+0.27, runHR=1.4, runSB=0.2, runCS=(2*RperOut)+0.075) %>%
                group_by(yearID, lgID ,RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS)
        } else {
            # Runs per out for every season, leagues combined.
            LeagueRunsPerOut <- group_by(LeagueRunsPerOut, yearID) %>%
                summarise(R=sum(R), IPouts=sum(IPouts)) %>%
                mutate(RperOut=R/IPouts) %>%
                rename(totR=R, totOuts=IPouts)

            # Calculate the Run Values for each event using Tom Tango's linear weights.
            # More info from Tango can be found here:
            # http://www.insidethebook.com/ee/index.php/site/comments/woba_year_by_year_calculations/
            # Note that HR and SB are static values. Tango admits this isn't perfect but is close.
            RunValues <- subset(LeagueRunsPerOut, select=c("yearID", "RperOut")) %>% group_by(yearID) %>%
                mutate(runBB=RperOut+0.14, runHBP=runBB+0.025, run1B=runBB+0.155, run2B=run1B+0.3,
                       run3B=run2B+0.27, runHR=1.4, runSB=0.2, runCS=(2*RperOut)+0.075) %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS)
        }


        # Use Position Players table to find the runsPlus and runsMinus values to use in the wOBA multiplier.
        batting <- BattingTable
        # Drop batting "G" so it does not clash with the games column coming from PrimPos.
        batting <- batting[, !names(batting) %in% c("G")]
        # NOTE(review): PrimPos has one row per player/year/team/league/position while the join
        # keys are playerID, yearID and lgID, so a batting row looks like it can match several
        # PrimPos rows and be counted more than once in the sums below -- confirm.
        batting <- inner_join(batting, PrimPos, by=c("playerID", "yearID", "lgID"))
        # Replace NA with 0, otherwise our runsMinus and runsPlus calculations will throw NA.
        batting[is.na(batting)] <- 0

        if(isTRUE(Sep.Leagues)){
            # Summarize values by year and league.
            yearbatting <- subset(batting, select=c("yearID", "lgID", "AB", "R", "H", "X2B", "X3B", "HR",
                                                    "SB", "CS", "BB", "SO", "IBB", "HBP", "SF")) %>%
                group_by(yearID, lgID) %>%
                summarise(AB=sum(AB), R=sum(R), H=sum(H), X2B=sum(X2B), X3B=sum(X3B), HR=sum(HR),
                          SB=sum(SB), CS=sum(CS), BB=sum(BB), SO=sum(SO), IBB=sum(IBB), HBP=sum(HBP),
                          SF=sum(SF))

            # Join yearly aggregates with the RunValues modifiers.
            runsBatting <- left_join(yearbatting, RunValues, by= c("yearID", "lgID")) %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS) %>%
                # runMinus: total run value of all events divided by outs made (AB - H + SF).
                mutate(runMinus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                       (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (AB-H+SF)) %>%
                # runPlus: the same numerator divided by times on base (BB - IBB + HBP + H).
                mutate(runPlus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                      (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (BB-IBB+HBP+H)) %>%
                # Calculate league wOBA.
                # NOTE(review): the numerator adds IBB while the denominator subtracts it -- confirm the sign.
                mutate(lg_woba = (H+BB+IBB+HBP) / (AB+BB-IBB+HBP+SF)) %>%
                # Calculate wOBA scale.
                mutate(woba_scale = 1/(runPlus+runMinus)) %>%
                # wOBA hit-event modifiers.
                mutate(wBB = (runBB+runMinus)*woba_scale, wHBP = (runHBP+runMinus)*woba_scale,
                       w1B = (run1B+runMinus)*woba_scale, w2B = (run2B+runMinus)*woba_scale,
                       w3B = (run3B+runMinus)*woba_scale, wHR = (runHR+runMinus)*woba_scale,
                       wSB = runSB*woba_scale, wCS = runCS*woba_scale)
        } else {
            # Summarize values by year.
            yearbatting <- subset(batting, select=c("yearID", "AB", "R", "H", "X2B", "X3B", "HR",
                                                    "SB", "CS", "BB", "SO", "IBB", "HBP", "SF")) %>%
                group_by(yearID) %>%
                summarise(AB=sum(AB), R=sum(R), H=sum(H), X2B=sum(X2B), X3B=sum(X3B), HR=sum(HR),
                          SB=sum(SB), CS=sum(CS), BB=sum(BB), SO=sum(SO), IBB=sum(IBB), HBP=sum(HBP),
                          SF=sum(SF))

            # Join yearly aggregates with the RunValues modifiers.
            runsBatting <- left_join(yearbatting, RunValues, by="yearID") %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS) %>%
                # runMinus: total run value of all events divided by outs made (AB - H + SF).
                mutate(runMinus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                       (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (AB-H+SF)) %>%
                # runPlus: the same numerator divided by times on base (BB - IBB + HBP + H).
                mutate(runPlus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                      (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (BB-IBB+HBP+H)) %>%
                # Calculate league wOBA.
                # NOTE(review): the numerator adds IBB while the denominator subtracts it -- confirm the sign.
                mutate(lg_woba = (H+BB+IBB+HBP) / (AB+BB-IBB+HBP+SF)) %>%
                # Calculate wOBA scale.
                mutate(woba_scale = 1/(runPlus+runMinus)) %>%
                # wOBA hit-event modifiers.
                mutate(wBB = (runBB+runMinus)*woba_scale, wHBP = (runHBP+runMinus)*woba_scale,
                       w1B = (run1B+runMinus)*woba_scale, w2B = (run2B+runMinus)*woba_scale,
                       w3B = (run3B+runMinus)*woba_scale, wHR = (runHR+runMinus)*woba_scale,
                       wSB = runSB*woba_scale, wCS = runCS*woba_scale)
        }
    }
    # Either the scraped Fangraphs table or the internally computed weights.
    return(runsBatting)
}

--------------------------------------------------------------------------------
/R/battingStats.R:
--------------------------------------------------------------------------------

#' @title Batting: Calculate batting average
#' @description Find batting average for batters with more than zero at bats.
#' Required fields from the Batting table are; "AB", and "H."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with zero
#' at bats are \code{NA}.
#' @keywords BA batting average
#' @family Batting functions
#' @export BA
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BA <- BA(Batting2016)
#'
BA <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "H") %in% colnames(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'H', and 'AB'",
         call. = FALSE)
  }

  # Return the ifelse() result so the NA mask is applied. Assigning inside
  # ifelse() threw the mask away and returned this function when no row qualified.
  ifelse(dat$AB > 0, round(dat$H / dat$AB, 3), NA_real_)
}

#' @title Batting: Calculate batting average on balls in play (BABIP)
#' @description Find BABIP for batters with more than zero at bats.
#' Required fields from the Batting table are; "AB", "H", "HR", "SO" and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with zero
#' at bats, or with no balls put in play, are \code{NA}.
#' @keywords BABIP batting average on balls in play
#' @family Batting functions
#' @export BABIP
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BABIP <- BABIP(Batting2016)
#'
BABIP <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # HR is part of the formula, so it is checked as well.
  if (!all(c("AB", "SO", "H", "SF", "HR") %in% colnames(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'AB', 'SO', 'H', 'HR' and 'SF'",
         call. = FALSE)
  }

  in_play <- dat$AB - dat$SO - dat$HR + dat$SF
  # A batter can have at bats but no balls in play (e.g. only strike outs);
  # mask those rows too instead of returning NaN/Inf.
  ifelse(dat$AB > 0 & in_play > 0, round((dat$H - dat$HR) / in_play, 3), NA_real_)
}

#' @title Batting: Calculate base on ball percentage
#' @description Find base on ball percentage for batters with more than zero at bats.
#' Required fields from the Batting table are; "AB", "BB", "HBP", "SF", and "SH."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector of percentages (0-100) with one value per row of \code{dat}, rounded to three
#' decimals. Rows with zero at bats are \code{NA}.
#' @keywords BBpct base on ball percentage bb
#' @family Batting functions
#' @export BBpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BBpct <- BBpct(Batting2016)
#'
BBpct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # The old message also asked for 'IBB', which is neither checked nor used.
  if (!all(c("AB", "BB", "HBP", "SF", "SH") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'AB', 'BB', 'HBP', 'SF', and 'SH'",
         call. = FALSE)
  }

  plate_app <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH
  # Return the ifelse() result so rows without at bats become NA.
  ifelse(dat$AB > 0, round((dat$BB / plate_app) * 100, 3), NA_real_)
}

#' @title Batting: Calculate a batter's contact rate
#' @description Find the contact rate for batters.
#' Required fields from the batting table are "AB" and "SO."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector of percentages (0-100) with one value per row of \code{dat}, rounded to three
#' decimals. Rows with zero at bats are \code{NA}.
#' @keywords CTpct contact rate
#' @family Batting functions
#' @export CTpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$CTpct <- CTpct(Batting2016)
#'
CTpct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "SO") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'AB' and 'SO'",
         call. = FALSE)
  }

  # Return the ifelse() result so the NA mask is applied. Assigning inside
  # ifelse() threw the mask away and returned this function when no row qualified.
  ifelse(dat$AB > 0, round(((dat$AB - dat$SO) / dat$AB) * 100, 3), NA_real_)
}

#' @title Batting: Calculate home run percentage
#' @description Find home run percentage for batters with more than zero at bats.
#' Required fields from the Batting table are "AB" and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector of percentages (0-100) with one value per row of \code{dat}, rounded to three
#' decimals. Rows with zero at bats are \code{NA}; batters with at bats but no home runs get 0.
#' @keywords HRpct home run percentage
#' @family Batting functions
#' @export HRpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$HRpct <- HRpct(Batting2016)
#'
HRpct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'AB', 'HR'",
         call. = FALSE)
  }

  # Guard on at bats, as documented. The previous `HR > 0` test did not protect
  # the division by AB and would have hidden legitimate 0% values.
  ifelse(dat$AB > 0, round(dat$HR / dat$AB * 100, 3), NA_real_)
}

#' @title Batting: Calculate ISO for batters
#' @description Find isolated power (ISO) for batters with more than zero at bats.
#' Required fields from the batting table are "AB", "H", "X2B", "X3B", and "HR."
#' @param dat A data frame you would wish to calculate.
#' The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with zero
#' at bats are \code{NA}.
#' @keywords ISO isolated power
#' @family Batting functions
#' @export ISO
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$ISO <- ISO(Batting2016)
#'
ISO <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # H is part of the formula, so it is checked as well.
  if (!all(c("AB", "H", "X2B", "X3B", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'X2B', 'X3B' and 'HR'",
         call. = FALSE)
  }

  singles <- dat$H - dat$X2B - dat$X3B - dat$HR
  total_bases <- singles + (dat$X2B * 2) + (dat$X3B * 3) + (dat$HR * 4)
  # ISO is slugging percentage minus batting average.
  iso <- (total_bases / dat$AB) - dat$H / dat$AB
  # Return the ifelse() result so rows without at bats become NA.
  ifelse(dat$AB > 0, round(iso, 3), NA_real_)
}

#' @title Batting: Calculate strikeout percentage
#' @description Find strikeout percentage for batters with more than zero at bats.
#' Required fields from the Batting table are; "AB", "SO", "BB", "HBP", "SF", and "SH."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector of percentages (0-100) with one value per row of \code{dat}, rounded to three
#' decimals. Rows with zero at bats are \code{NA}; batters with at bats but no strike outs get 0.
#' @keywords Kpct strikeout percentage
#' @family Batting functions
#' @export Kpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$Kpct <- Kpct(Batting2016)
#'
Kpct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "SO", "BB", "HBP", "SF", "SH") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'AB', 'SO', 'BB', 'HBP', 'SF', and 'SH'",
         call. = FALSE)
  }

  plate_app <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH
  # Guard on at bats, as documented. The previous `SO > 0` test did not protect
  # the division and would have hidden legitimate 0% values.
  ifelse(dat$AB > 0, round((dat$SO / plate_app) * 100, 3), NA_real_)
}

#' @title Batting: Calculate on base percentage (OBP)
#' @description Find the OBP for batters with more than zero hits.
#' Required fields from the batting table are "H", "BB", "HBP", "AB", and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with zero
#' hits are \code{NA}.
#' @keywords OBP on base percentage
#' @family Batting functions
#' @export OBP
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$OBP <- OBP(Batting2016)
#'
OBP <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("H", "BB", "HBP", "AB", "SF") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'H', 'AB', 'BB', 'HBP' and 'SF'",
         call. = FALSE)
  }

  on_base <- dat$H + dat$BB + dat$HBP
  chances <- dat$AB + dat$BB + dat$HBP + dat$SF
  # Return the ifelse() result so rows without hits become NA.
  ifelse(dat$H > 0, round(on_base / chances, 3), NA_real_)
}

#' @title Batting: Calculate on base percentage plus slugging (OPS)
#' @description Find the OPS for batters with more than zero hits.
#' Required fields from the batting table are "H", "X2B", "X3B", "HR", "BB", "HBP", "AB" and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}, rounded to three decimals. Rows with zero
#' hits are \code{NA}.
#' @keywords OPS on base percentage plus slugging
#' @family Batting functions
#' @export OPS
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$OPS <- OPS(Batting2016)
#'
OPS <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # The message now lists every column that is actually checked.
  if (!all(c("H", "BB", "HBP", "AB", "SF", "X2B", "X3B", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'H', 'AB', 'BB', 'HBP', 'SF', 'X2B', 'X3B', and 'HR'",
         call. = FALSE)
  }

  obp <- (dat$H + dat$BB + dat$HBP) / (dat$AB + dat$BB + dat$HBP + dat$SF)
  singles <- dat$H - dat$X2B - dat$X3B - dat$HR
  slg <- (singles + (dat$X2B * 2) + (dat$X3B * 3) + (dat$HR * 4)) / dat$AB
  # Return the ifelse() result so rows without hits become NA.
  ifelse(dat$H > 0, round(obp + slg, 3), NA_real_)
}

#' @title Batting: Calculate plate appearances for batters
#' @description Find the plate appearances (PA) for batters.
#' Required fields from the batting table are "AB", "BB", "HBP", "SH", and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with the plate appearances for every row of \code{dat}.
#' @keywords PA plate appearances
#' @family Batting functions
#' @export PA
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$PA <- PA(Batting2016)
#'
PA <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "BB", "HBP", "SF", "SH") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'AB', 'BB', 'HBP', 'SF', and 'SH'",
         call. = FALSE)
  }

  # Sacrifice hits are plate appearances too. They are documented as required
  # and used in the BBpct/Kpct/XBHpct denominators, but were left out of the sum here.
  # A plain count needs no mask, so the two-argument ifelse() was dropped.
  dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH
}

#' @title Batting: Calculate extra base percentage
#' @description Find extra base percentage for batters with more than zero at bats.
#' Required fields from the batting table are "AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector of percentages (0-100) with one value per row of \code{dat}, rounded to three
#' decimals. Rows with zero at bats are \code{NA}.
#' @keywords XBHpct extra base percentage
#' @family Batting functions
#' @export XBHpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$XBHpct <- XBHpct(Batting2016)
#'
XBHpct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  # The old message listed 'X2B' twice and never mentioned 'X3B'.
  if (!all(c("AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data includes 'AB', 'BB', 'HBP', 'SF', 'SH', 'X2B', 'X3B' and 'HR'",
         call. = FALSE)
  }

  extra_base_hits <- dat$X2B + dat$X3B + dat$HR
  plate_app <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH
  # Return the ifelse() result so rows without at bats become NA.
  ifelse(dat$AB > 0, round((extra_base_hits / plate_app) * 100, 3), NA_real_)
}

#' @title Batting: Calculate extra base per hit
#' @description Find the average extra bases per hit for batters with more than zero hits.
#' Required fields from the batting table are "H", "X2B", "X3B", "HR"."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one value per row of \code{dat}: extra-base hits as a percentage (0-100) of
#' all hits, rounded to three decimals. Rows with zero hits are \code{NA}.
#' @keywords XBperH extra base per hit
#' @family Batting functions
#' @export XBperH
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$XBperH <- XBperH(Batting2016)
#'
XBperH <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("H", "X2B", "X3B", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'H', 'X2B', 'X3B' and 'HR'",
         call. = FALSE)
  }

  extra_base_hits <- dat$X2B + dat$X3B + dat$HR
  # Return the ifelse() result so the NA mask is applied. Assigning inside
  # ifelse() threw the mask away and returned this function when no row qualified.
  ifelse(dat$H > 0, round((extra_base_hits / dat$H) * 100, 3), NA_real_)
}

#' @title Batting: Calculate Runs Created using the basic formula.
#' @description Find the runs created using the basic formula presented by Bill James in the late 1970s:
#' \code{(H + BB) * TB / (AB + BB)}.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one value per row of \code{dat}. Rows with zero at bats are \code{NA}.
#' @keywords RCbasic runs created
#' @family Batting functions
#' @export RCbasic
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$RCbasic <- RCbasic(Batting2016)
#'
RCbasic <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "H", "BB", "X2B", "X3B", "HR") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'AB', 'H', 'BB', 'X2B', 'X3B', and 'HR.'",
         call. = FALSE)
  }

  # Total bases must start from singles. H already contains the extra-base hits,
  # so the previous `H + 2*X2B + 3*X3B + 4*HR` counted each of them twice.
  singles <- dat$H - dat$X2B - dat$X3B - dat$HR
  total_bases <- singles + 2 * dat$X2B + 3 * dat$X3B + 4 * dat$HR
  runs_created <- ((dat$H + dat$BB) * total_bases) / (dat$AB + dat$BB)
  # Return the ifelse() result so rows without at bats become NA.
  ifelse(dat$AB > 0, runs_created, NA_real_)
}

#' @title Batting: Calculate Runs Created using the technical formula.
#' @description The "Technical Version" is the most well-known formula for RC. It adds several factors to the
#' basic formula such as sacrifice hits, stolen bases and intentional base on balls.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
#' "SF" and "SH," and "IBB."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one value per row of \code{dat}. Rows with zero at bats are \code{NA}.
#' @keywords RCtech runs created
#' @family Batting functions
#' @export RCtech
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$RCtech <- RCtech(Batting2016)
#'
RCtech <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }
  if (!all(c("AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP",
             "SB", "CS", "SF", "SH", "IBB") %in% names(dat))) {
    stop("Not enough data to calculate. Please make sure your data inclueds 'AB', 'H', 'BB', 'X2B', 'X3B',\n
         'HR', 'GIDP', 'HBP', 'SB', 'CS', 'SF', 'SH', and 'IBB.'", call. = FALSE)
  }

  singles <- dat$H - dat$X2B - dat$X3B - dat$HR
  total_bases <- singles + 2 * dat$X2B + 3 * dat$X3B + 4 * dat$HR

  on_base <- dat$H + dat$BB - dat$CS + dat$HBP - dat$GIDP
  # The 0.52 term belongs inside the advancement factor. A misplaced parenthesis
  # used to add it after the multiplication instead.
  advancement <- total_bases + 0.26 * (dat$BB - dat$IBB + dat$HBP) +
    0.52 * (dat$SH + dat$SF + dat$SB)
  opportunities <- dat$AB + dat$BB + dat$HBP + dat$SH + dat$SF

  # Return the ifelse() result so rows without at bats become NA.
  ifelse(dat$AB > 0, (on_base * advancement) / opportunities, NA_real_)
}

#' @title Batting: Calculate Runs Created using the updated 2002 formula.
#' @description The "2002 Version" is an updated version of the "Technical Version" by Bill James.
#' The 2002 RC uses the same counting stats as the Technical Version but applies weights to many of the raw stats.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
#' "SF" and "SH," "SO", and "IBB."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat} holding
#' \code{((2.4*C + A) * (3*C + B)) / (9*C) - 0.9*C}, where \code{A} is the on-base factor, \code{B} the
#' advancement factor and \code{C} the opportunity factor computed in the function body. Rows where "AB" is
#' not greater than zero are \code{NA}.
#' @keywords RC2002 runs created
#' @family Batting functions
#' @export RC2002
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$RC2002 <- RC2002(Batting2016)
#'
RC2002 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP",
                  "SB", "CS", "SF", "SH", "IBB", "SO")
    if (!all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'BB', 'X2B', 'X3B', ",
             "'HR', 'GIDP', 'HBP', 'SB', 'CS', 'SF', 'SH', 'SO', and 'IBB.'", call. = FALSE)
    }

    X1B <- dat$H - dat$X2B - dat$X3B - dat$HR
    OnBaseFact <- dat$H + dat$BB - dat$CS + dat$HBP - dat$GIDP
    # NOTE(review): published descriptions of the 2002 formula appear to use 1.125
    # as the singles weight; 1.25 is kept as found here -- confirm against the source.
    AdvanceFact <- (1.25*X1B) + (1.69*dat$X2B) + (3.02*dat$X3B) + (3.73*dat$HR) +
        0.29*(dat$BB - dat$IBB + dat$HBP) +
        0.492*(dat$SH + dat$SF + dat$SB) - (0.04*dat$SO)
    OpportunityFact <- dat$AB + dat$BB + dat$HBP + dat$SH + dat$SF

    runs_created <- (((2.4*OpportunityFact) + OnBaseFact) * ((3*OpportunityFact) + AdvanceFact)) /
        (9*OpportunityFact) - (0.9*OpportunityFact)

    # Return the ifelse() result so rows without at bats are NA rather than the
    # NaN produced by dividing by a zero opportunity factor.
    ifelse(dat$AB > 0, runs_created, NA)
}

#' @title Batting: Calculate slugging percentage (SLG)
#' @description Find the SLG for batters with more than zero hits.
#' Required fields from the batting table are "AB", "H", "X2B", "X3B", "HR".
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}: total bases divided by "AB", rounded to
#' three decimals. Rows where "H" is not greater than zero are \code{NA}.
#' @keywords SLG slugging percentage
#' @family Batting functions
#' @export SLG
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$SLG <- SLG(Batting2016)
#'
SLG <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }

    required <- c("H", "X2B", "X3B", "AB", "HR")
    if (!all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H', 'AB', 'X2B', 'X3B' and 'HR'",
             call. = FALSE)
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + (dat$X2B*2) + (dat$X3B*3) + (dat$HR*4)

    # Return the ifelse() result so the NA branch is applied; assigning inside
    # `yes` and returning that variable leaked NaN for rows with no at bats.
    ifelse(dat$H > 0, round(total_bases / dat$AB, 3), NA)
}

#' @title Batting: Calculate a batter's total bases
#' @description Find total bases.
#' Required fields from the batting table are "AB","H", "X2B", "X3B" and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}: singles plus two times doubles, three
#' times triples and four times home runs. Rows where "AB" is not greater than zero are \code{NA}.
#' @keywords TBs total bases
#' @family Batting functions
#' @export TBs
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$TBs <- TBs(Batting2016)
#'
TBs <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }

    # "AB" is read below, so it has to be part of the column check as well.
    required <- c("AB", "H", "X2B", "X3B", "HR")
    if (!all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB','H', 'X2B', 'X3B' and 'HR'",
             call. = FALSE)
    }

    # "H" already counts extra-base hits; start from singles so doubles, triples
    # and home runs are not counted twice (same construction SLG uses).
    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + (2*dat$X2B) + (3*dat$X3B) + (4*dat$HR)

    ifelse(dat$AB > 0, round(total_bases, 3), NA)
}

#' @title Batting: Calculate Weighted On-Base Average (wOBA)
#' @description Find the wOBA for all players with one or more hits for a particular season.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
#' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
#' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
#' park factors between the American and National leagues. The default for this argument is FALSE.
#' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
#' were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954.
#' The default is set to TRUE.
Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
#' If FALSE, the wOBA calculation will return NaN for years with missing data.
#' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal
#' formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and
#' does not require an external download. If not specified, the default is set to FALSE.
#' @return A numeric vector with one element per row of the batting table after it has been joined to the
#' yearly wOBA weights. Rows where "H" is not greater than zero are \code{NA}.
#' @keywords wOBA Weighted On-Base Average
#' @family Batting functions
#' @import dplyr
#' @export wOBA
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#' data("Pitching2016")
#' head(Pitching2016)
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' Batting2016$wOBA <- wOBA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
#'                          NA_to_zero=TRUE, Sep.Leagues=FALSE)
#'
wOBA <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Both flags are scalars, so use the short-circuiting scalar operator.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }

    dat <- BattingTable

    # Validate before calling wOBA_values() so bad input fails fast instead of
    # after the weights have been computed (or downloaded).
    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    if (is.null(dat) || !all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'BB', 'X2B', 'X3B', ",
             "'HR', 'HBP', 'SF', and 'IBB.'", call. = FALSE)
    }

    # Kept under a separate name so the local table does not shadow the
    # wOBA_values() function it came from.
    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }

    # Attach the per-season (and optionally per-league) weights to every batting row.
    if (isTRUE(Sep.Leagues)){
        weights <- weights[, c("yearID", "lgID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR")]
        dat <- left_join(dat, weights, by=c("yearID", "lgID"))
    } else {
        weights <- weights[, c("yearID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR")]
        dat <- left_join(dat, weights, by="yearID")
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    weighted_events <- dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
        dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR
    opportunities <- dat$AB + unintentional_bb + dat$SF + dat$HBP

    # Return the ifelse() result so the NA branch is applied; assigning inside
    # `yes` skipped it and could return this function object when no row had a hit.
    ifelse(dat$H > 0, weighted_events / opportunities, NA)
}


#' @title Batting: Calculate Weighted Runs Above Average (wRAA)
#' @description Find the wRAA for all players with one or more hits for a particular season.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
#' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
555 | #' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping 556 | #' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for 557 | #' park factors between the American and National leagues. The default for this argument is FALSE. 558 | #' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits 559 | #' were not a counted statistic until 1954, therefore we are technically unable to calculate wRAA for any player prior to 1954. 560 | #' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice. 561 | #' If FALSE, the wRAA calculation will return NaN for years with missing data. 562 | #' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRAA 563 | #' calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula. 564 | #' This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRAA should be expected. 565 | #' The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE. 
#' @return A numeric vector with one element per row of the batting table after it has been joined to the
#' yearly wOBA weights. Rows where "H" is not greater than zero are \code{NA}.
#' @keywords wRAA Weighted Runs Above Average
#' @family Batting functions
#' @import dplyr
#' @export wRAA
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#' data("Pitching2016")
#' head(Pitching2016)
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' Batting2016$wRAA <- wRAA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
#'                          NA_to_zero=TRUE, Sep.Leagues=FALSE)
#'
wRAA <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Both flags are scalars, so use the short-circuiting scalar operator.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }
    dat <- BattingTable

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    if (is.null(dat) || !all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'BB', 'X2B', 'X3B', ",
             "'HR', 'HBP', 'SF', and 'IBB.'", call. = FALSE)
    }

    # Kept under a separate name so the local table does not shadow the
    # wOBA_values() function it came from.
    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }

    # Attach the weights plus the wOBA scale and league wOBA needed for wRAA.
    if (isTRUE(Sep.Leagues)){
        weights <- weights[, c("yearID", "lgID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba")]
        dat <- left_join(dat, weights, by=c("yearID", "lgID"))
    } else {
        weights <- weights[, c("yearID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba")]
        dat <- left_join(dat, weights, by="yearID")
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    player_woba <- (dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
                        dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR) /
        (dat$AB + unintentional_bb + dat$SF + dat$HBP)
    plate_appearances <- dat$AB + dat$BB + dat$HBP + dat$SF

    # Return the ifelse() result so the NA branch is applied; assigning inside
    # `yes` skipped it and could return this function object when no row had a hit.
    ifelse(dat$H > 0, ((player_woba - dat$lg_woba) / dat$woba_scale) * plate_appearances, NA)
}


#' @title Batting: Calculate Weighted Runs Created (wRC)
#' @description Find the wRC for all players with one or more hits for a particular season.
#' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
#' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
#' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
#' park factors between the American and National leagues. The default for this argument is FALSE.
#' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
#' were not a counted statistic until 1954, therefore we are technically unable to calculate wRC for any player prior to 1954.
#' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
#' If FALSE, the wRC calculation will return NaN for years with missing data.
#' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRC
#' calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula.
#' This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRC should be expected.
#' The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.
#' @return A numeric vector with one element per row of the batting table after it has been joined to the
#' yearly wOBA weights. Rows where "H" is not greater than zero are \code{NA}.
#' @keywords wRC Weighted Runs Created
#' @family Batting functions
#' @import dplyr
#' @export wRC
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#' data("Pitching2016")
#' head(Pitching2016)
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' Batting2016$wRC <- wRC(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
#'                        NA_to_zero=TRUE, Sep.Leagues=FALSE)
#'
wRC <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Both flags are scalars, so use the short-circuiting scalar operator.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }
    dat <- BattingTable

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    if (is.null(dat) || !all(required %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'BB', 'X2B', 'X3B', ",
             "'HR', 'HBP', 'SF', and 'IBB.'", call. = FALSE)
    }

    # Kept under a separate name so the local table does not shadow the
    # wOBA_values() function it came from.
    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    # The internal table needs league runs per plate appearance derived here.
    # NOTE(review): the Fangraphs table presumably already carries lg_r_pa -- confirm in wOBA_values().
    if (!isTRUE(Fangraphs)) {
        weights$lg_r_pa <- weights$R / (weights$AB + weights$BB + weights$HBP + weights$SF)
    }
    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }
    if (isTRUE(Sep.Leagues)){
        weights <- weights[, c("yearID", "lgID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba", "lg_r_pa")]
        dat <- left_join(dat, weights, by=c("yearID", "lgID"))
    } else {
        weights <- weights[, c("yearID", "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba", "lg_r_pa")]
        dat <- left_join(dat, weights, by="yearID")
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    player_woba <- (dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
                        dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR) /
        (dat$AB + unintentional_bb + dat$SF + dat$HBP)
    plate_appearances <- dat$AB + dat$BB + dat$HBP + dat$SF

    # Return the ifelse() result so the NA branch is applied; assigning inside
    # `yes` skipped it and could return this function object when no row had a hit.
    ifelse(dat$H > 0,
           (((player_woba - dat$lg_woba) / dat$woba_scale) + dat$lg_r_pa) * plate_appearances, NA)
}




--------------------------------------------------------------------------------