├── LICENSE
├── .DS_Store
├── data
├── seasonAVG.rda
├── Batting2016.rda
├── Fielding2016.rda
└── Pitching2016.rda
├── tests
├── testthat.R
└── testthat
│ ├── check_urls.R
│ └── test-check_urls.R
├── man
├── figures
│ └── baseballDBR_hex.png
├── pipe.Rd
├── compound_pipe.Rd
├── urlExists.Rd
├── baseballDBR.Rd
├── Ch.Rd
├── Fld_pct.Rd
├── IP.Rd
├── H_9.Rd
├── HR_9.Rd
├── K_9.Rd
├── BB_9.Rd
├── LOB_pct.Rd
├── WHIP.Rd
├── TBs.Rd
├── CTpct.Rd
├── ISO.Rd
├── OBP.Rd
├── SLG.Rd
├── PA.Rd
├── HRpct.Rd
├── Kpct.Rd
├── BA.Rd
├── Fielding2016.Rd
├── OPS.Rd
├── XBperH.Rd
├── XBHpct.Rd
├── get_bbdb.Rd
├── BABIP.Rd
├── RCbasic.Rd
├── seasonAVG.Rd
├── BBpct.Rd
├── Batting2016.Rd
├── RCtech.Rd
├── RC2002.Rd
├── fip_values.Rd
├── Pitching2016.Rd
├── FIP.Rd
├── wOBA_values.Rd
├── wOBA.Rd
├── wRC.Rd
└── wRAA.Rd
├── .Rbuildignore
├── R
├── baseballDBR.R
├── utils.R
├── fielding2016.R
├── seasonAVG.R
├── batting2016.R
├── pitching2016.R
├── fieldingStats.R
├── fip.R
├── get_bbdb.R
├── pitchingStats.R
├── woba_values.R
└── battingStats.R
├── .travis.yml
├── baseballDBR.Rproj
├── .gitignore
├── NEWS.md
├── DESCRIPTION
├── NAMESPACE
├── data-raw
└── data_prep.R
├── vignettes
├── FIP.Rmd
├── wRAA_wRC.Rmd
├── Database_Tools.Rmd
└── wOBA.Rmd
├── README.Rmd
└── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2017
2 | COPYRIGHT HOLDER: Kris Eberwein
3 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/.DS_Store
--------------------------------------------------------------------------------
/data/seasonAVG.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/seasonAVG.rda
--------------------------------------------------------------------------------
/data/Batting2016.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Batting2016.rda
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(baseballDBR)
3 |
4 | test_check("baseballDBR")
5 |
--------------------------------------------------------------------------------
/data/Fielding2016.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Fielding2016.rda
--------------------------------------------------------------------------------
/data/Pitching2016.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/data/Pitching2016.rda
--------------------------------------------------------------------------------
/man/figures/baseballDBR_hex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keberwein/baseballDBR/HEAD/man/figures/baseballDBR_hex.png
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^data-raw$
4 | ^\.travis\.yml$
5 | ^README\.Rmd$
6 | ^NEWS\.Rmd$
7 | ^cran-comments\.md$
8 | ^appveyor\.yml$
9 |
--------------------------------------------------------------------------------
/R/baseballDBR.R:
--------------------------------------------------------------------------------
1 |
2 | #' baseballDBR: A package for working with data from the Baseball Databank/Lahman Database.
3 | #' @name baseballDBR
4 | NULL
5 | # Package-level placeholders, each bound to NULL (presumably to quiet R CMD check notes about undefined globals — TODO confirm).
6 | PitchingTable <- HBP <- HR <- BB <- SO <- H <- SF <- IBB <- woba_scale <- NULL
7 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \description{
10 | Pipe operator
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/man/compound_pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{\%<>\%}
4 | \alias{\%<>\%}
5 | \title{Compound_pipe}
6 | \usage{
7 | lhs \%<>\% rhs
8 | }
9 | \description{
10 | Compound_pipe
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/man/urlExists.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{urlExists}
4 | \alias{urlExists}
5 | \title{urlExists}
6 | \usage{
7 | urlExists(target)
8 | }
9 | \arguments{
10 | \item{target}{url}
11 | }
12 | \description{
13 | A utility function to run a tryCatch on a URL.
14 | }
15 |
--------------------------------------------------------------------------------
/man/baseballDBR.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/baseballDBR.R
3 | \name{baseballDBR}
4 | \alias{baseballDBR}
5 | \title{baseballDBR: A package for working with data from the Baseball Databank/Lahman Database.}
6 | \description{
7 | baseballDBR: A package for working with data from the Baseball Databank/Lahman Database.
8 | }
9 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: r
4 | warnings_are_errors: false
5 | sudo: false
6 |
7 | env:
8 | global:
9 | - NOT_CRAN=true
10 | before_install: echo "options(repos = c(CRAN='http://cran.rstudio.com'))" > ~/.Rprofile
11 |
12 | notifications:
13 | email:
14 | on_success: change
15 | on_failure: change
--------------------------------------------------------------------------------
/baseballDBR.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | inst/doc
5 | README.Rmd
6 | NEWS.Rmd
7 | # Example code in package build process
8 | *-Ex.R
9 | # Output files from R CMD build
10 | /*.tar.gz
11 | # Output files from R CMD check
12 | /*.Rcheck/
13 | # RStudio files
14 | .Rproj.user/
15 | # produced vignettes
16 | vignettes/*.html
17 | vignettes/*.pdf
18 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
19 | .httr-oauth
20 | # knitr and R markdown default cache directories
21 | /*_cache/
22 | /cache/
23 | # Temporary files created by R markdown
24 | *.utf8.md
25 | *.knit.md
26 | cran-comments.md
27 | .DS_Store
28 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # baseballDBR 0.1.4
2 |
3 | ## Bug fixes
4 |
5 | * Added a warning regarding name changes when user tries to download "Master" table.
6 |
7 | # baseballDBR 0.1.3
8 |
9 | ## Bug Fixes
10 |
11 | * Updated URLs in Description
12 |
13 | # baseballDBR 0.1.2
14 |
15 | ## New Features
16 |
17 | * Added internal data sets `Batting2016`, `Pitching2016`, and `Fielding2016`.
18 |
19 | ## Enhancements
20 |
21 | * Added more testing for functions on package build.
22 |
23 | # baseballDBR 0.1.1
24 |
25 | ## New features
26 |
27 | * Added `urlExists()` function to perform tryCatch on URLs.
28 |
29 | * Added backup URLs for the `get_bbdb()` function.
30 |
31 | * Added `downloadZip` argument to the `get_bbdb()` function.
32 |
33 | * Added vignettes.
34 |
35 | * Added `get_bbdb()` function.
36 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: baseballDBR
2 | Type: Package
3 | Title: Sabermetrics and Advanced Baseball Statistics
4 | Version: 0.1.2.000009
5 | Authors@R: person("Kris", "Eberwein", email = "eberwein@knights.ucf.edu",
6 | role = c("aut", "cre"))
7 | Description: A tool for gathering and analyzing data from the Baseball Databank, which includes player performance statistics from major league baseball in the United States beginning in the year 1871.
8 | Depends:
9 | R (>= 3.3.3)
10 | Imports:
11 | rvest,
12 | xml2,
13 | magrittr,
14 | dplyr
15 | Suggests:
16 | testthat,
17 | rmarkdown,
18 | knitr
19 | License: MIT + file LICENSE
20 | URL: https://github.com/keberwein/baseballDBR
21 | BugReports: https://github.com/keberwein/baseballDBR/issues
22 | LazyData: true
23 | RoxygenNote: 6.1.0
24 | VignetteBuilder: knitr
25 |
--------------------------------------------------------------------------------
/man/Ch.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fieldingStats.R
3 | \name{Ch}
4 | \alias{Ch}
5 | \title{Fielding: Calculate defensive chances}
6 | \usage{
7 | Ch(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | The number of chances a player had to make a defensive play.
15 | Required fields from the Fielding table are; "PO", "A", and "E."
16 | }
17 | \examples{
18 |
19 | data("Fielding2016")
20 | head(Fielding2016)
21 |
22 | Fielding2016$Ch <- Ch(Fielding2016)
23 |
24 | }
25 | \seealso{
26 | Other Fielding functions: \code{\link{Fld_pct}}
27 | }
28 | \concept{Fielding functions}
29 | \keyword{Ch}
30 | \keyword{Chances}
31 | \keyword{Defensive}
32 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 |
2 | #' Pipe operator
3 | #'
4 | #' @name %>%
5 | #' @rdname pipe
6 | #' @keywords internal
7 | #' @importFrom magrittr %>%
8 | #' @usage lhs \%>\% rhs
9 | #' @export
10 | NULL
11 |
12 | #' Compound_pipe
13 | #'
14 | #' @name %<>%
15 | #' @rdname compound_pipe
16 | #' @keywords internal
17 | #' @importFrom magrittr %<>%
18 | #' @usage lhs \%<>\% rhs
19 | #' @export
20 | NULL
21 |
22 |
23 | #' @title urlExists
24 | #' @description A utility function to run a tryCatch on a URL.
25 | #' @param target url
26 | #' @export
27 | urlExists <- function(target) {
28 |     # Returns TRUE when `target` can be opened and read, FALSE on any error.
29 |     con <- NULL
30 |     # Destroy the connection on every exit path, including a failed read.
31 |     on.exit(if (!is.null(con)) try(close(con), silent = TRUE), add = TRUE)
32 |     tryCatch({
33 |         con <- url(target)
34 |         suppressWarnings(readLines(con))
35 |         TRUE
36 |     },
37 |     # Any failure means "not reachable"; never fall through to NULL.
38 |     error = function(err) FALSE)
39 | }
40 |
--------------------------------------------------------------------------------
/man/Fld_pct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fieldingStats.R
3 | \name{Fld_pct}
4 | \alias{Fld_pct}
5 | \title{Fielding: Calculate fielding percentage}
6 | \usage{
7 | Fld_pct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the fielding percentage of fielders from their putouts, assists, and errors.
15 | Required fields from the Fielding table are; "PO", "A", and "E."
16 | }
17 | \examples{
18 |
19 | data("Fielding2016")
20 | head(Fielding2016)
21 |
22 | Fielding2016$Fld_pct <- Fld_pct(Fielding2016)
23 |
24 | }
25 | \seealso{
26 | Other Fielding functions: \code{\link{Ch}}
27 | }
28 | \concept{Fielding functions}
29 | \keyword{Fld_pct}
30 | \keyword{fielding}
31 | \keyword{percentage}
32 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%<>%")
4 | export("%>%")
5 | export(BA)
6 | export(BABIP)
7 | export(BB_9)
8 | export(BBpct)
9 | export(CTpct)
10 | export(Ch)
11 | export(FIP)
12 | export(Fld_pct)
13 | export(HR_9)
14 | export(HRpct)
15 | export(H_9)
16 | export(IP)
17 | export(ISO)
18 | export(K_9)
19 | export(Kpct)
20 | export(LOB_pct)
21 | export(OBP)
22 | export(OPS)
23 | export(PA)
24 | export(RC2002)
25 | export(RCbasic)
26 | export(RCtech)
27 | export(SLG)
28 | export(TBs)
29 | export(WHIP)
30 | export(XBHpct)
31 | export(XBperH)
32 | export(fip_values)
33 | export(get_bbdb)
34 | export(urlExists)
35 | export(wOBA)
36 | export(wOBA_values)
37 | export(wRAA)
38 | export(wRC)
39 | import(dplyr)
40 | import(utils)
41 | importFrom(magrittr,"%<>%")
42 | importFrom(magrittr,"%>%")
43 | importFrom(rvest,html_node)
44 | importFrom(rvest,html_table)
45 | importFrom(stats,setNames)
46 | importFrom(xml2,read_html)
47 |
--------------------------------------------------------------------------------
/man/IP.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{IP}
4 | \alias{IP}
5 | \title{Pitching: Calculate the innings pitched}
6 | \usage{
7 | IP(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the number of innings a player has pitched for a season.
15 | Required fields from the Pitching table are; "IPouts."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$IP <- IP(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}},
28 | \code{\link{K_9}}, \code{\link{LOB_pct}},
29 | \code{\link{WHIP}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{innings}
33 | \keyword{pitched}
34 |
--------------------------------------------------------------------------------
/tests/testthat/check_urls.R:
--------------------------------------------------------------------------------
1 | # Check external urls to make sure they have not changed.
2 |
3 | library(testthat)
4 |
5 | # Set up function to do a tryCatch on URL.
6 | urlExists <- function(target) {
7 |     # Returns TRUE when `target` can be opened and read, FALSE on any error.
8 |     con <- NULL
9 |     # Destroy the connection on every exit path, including a failed read.
10 |     on.exit(if (!is.null(con)) try(close(con), silent = TRUE), add = TRUE)
11 |     tryCatch({
12 |         con <- url(target)
13 |         suppressWarnings(readLines(con))
14 |         TRUE
15 |     },
16 |     # Any failure means "not reachable"; never fall through to NULL.
17 |     error = function(err) FALSE)
18 | }
19 |
20 | # Check that a sample CSV in the Chadwick Bureau baseballdatabank GitHub repo is reachable. NOTE(review): this file looks identical to test-check_urls.R; confirm both are needed.
21 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/AllstarFull.csv"))
22 | # Check the same CSV in the author's personal fork, which serves as a backup.
23 | testthat::expect_true(urlExists("https://raw.githubusercontent.com/keberwein/baseballdatabank/master/core/AllstarFull.csv"))
24 | # Check the FanGraphs "guts" page; urlExists() opens and reads the live URL on every run.
25 | testthat::expect_true(urlExists("https://www.fangraphs.com/guts.aspx?type=cn"))
26 |
27 |
28 |
--------------------------------------------------------------------------------
/tests/testthat/test-check_urls.R:
--------------------------------------------------------------------------------
1 | # Check external urls to make sure they have not changed.
2 |
3 | library(testthat)
4 |
5 | # Set up function to do a tryCatch on URL.
6 | urlExists <- function(target) {
7 |     # Returns TRUE when `target` can be opened and read, FALSE on any error.
8 |     con <- NULL
9 |     # Destroy the connection on every exit path, including a failed read.
10 |     on.exit(if (!is.null(con)) try(close(con), silent = TRUE), add = TRUE)
11 |     tryCatch({
12 |         con <- url(target)
13 |         suppressWarnings(readLines(con))
14 |         TRUE
15 |     },
16 |     # Any failure means "not reachable"; never fall through to NULL.
17 |     error = function(err) FALSE)
18 | }
19 |
20 | # These checks open live URLs, so they are grouped in one test and skipped on CRAN.
21 | testthat::test_that("external data sources are reachable", {
22 |     testthat::skip_on_cran()
23 |     # Check Chadwick Bureau Git repo.
24 |     testthat::expect_true(urlExists("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/AllstarFull.csv"))
25 |     # Check my personal fork, backup.
26 |     testthat::expect_true(urlExists("https://raw.githubusercontent.com/keberwein/baseballdatabank/master/core/AllstarFull.csv"))
27 |     # Check FanGraphs guts page.
28 |     testthat::expect_true(urlExists("https://www.fangraphs.com/guts.aspx?type=cn"))
29 | })
30 |
--------------------------------------------------------------------------------
/man/H_9.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{H_9}
4 | \alias{H_9}
5 | \title{Pitching: Calculate Hits per Nine innings}
6 | \usage{
7 | H_9(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the number of hits a pitcher allows per nine innings pitched.
15 | Required fields from the Pitching table are; "H", "BB", and "IPouts."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$H_9 <- H_9(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{IP}},
28 | \code{\link{K_9}}, \code{\link{LOB_pct}},
29 | \code{\link{WHIP}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{hits}
33 | \keyword{innings}
34 | \keyword{nine}
35 | \keyword{per}
36 |
--------------------------------------------------------------------------------
/man/HR_9.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{HR_9}
4 | \alias{HR_9}
5 | \title{Pitching: Calculate Home Runs per Nine innings}
6 | \usage{
7 | HR_9(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the number of home runs a pitcher allows per nine innings pitched.
15 | Required fields from the Pitching table are; "H" and "IPouts."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$HR_9 <- HR_9(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{H_9}}, \code{\link{IP}},
28 | \code{\link{K_9}}, \code{\link{LOB_pct}},
29 | \code{\link{WHIP}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{hits}
33 | \keyword{innings}
34 | \keyword{nine}
35 | \keyword{per}
36 |
--------------------------------------------------------------------------------
/man/K_9.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{K_9}
4 | \alias{K_9}
5 | \title{Pitching: Calculate Strikeouts per Nine innings}
6 | \usage{
7 | K_9(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the number of strikeouts a pitcher records per nine innings pitched.
15 | Required fields from the Pitching table are; "H", "BB", "IPouts", and "SO."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$K_9 <- K_9(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}},
28 | \code{\link{IP}}, \code{\link{LOB_pct}},
29 | \code{\link{WHIP}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{innings}
33 | \keyword{nine}
34 | \keyword{per}
35 | \keyword{strikes}
36 |
--------------------------------------------------------------------------------
/man/BB_9.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{BB_9}
4 | \alias{BB_9}
5 | \title{Pitching: Calculate walks per nine innings}
6 | \usage{
7 | BB_9(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find walks per nine innings for pitchers with one or more innings pitched.
15 | Required fields from the Pitching table are; "IPouts", and "BB."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$BB_9 <- BB_9(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{FIP}},
27 | \code{\link{HR_9}}, \code{\link{H_9}}, \code{\link{IP}},
28 | \code{\link{K_9}}, \code{\link{LOB_pct}},
29 | \code{\link{WHIP}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{BB}
33 | \keyword{BB9}
34 | \keyword{BB_9}
35 | \keyword{bb/9}
36 |
--------------------------------------------------------------------------------
/man/LOB_pct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{LOB_pct}
4 | \alias{LOB_pct}
5 | \title{Pitching: Calculate the left on base percentage}
6 | \usage{
7 | LOB_pct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the percentage of base runners that a pitcher leaves on base over the course of a season.
15 | Required fields from the Pitching table are; "H", "BB", "HBP", "R", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$LOB_pct <- LOB_pct(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}},
28 | \code{\link{IP}}, \code{\link{K_9}}, \code{\link{WHIP}}
29 | }
30 | \concept{Pitching functions}
31 | \keyword{LOB}
32 | \keyword{LOB_pct}
33 | \keyword{percentage}
34 |
--------------------------------------------------------------------------------
/man/WHIP.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{WHIP}
4 | \alias{WHIP}
5 | \title{Pitching: Calculate Walks plus Hits per Innings Pitched}
6 | \usage{
7 | WHIP(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the number of walks plus hits a pitcher allows per inning pitched.
15 | Required fields from the Pitching table are; "H", "BB", and "IPouts."
16 | }
17 | \examples{
18 |
19 | data("Pitching2016")
20 | head(Pitching2016)
21 |
22 | Pitching2016$WHIP <- WHIP(Pitching2016)
23 |
24 | }
25 | \seealso{
26 | Other Pitching functions: \code{\link{BB_9}},
27 | \code{\link{FIP}}, \code{\link{HR_9}}, \code{\link{H_9}},
28 | \code{\link{IP}}, \code{\link{K_9}},
29 | \code{\link{LOB_pct}}
30 | }
31 | \concept{Pitching functions}
32 | \keyword{Hits}
33 | \keyword{Innings}
34 | \keyword{Pitched}
35 | \keyword{WHIP}
36 | \keyword{Walks}
37 | \keyword{per}
38 | \keyword{plus}
39 |
--------------------------------------------------------------------------------
/R/fielding2016.R:
--------------------------------------------------------------------------------
1 | #' A sample subset of the Fielding table from the Baseball Databank for the year 2016.
2 | #'
3 | #' A dataset containing Fielding statistics in 2016.
4 | #'
5 | #' @format A data frame with 1953 rows and 18 variables:
6 | #' \describe{
7 | #' \item{playerID}{database key for unique player}
8 | #' \item{yearID}{year}
9 | #' \item{stint}{number of times played on team in a single year}
10 | #' \item{teamID}{database key for unique team}
11 | #' \item{lgID}{database key for unique league}
12 | #' \item{POS}{primary position}
13 | #' \item{G}{number of games played}
14 | #' \item{GS}{number of games started}
15 | #' \item{InnOuts}{number of outs played in field}
16 | #' \item{PO}{number of putouts}
17 | #' \item{A}{number of assists}
18 | #' \item{E}{number of errors}
19 | #' \item{DP}{number of double plays}
20 | #' \item{PB}{number of passed balls by catchers}
21 | #' \item{WP}{number of wild pitches by catchers}
22 | #' \item{SB}{opponent stolen bases by catchers}
23 | #' \item{CS}{opponents caught stealing by catchers}
24 | #' \item{ZR}{zone rating}
25 | #'
26 | #' }
27 | #' @docType data
28 | #' @keywords internal
29 | #' @usage data(Fielding2016)
30 | #' @note Last updated 2016-06-15
31 | "Fielding2016"
32 |
--------------------------------------------------------------------------------
/man/TBs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{TBs}
4 | \alias{TBs}
5 | \title{Batting: Calculate a batter's total bases}
6 | \usage{
7 | TBs(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find total bases.
15 | Required fields from the batting table are "AB", "H", "X2B", "X3B" and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$TBs <- TBs(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{PA}},
31 | \code{\link{RC2002}}, \code{\link{RCbasic}},
32 | \code{\link{RCtech}}, \code{\link{SLG}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{TBs}
38 | \keyword{bases}
39 | \keyword{total}
40 |
--------------------------------------------------------------------------------
/man/CTpct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{CTpct}
4 | \alias{CTpct}
5 | \title{Batting: Calculate a batter's contact rate}
6 | \usage{
7 | CTpct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the contact rate for batters.
15 | Required fields from the batting table are "AB" and "SO."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$CTpct <- CTpct(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{HRpct}}, \code{\link{ISO}},
29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{CTpct}
38 | \keyword{contact}
39 | \keyword{rate}
40 |
--------------------------------------------------------------------------------
/man/ISO.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{ISO}
4 | \alias{ISO}
5 | \title{Batting: Calculate ISO for batters}
6 | \usage{
7 | ISO(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find isolated power (ISO) for batters with more than zero at bats.
15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$ISO <- ISO(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{ISO}
38 | \keyword{isolated}
39 | \keyword{power}
40 |
--------------------------------------------------------------------------------
/man/OBP.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{OBP}
4 | \alias{OBP}
5 | \title{Batting: Calculate on base percentage (OBP)}
6 | \usage{
7 | OBP(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the OBP for batters with more than zero hits.
15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$OBP <- OBP(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{OBP}
38 | \keyword{base}
39 | \keyword{on}
40 | \keyword{percentage}
41 |
--------------------------------------------------------------------------------
/man/SLG.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{SLG}
4 | \alias{SLG}
5 | \title{Batting: Calculate slugging percentage (SLG)}
6 | \usage{
7 | SLG(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the SLG for batters with more than zero hits.
15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$SLG <- SLG(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{PA}},
31 | \code{\link{RC2002}}, \code{\link{RCbasic}},
32 | \code{\link{RCtech}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{SLG}
38 | \keyword{base}
39 | \keyword{on}
40 | \keyword{percentage}
41 |
--------------------------------------------------------------------------------
/man/PA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{PA}
4 | \alias{PA}
5 | \title{Batting: Calculate plate appearances for batters}
6 | \usage{
7 | PA(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the plate appearances (PA) for batters.
15 | Required fields from the batting table are "AB", "BB", "HBP", "SH", and "SF."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$PA <- PA(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{PA}
38 | \keyword{base}
39 | \keyword{on}
40 | \keyword{percentage}
41 |
--------------------------------------------------------------------------------
/man/HRpct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{HRpct}
4 | \alias{HRpct}
5 | \title{Batting: Calculate home run percentage}
6 | \usage{
7 | HRpct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find home run percentage for batters with more than zero at bats.
15 | Required fields from the Batting table are "AB" and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$HRpct <- HRpct(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{ISO}},
29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{HRpct}
38 | \keyword{home}
39 | \keyword{percentage}
40 | \keyword{run}
41 |
--------------------------------------------------------------------------------
/man/Kpct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{Kpct}
4 | \alias{Kpct}
5 | \title{Batting: Calculate strikeout percentage}
6 | \usage{
7 | Kpct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find strikeout percentage for batters with more than zero at bats.
15 | Required fields from the Batting table are "AB", "SO", "BB", "HBP", "SF", and "SH."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$Kpct <- Kpct(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{Kpct}
38 | \keyword{percentage}
39 | \keyword{strikeout}
40 |
--------------------------------------------------------------------------------
/man/BA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{BA}
4 | \alias{BA}
5 | \title{Batting: Calculate batting average}
6 | \usage{
7 | BA(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find batting average for batters with more than zero at bats.
15 | Required fields from the Batting table are "AB" and "H."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$BA <- BA(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BBpct}}, \code{\link{CTpct}},
28 | \code{\link{HRpct}}, \code{\link{ISO}},
29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{BA}
38 | \keyword{ball}
39 | \keyword{base}
40 | \keyword{bb}
41 | \keyword{on}
42 | \keyword{percentage}
43 |
--------------------------------------------------------------------------------
/man/Fielding2016.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fielding2016.R
3 | \docType{data}
4 | \name{Fielding2016}
5 | \alias{Fielding2016}
6 | \title{A sample subset of the Fielding table from the Baseball Databank for the year 2016.}
7 | \format{A data frame with 1953 rows and 18 variables:
8 | \describe{
9 | \item{playerID}{database key for unique player}
10 | \item{yearID}{year}
11 | \item{stint}{number of times played on team in a single year}
12 | \item{teamID}{database key for unique team}
13 | \item{lgID}{database key for unique league}
14 | \item{POS}{primary position}
15 | \item{G}{number of games played}
16 | \item{GS}{number of games started}
17 | \item{InnOuts}{number of outs played in field}
18 | \item{PO}{number of putouts}
19 | \item{A}{number of assists}
20 | \item{E}{number of errors}
21 | \item{DP}{number of double plays}
22 | \item{PB}{number of passed balls by catchers}
23 | \item{WP}{number of wild pitches by catchers}
24 | \item{SB}{opponent stolen bases by catchers}
25 | \item{CS}{opponents caught stealing by catchers}
26 | \item{ZR}{zone rating}
27 | }}
28 | \usage{
29 | data(Fielding2016)
30 | }
31 | \description{
32 | A dataset containing Fielding statistics in 2016.
33 | }
34 | \note{
35 | Last updated 2016-06-15
36 | }
37 | \keyword{internal}
38 |
--------------------------------------------------------------------------------
/man/OPS.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{OPS}
4 | \alias{OPS}
5 | \title{Batting: Calculate on base percentage plus slugging (OPS)}
6 | \usage{
7 | OPS(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the OPS for batters with more than zero hits.
15 | Required fields from the batting table are "H", "X2B", "X3B", "HR", "BB", "HBP", "AB" and "SF."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$OPS <- OPS(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{OPS}
38 | \keyword{base}
39 | \keyword{on}
40 | \keyword{percentage}
41 |
--------------------------------------------------------------------------------
/man/XBperH.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{XBperH}
4 | \alias{XBperH}
5 | \title{Batting: Calculate extra base per hit}
6 | \usage{
7 | XBperH(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the average extra bases per hit for batters with more than zero hits.
15 | Required fields from the batting table are "H", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$XBperH <- XBperH(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{PA}},
31 | \code{\link{RC2002}}, \code{\link{RCbasic}},
32 | \code{\link{RCtech}}, \code{\link{SLG}},
33 | \code{\link{TBs}}, \code{\link{XBHpct}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{XBperH}
38 | \keyword{base}
39 | \keyword{extra}
40 | \keyword{hit}
41 | \keyword{per}
42 |
--------------------------------------------------------------------------------
/R/seasonAVG.R:
--------------------------------------------------------------------------------
1 | #' League averages and aggregates from 1901 to present.
2 | #'
3 | #' A dataset containing combined aggregates and averages for all of the
4 | #' MLB. These stats are primarily used in calculating advanced player stats.
5 | #'
6 | #' @format A data frame with 115 rows and 20 variables:
7 | #' \describe{
8 | #' \item{yearID}{year}
9 | #' \item{tot_G}{total games played}
10 | #' \item{tot_PA}{total plate appearances}
11 | #' \item{tot_HR}{total home runs}
12 | #' \item{tot_R}{total runs scored}
13 | #' \item{tot_RBI}{total runs batted in}
14 | #' \item{tot_SB}{total stolen bases}
15 | #' \item{avg_BB}{mean base on ball percentage}
16 | #' \item{avg_K}{mean strikeout percentage}
17 | #' \item{avg_ISO}{mean isolated power}
18 | #' \item{avg_BABIP}{mean batting average on balls in play}
19 | #' \item{avg_BA}{mean batting average}
20 | #' \item{avg_OBP}{mean on base percentage}
21 | #' \item{avg_SLG}{mean slugging percentage}
22 | #' \item{avg_wOBA}{mean weighted on base average}
23 | #' \item{avg_wRC}{mean weighted runs created}
24 | #' \item{avg_BsR}{mean base running average}
25 | #' \item{off}{offense}
26 | #' \item{def}{defense}
27 | #' \item{avg_WAR}{mean wins above replacement}
28 | #' }
29 | #' @docType data
30 | #' @keywords internal
31 | #' @usage data(seasonAVG)
32 | #' @note Last updated 2016-09-21
33 | "seasonAVG"
34 |
--------------------------------------------------------------------------------
/man/XBHpct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{XBHpct}
4 | \alias{XBHpct}
5 | \title{Batting: Calculate extra base percentage}
6 | \usage{
7 | XBHpct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find extra base percentage for batters with more than zero at bats.
15 | Required fields from the batting table are "AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$XBHpct <- XBHpct(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{PA}},
31 | \code{\link{RC2002}}, \code{\link{RCbasic}},
32 | \code{\link{RCtech}}, \code{\link{SLG}},
33 | \code{\link{TBs}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{XBHpct}
38 | \keyword{base}
39 | \keyword{extra}
40 | \keyword{percentage}
41 |
--------------------------------------------------------------------------------
/man/get_bbdb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_bbdb.R
3 | \name{get_bbdb}
4 | \alias{get_bbdb}
5 | \title{Get an up to date copy of the Baseball Databank.}
6 | \usage{
7 | get_bbdb(table = NULL, downloadZip = FALSE, AllTables = FALSE)
8 | }
9 | \arguments{
10 | \item{table}{The tables you would like to download. Uses Lahman table names Ex. "Batting", "Master", "AllstarFull", etc...
11 | If this argument is left as NULL, the function will download all twenty-seven tables.}
12 |
13 | \item{downloadZip}{If true, this will download a zip file of all twenty-seven tables in .csv format to your working directory.}
14 |
15 | \item{AllTables}{If true, this will download all the tables in the database. The default is set to false.}
16 | }
17 | \description{
18 | Download the newest version of the Baseball Databank from the Chadwick Bureau GitHub repository. This is the source of
19 | Sean Lahman's baseball database and is always under development. This function will read the .csv files and return them as data frames.
20 | There is also an option to download the entire directory.
21 | }
22 | \examples{
23 |
24 | get_bbdb(table = "Batting")
25 |
26 | \dontrun{
27 | get_bbdb(table = c("Batting", "Pitching"))
28 | }
29 |
30 | \dontrun{
31 | get_bbdb(AllTables = TRUE)
32 | }
33 | }
34 | \keyword{data}
35 | \keyword{database,}
36 | \keyword{frame}
37 |
--------------------------------------------------------------------------------
/man/BABIP.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{BABIP}
4 | \alias{BABIP}
5 | \title{Batting: Calculate batting average on balls in play (BABIP)}
6 | \usage{
7 | BABIP(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find BABIP for batters with more than zero at bats.
15 | Required fields from the Batting table are "AB", "BB", "H", "HBP", "SF", "SH", "HR", and "SO."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$BABIP <- BABIP(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BA}},
27 | \code{\link{BBpct}}, \code{\link{CTpct}},
28 | \code{\link{HRpct}}, \code{\link{ISO}},
29 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
30 | \code{\link{PA}}, \code{\link{RC2002}},
31 | \code{\link{RCbasic}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{BABIP}
38 | \keyword{ball}
39 | \keyword{base}
40 | \keyword{bb}
41 | \keyword{on}
42 | \keyword{percentage}
43 |
--------------------------------------------------------------------------------
/R/batting2016.R:
--------------------------------------------------------------------------------
1 | #' A sample subset of the Batting table from the Baseball Databank for the year 2016.
2 | #'
3 | #' A dataset containing Batting statistics in 2016.
4 | #'
5 | #' @format A data frame with 1483 rows and 22 variables:
6 | #' \describe{
7 | #' \item{playerID}{database key for unique player}
8 | #' \item{yearID}{year}
9 | #' \item{stint}{number of times played on team in a single year}
10 | #' \item{teamID}{database key for unique team}
11 | #' \item{lgID}{database key for unique league}
12 | #' \item{G}{number of games played}
13 | #' \item{AB}{number of at bats}
14 | #' \item{R}{number of runs scored}
15 | #' \item{H}{number of hits}
16 | #' \item{X2B}{number of doubles}
17 | #' \item{X3B}{number of triples}
18 | #' \item{HR}{number of home runs}
19 | #' \item{RBI}{number of runs batted in}
20 | #' \item{SB}{number of stolen bases}
21 | #' \item{CS}{number of times caught stealing}
22 | #' \item{BB}{number of bases on balls}
23 | #' \item{SO}{number of strikeouts}
24 | #' \item{IBB}{number of intentional bases on balls}
25 | #' \item{HBP}{number of times hit by pitch}
26 | #' \item{SH}{number of sacrifice hits}
27 | #' \item{SF}{number of sacrifice flies}
28 | #' \item{GIDP}{number of times grounded into a double play}
29 | #' }
30 | #' @docType data
31 | #' @keywords internal
32 | #' @usage data(Batting2016)
33 | #' @note Last updated 2016-06-15
34 | "Batting2016"
35 |
--------------------------------------------------------------------------------
/man/RCbasic.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{RCbasic}
4 | \alias{RCbasic}
5 | \title{Batting: Calculate Runs Created using the basic formula.}
6 | \usage{
7 | RCbasic(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find the runs created using the basic formula presented by Bill James in the late 1970s.
15 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", and "HR."
16 | }
17 | \examples{
18 |
19 | data("Batting2016")
20 | head(Batting2016)
21 |
22 | Batting2016$RCbasic <- RCbasic(Batting2016)
23 |
24 | }
25 | \seealso{
26 | Other Batting functions: \code{\link{BABIP}},
27 | \code{\link{BA}}, \code{\link{BBpct}},
28 | \code{\link{CTpct}}, \code{\link{HRpct}},
29 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
30 | \code{\link{OPS}}, \code{\link{PA}},
31 | \code{\link{RC2002}}, \code{\link{RCtech}},
32 | \code{\link{SLG}}, \code{\link{TBs}},
33 | \code{\link{XBHpct}}, \code{\link{XBperH}},
34 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
35 | }
36 | \concept{Batting functions}
37 | \keyword{RCbasic}
38 | \keyword{base}
39 | \keyword{extra}
40 | \keyword{hit}
41 | \keyword{per}
42 |
--------------------------------------------------------------------------------
/man/seasonAVG.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/seasonAVG.R
3 | \docType{data}
4 | \name{seasonAVG}
5 | \alias{seasonAVG}
6 | \title{League averages and aggregates from 1901 to present.}
7 | \format{A data frame with 115 rows and 20 variables:
8 | \describe{
9 | \item{yearID}{year}
10 | \item{tot_G}{total games played}
11 | \item{tot_PA}{total plate appearances}
12 | \item{tot_HR}{total home runs}
13 | \item{tot_R}{total runs scored}
14 | \item{tot_RBI}{total runs batted in}
15 | \item{tot_SB}{total stolen bases}
16 | \item{avg_BB}{mean base on ball percentage}
17 | \item{avg_K}{mean strikeout percentage}
18 | \item{avg_ISO}{mean isolated power}
19 | \item{avg_BABIP}{mean batting average on balls in play}
20 | \item{avg_BA}{mean batting average}
21 | \item{avg_OBP}{mean on base percentage}
22 | \item{avg_SLG}{mean slugging percentage}
23 | \item{avg_wOBA}{mean weighted on base average}
24 | \item{avg_wRC}{mean weighted runs created}
25 | \item{avg_BsR}{mean base running average}
26 | \item{off}{offense}
27 | \item{def}{defense}
28 | \item{avg_WAR}{mean wins above replacement}
29 | }}
30 | \usage{
31 | data(seasonAVG)
32 | }
33 | \description{
34 | A dataset containing combined aggregates and averages for all of the
35 | MLB. These stats are primarily used in calculating advanced player stats.
36 | }
37 | \note{
38 | Last updated 2016-09-21
39 | }
40 | \keyword{internal}
41 |
--------------------------------------------------------------------------------
/man/BBpct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{BBpct}
4 | \alias{BBpct}
5 | \title{Batting: Calculate base on ball percentage}
6 | \usage{
7 | BBpct(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | Find base on ball percentage for batters with more than zero at bats.
15 | Required fields from the Batting table are "AB", "SO", "BB", "HBP", "SF", and "SH."
16 | Intentional base on balls (IBB) is added for the years that metric is available.
17 | }
18 | \examples{
19 |
20 | data("Batting2016")
21 | head(Batting2016)
22 |
23 | Batting2016$BBpct <- BBpct(Batting2016)
24 |
25 | }
26 | \seealso{
27 | Other Batting functions: \code{\link{BABIP}},
28 | \code{\link{BA}}, \code{\link{CTpct}},
29 | \code{\link{HRpct}}, \code{\link{ISO}},
30 | \code{\link{Kpct}}, \code{\link{OBP}}, \code{\link{OPS}},
31 | \code{\link{PA}}, \code{\link{RC2002}},
32 | \code{\link{RCbasic}}, \code{\link{RCtech}},
33 | \code{\link{SLG}}, \code{\link{TBs}},
34 | \code{\link{XBHpct}}, \code{\link{XBperH}},
35 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
36 | }
37 | \concept{Batting functions}
38 | \keyword{BBpct}
39 | \keyword{ball}
40 | \keyword{base}
41 | \keyword{bb}
42 | \keyword{on}
43 | \keyword{percentage}
44 |
--------------------------------------------------------------------------------
/data-raw/data_prep.R:
--------------------------------------------------------------------------------
1 | # Build the data sets bundled with the package. Part 1: prep the seasonAVG data set.
2 | seasonAVG <- read.csv("~/Downloads/seasonAVG.csv") # NOTE(review): hard-coded local path; the CSV has to be in ~/Downloads before this runs.
3 | # These data come from Fangraphs
4 | # http://www.fangraphs.com/leaders.aspx?pos=all&stats=bat&lg=all&qual=0&type=8&season=2015&month=0&season1=1901&ind=0&team=0,ss&rost=0&age=0&filter=&players=0
5 | # The column names from the CSV are replaced positionally below, so column order matters.
6 | # Check to make sure the new columns line up with the old ones. The names below may need adjusting if they don't match.
7 | colnames(seasonAVG) <- c("yearID", "tot_G", "tot_PA", "tot_HR", "tot_R", "tot_RBI", "tot_SB", "avg_BB",
8 | "avg_K", "avg_ISO", "avg_BABIP", "avg_BA", "avg_OBP", "avg_SLG", "avg_wOBA",
9 | "avg_wRC", "avg_BsR", "off", "def", "avg_WAR")
10 | names(seasonAVG) # Print the new names for a visual check when run interactively.
11 | devtools::use_data(seasonAVG, overwrite = TRUE) # NOTE(review): use_data() moved to the usethis package in devtools 2.0.0 -- confirm this call still works with the installed devtools.
12 | rm(seasonAVG) # Clean the object out of the workspace once it has been saved.
13 |
14 | # Part 2: sample tables for a single season.
15 | # Get example Batting, Pitching, and Fielding tables to be used in package testing to avoid long test times.
16 | library(baseballDBR)
17 | library(dplyr) # NOTE(review): nothing below appears to call dplyr directly -- confirm whether it is still needed.
18 | # Each section below uses the table object right after get_bbdb(), so it relies on that call creating the object in the workspace.
19 | get_bbdb("Batting")
20 | Batting2016 <- subset(Batting, yearID == "2016") # Keep only the rows for the 2016 season.
21 | devtools::use_data(Batting2016, overwrite = TRUE)
22 | rm(Batting2016)
23 | rm(Batting)
24 | # Same steps for the Pitching table.
25 | get_bbdb("Pitching")
26 | Pitching2016 <- subset(Pitching, yearID == "2016")
27 | devtools::use_data(Pitching2016, overwrite = TRUE)
28 | rm(Pitching2016)
29 | rm(Pitching)
30 | # Same steps for the Fielding table.
31 | get_bbdb("Fielding")
32 | Fielding2016 <- subset(Fielding, yearID == "2016")
33 | devtools::use_data(Fielding2016, overwrite = TRUE)
34 | rm(Fielding2016)
35 | rm(Fielding)
36 |
--------------------------------------------------------------------------------
/man/Batting2016.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/batting2016.R
3 | \docType{data}
4 | \name{Batting2016}
5 | \alias{Batting2016}
6 | \title{A sample subset of the Batting table from the Baseball Databank for the year 2016.}
7 | \format{A data frame with 1483 rows and 22 variables:
8 | \describe{
9 | \item{playerID}{database key for unique player}
10 | \item{yearID}{year}
11 | \item{stint}{number of times played on team in a single year}
12 | \item{teamID}{database key for unique team}
13 | \item{lgID}{database key for unique league}
14 | \item{G}{number of games played}
15 | \item{AB}{number of at bats}
16 | \item{R}{number of runs scored}
17 | \item{H}{number of hits}
18 | \item{X2B}{number of doubles}
19 | \item{X3B}{number of triples}
20 | \item{HR}{number of home runs}
21 | \item{RBI}{number of runs batted in}
22 | \item{SB}{number of stolen bases}
23 | \item{CS}{number of times caught stealing}
24 | \item{BB}{number of bases on balls}
25 | \item{SO}{number of strikeouts}
26 | \item{IBB}{number of intentional bases on balls}
27 | \item{HBP}{number of times hit by pitch}
28 | \item{SH}{number of sacrifice hits}
29 | \item{SF}{number of sacrifice flies}
30 | \item{GIDP}{number of times grounded into a double play}
31 | }}
32 | \usage{
33 | data(Batting2016)
34 | }
35 | \description{
36 | A dataset containing Batting statistics in 2016.
37 | }
38 | \note{
39 | Last updated 2016-06-15
40 | }
41 | \keyword{internal}
42 |
--------------------------------------------------------------------------------
/man/RCtech.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{RCtech}
4 | \alias{RCtech}
5 | \title{Batting: Calculate Runs Created using the technical formula.}
6 | \usage{
7 | RCtech(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | The "Technical Version" is the most well-known formula for RC. It adds several factors to the
15 | basic formula such as sacrifice hits, stolen bases and intentional base on balls.
16 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
17 | "SF", "SH", and "IBB."
18 | }
19 | \examples{
20 |
21 | data("Batting2016")
22 | head(Batting2016)
23 |
24 | Batting2016$RCtech <- RCtech(Batting2016)
25 |
26 | }
27 | \seealso{
28 | Other Batting functions: \code{\link{BABIP}},
29 | \code{\link{BA}}, \code{\link{BBpct}},
30 | \code{\link{CTpct}}, \code{\link{HRpct}},
31 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
32 | \code{\link{OPS}}, \code{\link{PA}},
33 | \code{\link{RC2002}}, \code{\link{RCbasic}},
34 | \code{\link{SLG}}, \code{\link{TBs}},
35 | \code{\link{XBHpct}}, \code{\link{XBperH}},
36 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
37 | }
38 | \concept{Batting functions}
39 | \keyword{RCtech}
40 | \keyword{base}
41 | \keyword{extra}
42 | \keyword{hit}
43 | \keyword{per}
44 |
--------------------------------------------------------------------------------
/man/RC2002.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{RC2002}
4 | \alias{RC2002}
5 | \title{Batting: Calculate Runs Created using the updated 2002 formula.}
6 | \usage{
7 | RC2002(dat = NULL)
8 | }
9 | \arguments{
10 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
11 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
12 | }
13 | \description{
14 | The "2002 Version" is an updated version of the "Technical Version" by Bill James.
15 | The 2002 RC uses the same counting stats as the Technical Version but applies weights to many of the raw stats.
16 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
17 | "SF", "SH", "SO", and "IBB."
18 | }
19 | \examples{
20 |
21 | data("Batting2016")
22 | head(Batting2016)
23 |
24 | Batting2016$RC2002 <- RC2002(Batting2016)
25 |
26 | }
27 | \seealso{
28 | Other Batting functions: \code{\link{BABIP}},
29 | \code{\link{BA}}, \code{\link{BBpct}},
30 | \code{\link{CTpct}}, \code{\link{HRpct}},
31 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
32 | \code{\link{OPS}}, \code{\link{PA}},
33 | \code{\link{RCbasic}}, \code{\link{RCtech}},
34 | \code{\link{SLG}}, \code{\link{TBs}},
35 | \code{\link{XBHpct}}, \code{\link{XBperH}},
36 | \code{\link{wOBA}}, \code{\link{wRAA}}, \code{\link{wRC}}
37 | }
38 | \concept{Batting functions}
39 | \keyword{RC2002}
40 | \keyword{base}
41 | \keyword{extra}
42 | \keyword{hit}
43 | \keyword{per}
44 |
--------------------------------------------------------------------------------
/man/fip_values.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fip.R
3 | \name{fip_values}
4 | \alias{fip_values}
5 | \title{Return FIP constants per season}
6 | \usage{
7 | fip_values(dat = NULL, Sep.Leagues = FALSE, Fangraphs = FALSE)
8 | }
9 | \arguments{
10 | \item{dat}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
11 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
12 |
13 | \item{Sep.Leagues}{If TRUE, this will split the calculation and return unique FIP constants for the various leagues. This can be
14 | helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.}
15 |
16 | \item{Fangraphs}{If TRUE the function will return the Fangraphs FIP constants. This can not be used in conjunction with the
17 | \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.}
18 | }
19 | \description{
20 | Get fip constants for each season. By default the function uses a method adapted from
21 | Tom Tango and used by Fangraphs. The function returns FIP constants based on ERA \code{FIP_ERA} as well as constants based on RA \code{FIP_RA}.
22 | Both the Tango and Fangraphs formulas use ERA for their FIP constants.
23 | }
24 | \examples{
25 |
26 | data("Pitching2016")
27 | head(Pitching2016)
28 |
29 | fip_df <- fip_values(Pitching2016, Fangraphs=FALSE)
30 | head(fip_df)
31 |
32 |
33 | }
34 | \keyword{average,}
35 | \keyword{base}
36 | \keyword{fangraphs}
37 | \keyword{on}
38 | \keyword{wOBA,}
39 | \keyword{woba,}
40 |
--------------------------------------------------------------------------------
/R/pitching2016.R:
--------------------------------------------------------------------------------
1 | #' A sample subset of the Pitching table from the Baseball Databank for the year 2016.
2 | #'
3 | #' A dataset containing Pitching statistics in 2016.
4 | #'
5 | #' @format A data frame with 824 rows and 30 variables:
6 | #' \describe{
7 | #' \item{playerID}{database key for unique player}
8 | #' \item{yearID}{year}
9 | #' \item{stint}{number of times played on team in a single year}
10 | #' \item{teamID}{database key for unique team}
11 | #' \item{lgID}{database key for unique league}
12 | #' \item{W}{number of games won}
13 | #' \item{L}{number of games lost}
14 | #' \item{G}{number of games played}
15 | #' \item{GS}{number of games started}
16 | #' \item{CG}{number of complete games}
17 | #' \item{SHO}{number of shutouts}
18 | #' \item{SV}{number of home saves}
19 | #' \item{IPouts}{number of outs pitched}
20 | #' \item{H}{number of hits surrendered}
21 | #' \item{ER}{number of earned runs}
22 | #' \item{HR}{number of home runs surrendered}
23 | #' \item{BB}{number of base on balls}
24 | #' \item{SO}{number of strike outs}
25 | #' \item{BAOpp}{opponent batting average}
26 | #' \item{ERA}{earned run average}
27 | #' \item{IBB}{number of intentional base on balls}
28 | #' \item{WP}{number of wild pitches}
29 | #' \item{HBP}{number of hit batters by pitch}
30 | #' \item{BK}{number of balks}
31 | #' \item{BFP}{batters faced by pitcher}
32 | #' \item{GF}{Games finished}
33 | #' \item{R}{number of runs allowed}
34 | #' \item{SH}{number of sacrifice hits by opposing batters}
35 | #' \item{SF}{number of sacrifice flies by opposing batters}
36 | #' \item{GIDP}{number of grounded into double plays by opposing batters}
37 | #'
38 | #' }
39 | #' @docType data
40 | #' @keywords internal
41 | #' @usage data(Pitching2016)
42 | #' @note Last updated 2016-06-15
43 | "Pitching2016"
44 |
--------------------------------------------------------------------------------
/man/Pitching2016.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitching2016.R
3 | \docType{data}
4 | \name{Pitching2016}
5 | \alias{Pitching2016}
6 | \title{A sample subset of the Pitching table from the Baseball Databank for the year 2016.}
7 | \format{A data frame with 824 rows and 30 variables:
8 | \describe{
9 | \item{playerID}{database key for unique player}
10 | \item{yearID}{year}
11 | \item{stint}{number of times played on team in a single year}
12 | \item{teamID}{database key for unique team}
13 | \item{lgID}{database key for unique league}
14 | \item{W}{number of games won}
15 | \item{L}{number of games lost}
16 | \item{G}{number of games played}
17 | \item{GS}{number of games started}
18 | \item{CG}{number of complete games}
19 | \item{SHO}{number of shutouts}
20 | \item{SV}{number of home saves}
21 | \item{IPouts}{number of outs pitched}
22 | \item{H}{number of hits surrendered}
23 | \item{ER}{number of earned runs}
24 | \item{HR}{number of home runs surrendered}
25 | \item{BB}{number of base on balls}
26 | \item{SO}{number of strike outs}
27 | \item{BAOpp}{opponent batting average}
28 | \item{ERA}{earned run average}
29 | \item{IBB}{number of intentional base on balls}
30 | \item{WP}{number of wild pitches}
31 | \item{HBP}{number of hit batters by pitch}
32 | \item{BK}{number of balks}
33 | \item{BFP}{batters faced by pitcher}
34 | \item{GF}{Games finished}
35 | \item{R}{number of runs allowed}
36 | \item{SH}{number of sacrifice hits by opposing batters}
37 | \item{SF}{number of sacrifice flys by opposing batters}
38 | \item{GIDP}{number of grounded into double plays by opposing batters}
39 |
40 | }}
41 | \usage{
42 | data(Pitching2016)
43 | }
44 | \description{
45 | A dataset containing Pitching statistics in 2016.
46 | }
47 | \note{
48 | Last updated 2016-06-15
49 | }
50 | \keyword{internal}
51 |
--------------------------------------------------------------------------------
/R/fieldingStats.R:
--------------------------------------------------------------------------------
1 |
#' @title Fielding: Calculate defensive chances
#' @description The number of chances a player had to make a defensive play,
#' computed as assists plus put-outs plus errors.
#' Required fields from the Fielding table are "PO", "A", and "E".
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A vector with one element per row of \code{dat} holding
#' \code{A + PO + E}. An error is raised when \code{dat} is \code{NULL} or
#' lacks any of the required columns.
#' @keywords Ch Defensive Chances
#' @family Fielding functions
#' @export Ch
#' @examples
#'
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' Fielding2016$Ch <- Ch(Fielding2016)
#'
Ch <- function(dat = NULL) {
  # Validate with plain `if`: `ifelse()` is a vectorised selector, not control
  # flow, and fails with an unrelated error when handed the NULL from message().
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }

  missing_cols <- setdiff(c("PO", "A", "E"), colnames(dat))
  if (length(missing_cols) > 0) {
    # Fail loudly: continuing would silently return a zero-length vector.
    stop(
      "Not enough data to calculate. Please make sure your data includes ",
      "'PO', 'A' and 'E'. Missing: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  dat$A + dat$PO + dat$E
}
26 |
27 |
28 |
#' @title Fielding: Calculate fielding percentage
#' @description Find the fielding percentage for players with more than zero
#' defensive chances: put-outs plus assists, divided by put-outs plus assists
#' plus errors, rounded to three decimals.
#' Required fields from the Fielding table are "PO", "A", and "E".
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}. Rows with
#' zero (or missing) chances are \code{NA}. An error is raised when \code{dat}
#' is \code{NULL} or lacks any of the required columns.
#' @keywords Fld_pct fielding percentage
#' @family Fielding functions
#' @export Fld_pct
#' @examples
#'
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' Fielding2016$Fld_pct <- Fld_pct(Fielding2016)
#'
Fld_pct <- function(dat = NULL) {
  if (is.null(dat)) {
    stop("Please supply a valid data frame.", call. = FALSE)
  }

  missing_cols <- setdiff(c("PO", "A", "E"), colnames(dat))
  if (length(missing_cols) > 0) {
    stop(
      "Not enough data to calculate. Please make sure your data includes ",
      "'PO', 'A' and 'E'. Missing: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  plays_made <- dat$PO + dat$A
  chances <- plays_made + dat$E

  # Start from NA so rows without chances stay NA instead of NaN (0 / 0), and
  # so the result is always a numeric vector even when no row has a chance.
  pct <- rep(NA_real_, length(chances))
  has_chances <- !is.na(chances) & chances > 0
  pct[has_chances] <- round(plays_made[has_chances] / chances[has_chances], 3)
  pct
}
53 |
--------------------------------------------------------------------------------
/man/FIP.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pitchingStats.R
3 | \name{FIP}
4 | \alias{FIP}
5 | \title{Pitching: Fielding Independent Pitching (FIP)}
6 | \usage{
7 | FIP(dat = NULL, Fangraphs = FALSE, NA_to_zero = TRUE,
8 | Sep.Leagues = FALSE)
9 | }
10 | \arguments{
11 | \item{dat}{A data frame you would wish to calculate. The data frame must have the same column names found in
12 | The \code{Lahman} package or the Chadwick Bureau GitHub repository.}
13 |
14 | \item{Fangraphs}{If TRUE the function will download FIP constants from Fangraphs. If FALSE the function will use the internal
15 | formula adapted from Tom Tango's method. Note, the internal formula is typically identical to Fangraphs and
16 | does not require an external download. If not specified, the default is set to FALSE.}
17 |
18 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, intentional
19 | walks were not a counted statistic in baseball's early seasons, therefore we are technically unable to calculate FIP for pitchers from those seasons.
20 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
21 | If FALSE, the FIP calculation will return NaN for years with missing data.}
22 |
23 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
24 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
25 | park factors between the American and National leagues. The default for this argument is FALSE.}
26 | }
27 | \description{
28 | Find the FIP for all pitchers with one or more strikeouts in a particular season.
29 | Required fields from the Pitching table are "BB", "HBP", "SO", and "IPouts."
30 | }
31 | \examples{
32 |
33 | data("Pitching2016")
34 | head(Pitching2016)
35 |
36 | Pitching2016$FIP <- FIP(Pitching2016, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE)
37 |
38 | }
39 | \seealso{
40 | Other Pitching functions: \code{\link{BB_9}},
41 | \code{\link{HR_9}}, \code{\link{H_9}}, \code{\link{IP}},
42 | \code{\link{K_9}}, \code{\link{LOB_pct}},
43 | \code{\link{WHIP}}
44 | }
45 | \concept{Pitching functions}
46 | \keyword{FIP}
47 | \keyword{fielding}
48 | \keyword{independent}
49 | \keyword{pitching}
50 |
--------------------------------------------------------------------------------
/man/wOBA_values.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/woba_values.R
3 | \name{wOBA_values}
4 | \alias{wOBA_values}
5 | \title{Return wOBA values per season}
6 | \usage{
7 | wOBA_values(BattingTable, PitchingTable, FieldingTable,
8 | Sep.Leagues = FALSE, Fangraphs = FALSE)
9 | }
10 | \arguments{
11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
13 |
14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
16 |
17 | \item{FieldingTable}{A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
19 |
20 | \item{Sep.Leagues}{If TRUE, this will split the calculation and return unique wOBA values for the various leagues. This can be
21 | helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.}
22 |
23 | \item{Fangraphs}{if TRUE the function will return the Fangraphs wOBA values. By default the function uses a method adapted from
24 | Tom Tango. These values are often very close to Fangraphs, but are not the same due to Fangraphs using a different algorithm.
25 | This can not be used in conjunction with the \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.}
26 | }
27 | \description{
28 | Get wOBA values for each year in your database. This calculation requires all fields of
29 | the Pitching, Fielding and Batting tables from the Lahman package, or a comparable data set. The function uses
30 | a version of Tom Tango's wOBA formula by default, but can also return Fangraphs wOBA values.
31 | }
32 | \examples{
33 |
34 | data("Batting2016")
35 | head(Batting2016)
36 | data("Pitching2016")
37 | head(Pitching2016)
38 | data("Fielding2016")
39 | head(Fielding2016)
40 |
41 | woba_df <- wOBA_values(Batting2016, Pitching2016, Fielding2016, Sep.Leagues=FALSE, Fangraphs=FALSE)
42 |
43 | }
44 | \keyword{average,}
45 | \keyword{base}
46 | \keyword{fangraphs}
47 | \keyword{on}
48 | \keyword{wOBA,}
49 | \keyword{woba,}
50 |
--------------------------------------------------------------------------------
/vignettes/FIP.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Fielding Independent Pitching (FIP)"
3 | author: "Kris Eberwein"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{Fielding Independent Pitching (FIP)}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 |
12 | The `baseballDBR` package provides two functions for fielding independent pitching (FIP): the `FIP()` function, which calculates the metric for players in the "Pitching" table, and the `fip_values()` function, which calculates the league averages and constants used in the calculation.
13 |
14 | The FIP metric attempts to estimate a pitcher's performance independent of the performance of the other players on the defense. The goal is similar to that of earned run average (ERA), but unlike ERA, FIP removes the variable of a pitcher's "luck" from plays made by other defenders.
15 |
16 | Like wOBA, FIP is a weighted metric that relies on a system of linear weights and league averages. The formula for FIP is:
17 |
18 | $$FIP = \frac{13*HR + 3(BB + HBP) - 2*SO}{IP} + \text{FIP constant}$$
19 |
20 | The above formula is based on work by Tom Tango, and is similar to the DIPS metric by Voros McCracken. It looks only at the things a pitcher *can* control, such as hit-by-pitch, base-on-balls, strikeouts, and home runs, and weights them accordingly.
21 |
22 | ## The FIP Constant
23 |
24 | The FIP constant is used in the FIP calculation solely to bring the number onto the ERA scale, and make it more familiar to users. The wOBA constant works in much the same way to bring wOBA closer to OBP. The formula for the FIP constant is:
25 |
26 | $$lgERA - \frac{13*lgHR + 3(lgBB + lgHBP) - 2*lgSO}{lgIP}$$
27 |
28 | ```{r, eval=FALSE}
29 | library(baseballDBR)
30 |
31 | # Grab the Pitching table from the Baseball Databank
32 | get_bbdb(table = "Pitching")
33 |
34 | Pitching$FIP <- FIP(Pitching, Fangraphs=FALSE, NA_to_zero=FALSE, Sep.Leagues=FALSE)
35 |
36 |
37 | ```
38 |
39 |
40 | ### Arguments
41 |
42 | Unlike the `wOBA()` functions, which require three data frames, the `FIP()` function only requires the "Pitching" data frame.
43 |
44 | * Fangraphs: Should the function use the package's native algorithm or download the FIP constants from Fangraphs? The default is FALSE, as the results will likely be identical.
45 |
46 | * Sep.Leagues: Should the function determine separate FIP values for the National and American leagues? Standard practice would be to use FIP values that combine both leagues. Note, this option is not available if `Fangraphs=TRUE`, as Fangraphs does not separate leagues.
47 |
48 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Cy Young's intentional base-on-balls `IBB` metric is NA because that statistic wasn't tracked when he played, so his `FIP` should be NA. Note that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players.
49 |
50 | ### Fip Values
51 |
52 | For deeper analysis, the `fip_values()` function allows us to look at the league averages that are the underpinnings of the FIP calculation.
53 |
54 | ```{r, eval=FALSE}
55 | library(baseballDBR)
56 |
57 | fip_valz <- fip_values(Pitching, Fangraphs = FALSE, Sep.Leagues = FALSE)
58 | ```
59 |
60 |
--------------------------------------------------------------------------------
/vignettes/wRAA_wRC.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Weighted Runs Above Average (wRAA) and Weighted Runs Created (wRC)"
3 | author: "Kris Eberwein"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{Weighted Runs Above Average (wRAA) and Weighted Runs Created (wRC)}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 |
12 | # Weighted Runs Above Average (wRAA)
13 |
14 | The wRAA metric attempts to establish an average of runs scored by all the players in the league and rate a single player as how many runs, above or below average, that player scored in a given year. The baseline of the metric is zero, so some players may have a negative wRAA.
15 |
16 | Despite having all the characteristics of a "counting statistic," wRAA is based on wOBA and the wOBA scale, which rely on linear weights, so wRAA could certainly be considered a predictive metric.
17 |
18 | The formula for wRAA is:
19 |
20 | $$wRAA = \frac{wOBA - leagueWOBA}{wOBA\ scale} * \underbrace{(AB+BB-IBB+SF+HBP)}_{PA}$$
21 |
22 | The multiplier to the right is a formula for plate appearances, which differs from at-bats. Note that, SHs and IBBs are not counted in the `PA` formula because they are largely regarded as managerial decisions. For more information on wOBA, league wOBA, and wOBA scale; please see the `wOBA` vignette.
23 |
24 | The wRAA metric is also used to calculate wins above replacement (WAR).
25 |
26 | Since wRAA relies on wOBA coefficients, we need three tables to make the calculation; `Batting`, `Pitching`, and `Fielding`. We use all three tables in order to determine a player's primary position. More on this can be found in the wOBA vignette.
27 |
28 | ```{r, eval=FALSE}
29 | library(baseballDBR)
30 | # Load data from Baseball Databank
31 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
32 |
33 | Batting$wRAA <- wRAA(Batting, Pitching, Fielding, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE)
34 | ```
35 |
36 | ### Arguments
37 |
38 | * Fangraphs: Should the function use Fangraphs wOBA values or the package's native Tango method?
39 |
40 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Babe Ruth's sacrifice fly `SF` metric is NA because that statistic wasn't tracked when he played, so his `wRAA` should be NA. Note that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players.
41 |
42 | * Sep.Leagues: Should the function determine separate wOBA values for the National and American leagues? Standard practice would be to use wOBA values that combine both leagues. Note, this option is not available if `Fangraphs=TRUE`.
43 |
44 | # Weighted Runs Created (wRC)
45 |
46 | The wRC metric attempts to quantify a player’s total offensive value and measure it by the runs that player creates. The wRC metric is based on the "Runs Created" metric that was originally used by Bill James.
47 |
48 | wRC requires the same data and accepts the same arguments as the `wRAA()` and `wOBA()` functions.
49 |
50 | ```{r, eval=FALSE}
51 | library(baseballDBR)
52 | # Load data from Baseball Databank
53 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
54 |
55 | Batting$wRC <- wRC(Batting, Pitching, Fielding, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE)
56 | ```
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/man/wOBA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{wOBA}
4 | \alias{wOBA}
5 | \title{Batting: Calculate Weighted On-Base Average (wOBA)}
6 | \usage{
7 | wOBA(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL,
8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE)
9 | }
10 | \arguments{
11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
13 |
14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
16 |
17 | \item{FieldingTable}{A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
19 |
20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal
21 | formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and
22 | does not require an external download. If not specified, the default is set to FALSE.}
23 |
24 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
25 | were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954.
26 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
27 | If FALSE, the wOBA calculation will return NaN for years with missing data.}
28 |
29 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
30 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
31 | park factors between the American and National leagues. The default for this argument is FALSE.}
32 | }
33 | \description{
34 | Find the wOBA for all players with one or more hits for a particular season.
35 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
36 | }
37 | \examples{
38 |
39 | data("Batting2016")
40 | head(Batting2016)
41 | data("Pitching2016")
42 | head(Pitching2016)
43 | data("Fielding2016")
44 | head(Fielding2016)
45 |
46 | Batting2016$wOBA <- wOBA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
47 | NA_to_zero=TRUE, Sep.Leagues=FALSE)
48 |
49 | }
50 | \seealso{
51 | Other Batting functions: \code{\link{BABIP}},
52 | \code{\link{BA}}, \code{\link{BBpct}},
53 | \code{\link{CTpct}}, \code{\link{HRpct}},
54 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
55 | \code{\link{OPS}}, \code{\link{PA}},
56 | \code{\link{RC2002}}, \code{\link{RCbasic}},
57 | \code{\link{RCtech}}, \code{\link{SLG}},
58 | \code{\link{TBs}}, \code{\link{XBHpct}},
59 | \code{\link{XBperH}}, \code{\link{wRAA}},
60 | \code{\link{wRC}}
61 | }
62 | \concept{Batting functions}
63 | \keyword{Average}
64 | \keyword{On-Base}
65 | \keyword{Weighted}
66 | \keyword{wOBA}
67 |
--------------------------------------------------------------------------------
/man/wRC.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{wRC}
4 | \alias{wRC}
5 | \title{Batting: Calculate Weighted Runs Created (wRC)}
6 | \usage{
7 | wRC(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL,
8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE)
9 | }
10 | \arguments{
11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
13 |
14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
16 |
17 | \item{FieldingTable}{A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
19 |
20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRC
21 | calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula.
22 | This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRC should be expected.
23 | The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.}
24 |
25 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
26 | were not a counted statistic until 1954, therefore we are technically unable to calculate wRC for any player prior to 1954.
27 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
28 | If FALSE, the wRC calculation will return NaN for years with missing data.}
29 |
30 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
31 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
32 | park factors between the American and National leagues. The default for this argument is FALSE.}
33 | }
34 | \description{
35 | Find the wRC for all players with one or more hits for a particular season.
36 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
37 | }
38 | \examples{
39 |
40 | data("Batting2016")
41 | head(Batting2016)
42 | data("Pitching2016")
43 | head(Pitching2016)
44 | data("Fielding2016")
45 | head(Fielding2016)
46 |
47 | Batting2016$wRC <- wRC(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
48 | NA_to_zero=TRUE, Sep.Leagues=FALSE)
49 |
50 | }
51 | \seealso{
52 | Other Batting functions: \code{\link{BABIP}},
53 | \code{\link{BA}}, \code{\link{BBpct}},
54 | \code{\link{CTpct}}, \code{\link{HRpct}},
55 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
56 | \code{\link{OPS}}, \code{\link{PA}},
57 | \code{\link{RC2002}}, \code{\link{RCbasic}},
58 | \code{\link{RCtech}}, \code{\link{SLG}},
59 | \code{\link{TBs}}, \code{\link{XBHpct}},
60 | \code{\link{XBperH}}, \code{\link{wOBA}},
61 | \code{\link{wRAA}}
62 | }
63 | \concept{Batting functions}
64 | \keyword{Above}
65 | \keyword{Average}
66 | \keyword{Runs}
67 | \keyword{Weighted}
68 | \keyword{wRC}
69 |
--------------------------------------------------------------------------------
/man/wRAA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/battingStats.R
3 | \name{wRAA}
4 | \alias{wRAA}
5 | \title{Batting: Calculate Weighted Runs Above Average (wRAA)}
6 | \usage{
7 | wRAA(BattingTable = NULL, PitchingTable = NULL, FieldingTable = NULL,
8 | Fangraphs = FALSE, NA_to_zero = TRUE, Sep.Leagues = FALSE)
9 | }
10 | \arguments{
11 | \item{BattingTable}{A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
12 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
13 |
14 | \item{PitchingTable}{A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
15 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
16 |
17 | \item{FieldingTable}{A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
18 | Any subsetting or removal of players will affect your results. All players for each year are recommended.}
19 |
20 | \item{Fangraphs}{If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRAA
21 | calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula.
22 | This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRAA should be expected.
23 | The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.}
24 |
25 | \item{NA_to_zero}{If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
26 | were not a counted statistic until 1954, therefore we are technically unable to calculate wRAA for any player prior to 1954.
27 | The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
28 | If FALSE, the wRAA calculation will return NaN for years with missing data.}
29 |
30 | \item{Sep.Leagues}{If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
31 | the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
32 | park factors between the American and National leagues. The default for this argument is FALSE.}
33 | }
34 | \description{
35 | Find the wRAA for all players with one or more hits for a particular season.
36 | Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
37 | }
38 | \examples{
39 |
40 | data("Batting2016")
41 | head(Batting2016)
42 | data("Pitching2016")
43 | head(Pitching2016)
44 | data("Fielding2016")
45 | head(Fielding2016)
46 |
47 | Batting2016$wRAA <- wRAA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
48 | NA_to_zero=TRUE, Sep.Leagues=FALSE)
49 |
50 | }
51 | \seealso{
52 | Other Batting functions: \code{\link{BABIP}},
53 | \code{\link{BA}}, \code{\link{BBpct}},
54 | \code{\link{CTpct}}, \code{\link{HRpct}},
55 | \code{\link{ISO}}, \code{\link{Kpct}}, \code{\link{OBP}},
56 | \code{\link{OPS}}, \code{\link{PA}},
57 | \code{\link{RC2002}}, \code{\link{RCbasic}},
58 | \code{\link{RCtech}}, \code{\link{SLG}},
59 | \code{\link{TBs}}, \code{\link{XBHpct}},
60 | \code{\link{XBperH}}, \code{\link{wOBA}},
61 | \code{\link{wRC}}
62 | }
63 | \concept{Batting functions}
64 | \keyword{Above}
65 | \keyword{Average}
66 | \keyword{Runs}
67 | \keyword{Weighted}
68 | \keyword{wRAA}
69 |
--------------------------------------------------------------------------------
/R/fip.R:
--------------------------------------------------------------------------------
1 | #' @title Return FIP constants per season
2 | #' @description Get fip constants for each season. By default the function uses a method adapted from
3 | #' Tom Tango and used by Fangraphs. The function returns FIP constants based on ERA \code{FIP_ERA} as well as constants based on RA \code{FIP_RA}.
4 | #' Both the Tango and Fangraphs formulas use ERA for their FIP constants.
5 | #' @param dat A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
6 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
7 | #' @param Sep.Leagues If TRUE, this will split the calculation and return unique FIP constants for the various leagues. This can be
8 | #' helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.
9 | #' @param Fangraphs If TRUE the function will return the Fangraphs FIP constants. This can not be used in conjunction with the
10 | #' \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.
11 | #' @keywords FIP, fip, fielding independent pitching, fangraphs
12 | #' @importFrom rvest html_node
13 | #' @importFrom xml2 read_html
14 | #' @importFrom stats setNames
15 | #' @import dplyr
16 | #' @export fip_values
17 | #' @examples
18 | #'
19 | #' data("Pitching2016")
20 | #' head(Pitching2016)
21 | #'
22 | #' fip_df <- fip_values(Pitching2016, Fangraphs=FALSE)
23 | #' head(fip_df)
24 | #'
25 | #'
26 |
27 | fip_values <- function(dat=NULL, Sep.Leagues=FALSE, Fangraphs=FALSE){
28 |     # Declare values for Rcheck so it won't throw a note.
29 |     yearID=lgID=G=IPouts=H=HR=BB=SO=IBB=HBP=R=SF=W=L=GS=CG=SHO=SV=ER=WP=BK=BFP=GF=SH=GIDP=IP=lgERA=lgRA=NULL
30 |     # Both flags are scalars, so combine them with the short-circuiting `&&`.
31 |     if(isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
32 |         message("The Fangraphs Guts table does not separate FIP constants by league. Returning the combined Fangraphs values...")
33 |     }
34 |
35 |     if(isTRUE(Fangraphs)){
36 |         # If user wants to use Fangraphs, grab it from the website; any `dat` passed in is replaced.
37 |         dat <- xml2::read_html("http://www.fangraphs.com/guts.aspx?type=cn") %>%
38 |             rvest::html_node(xpath = '//*[(@id = "GutsBoard1_dg1_ctl00")]') %>%
39 |             rvest::html_table() %>%
40 |             stats::setNames(c("yearID", "lg_woba", "woba_scale", "wBB", "wHBP", "w1B", "w2B",
41 |                               "w3B", "wHR", "runSB", "runCS", "lg_r_pa", "lg_r_w", "cFIP"))
42 |     } else {
43 |         # The local calculation aggregates the pitching table, so fail early and clearly without one.
44 |         if(is.null(dat)) stop("Please supply a valid pitching data frame.", call. = FALSE)
45 |         dat <- dat[, !names(dat) %in% c("playerID", "teamID", "stint", "BAOpp", "ERA")]
46 |         # Replace NA with 0, otherwise the season totals and constants below will throw NA.
47 |         dat[is.na(dat)] <- 0
48 |         # One row per season, or per season and league when Sep.Leagues is TRUE.
49 |         if(isTRUE(Sep.Leagues)){
50 |             dat %<>% dplyr::group_by(yearID, lgID)
51 |         } else {
52 |             dat %<>% dplyr::group_by(yearID)
53 |         }
54 |         dat %<>%
55 |             dplyr::summarise(W=sum(W), L=sum(L), G=sum(G), GS=sum(GS), CG=sum(CG), SHO=sum(SHO), SV=sum(SV),
56 |                              IPouts=sum(IPouts), H=sum(H), ER=sum(ER), HR=sum(HR), BB=sum(BB), SO=sum(SO), IBB=sum(IBB),
57 |                              WP=sum(WP), HBP=sum(HBP), BK=sum(BK), BFP=sum(BFP), GF=sum(GF), R=sum(R), SH=sum(SH),
58 |                              SF=sum(SF), GIDP=sum(GIDP)) %>%
59 |             # Walk term is BB + HBP; the earlier (BB + IBB + HBP - IBB) added and removed IBB, which cancels.
60 |             dplyr::mutate(IP=IPouts/3, lgERA=(ER / IP)*9, lgRA=(R / IP)*9,
61 |                           cFIP=lgERA - ((HR*13) + ((BB + HBP)*3) - (SO*2)) / IP,
62 |                           cRA=lgRA - ((HR*13) + ((BB + HBP)*3) - (SO*2)) / IP)
63 |     }
64 |     return(dat)
65 | }
66 |
67 |
--------------------------------------------------------------------------------
/R/get_bbdb.R:
--------------------------------------------------------------------------------
1 | #' @title Get an up to date copy of the Baseball Databank.
2 | #' @description Download the newest version of the Baseball Databank from the Chadwick Bureau GitHub repository. This is the source of
3 | #' Sean Lahman's baseball database and is always under development. This function will read the .csv files and return them as data frames.
4 | #' There is also an option to download the entire directory.
5 | #' @param table The tables you would like to download. Uses Lahman table names Ex. "Batting", "Master", "AllstarFull", etc...
6 | #' Leave this argument as NULL and set \code{AllTables = TRUE} to download all twenty-seven tables.
7 | #' @param downloadZip If true, this will download a zip file of all twenty-seven tables in .csv format to your working directory.
8 | #' @param AllTables If true, this will download all the tables in the database. The default is set to false.
9 | #' @keywords database, data frame
10 | #' @import utils
11 | #' @export get_bbdb
12 | #' @examples
13 | #'
14 | #' get_bbdb(table = "Batting")
15 | #'
16 | #' \dontrun{
17 | #' get_bbdb(table = c("Batting", "Pitching"))
18 | #'}
19 | #'
20 | #'\dontrun{
21 | #' get_bbdb(AllTables = TRUE)
22 | #'}
23 |
24 | get_bbdb <- function(table=NULL, downloadZip=FALSE, AllTables=FALSE){
25 |     # URL pieces; "%s" is filled in with the GitHub account that hosts the data (original or backup).
26 |     zipURL <- "https://github.com/%s/baseballdatabank/archive/master.zip"
27 |     rawURL <- "https://raw.githubusercontent.com/%s/baseballdatabank/master/core/"
28 |     treeURL <- "https://github.com/chadwickbureau/baseballdatabank/tree/master/core"
29 |
30 |     # Pick the account to read from: Chadwick Bureau when `probe` responds, otherwise the backup.
31 |     # The previous fallback probed a Chadwick URL a second time and only used the backup when that
32 |     # second probe failed as well, so when the two probes disagreed `baseURL` was never defined
33 |     # (or nothing was downloaded).
34 |     pickRepo <- function(probe){
35 |         if (isTRUE(baseballDBR::urlExists(probe))) {
36 |             return("chadwickbureau")
37 |         }
38 |         message("Chadwick Bureau failed to connect, trying backup.")
39 |         "keberwein"
40 |     }
41 |
42 |     # Read every file in `paths` and assign the data frames into the global environment by name.
43 |     loadTables <- function(paths, tableNames){
44 |         tables <- lapply(stats::setNames(paths, make.names(tableNames)), read.csv, stringsAsFactors=FALSE)
45 |         list2env(tables, envir = .GlobalEnv)
46 |     }
47 |
48 |     # Save the full archive as "master.zip" in the working directory. Binary mode is requested
49 |     # explicitly so the zip is written byte-for-byte.
50 |     fetchZip <- function(probe){
51 |         download.file(sprintf(zipURL, pickRepo(probe)), "master.zip", mode = "wb")
52 |     }
53 |
54 |     # Tracks whether this call has already saved master.zip.
55 |     zipFetched <- FALSE
56 |     if (isTRUE(downloadZip)) {
57 |         fetchZip(sprintf(zipURL, "chadwickbureau"))
58 |         zipFetched <- TRUE
59 |     }
60 |
61 |     if (!is.null(table)) {
62 |         if("Master" %in% table) {
63 |             warning('The "Master" table is referred to as "People" in the Baseball Data Bank. Downloading the "People table."')
64 |             table <- gsub("Master", "People", table)
65 |         }
66 |         baseURL <- sprintf(rawURL, pickRepo(treeURL))
67 |         # paste0() is vectorised, so one call builds the URL for every requested table.
68 |         loadTables(paste0(baseURL, table, ".csv"), sub("\\.csv$", "", table))
69 |     }
70 |
71 |     # Both operands are scalars, so use `&&`. AllTables only applies when no `table` was given.
72 |     if (is.null(table) && isTRUE(AllTables)) {
73 |         # Reuse the archive if downloadZip already fetched it instead of downloading it twice.
74 |         if (!zipFetched) {
75 |             fetchZip(treeURL)
76 |         }
77 |         unzip("master.zip")
78 |         baseDIR <- "baseballdatabank-master/core/"
79 |         # list.files() expects a regular expression, not a glob: match a literal ".csv" suffix.
80 |         fileList <- list.files(path = baseDIR, pattern = "\\.csv$")
81 |         loadTables(paste0(baseDIR, fileList), sub("\\.csv$", "", fileList))
82 |         # Remove the extracted folder, and the archive too unless the caller asked to keep it.
83 |         if (!isTRUE(downloadZip)) {
84 |             unlink("master.zip")
85 |         }
86 |         unlink("baseballdatabank-master", recursive=TRUE)
87 |     }
88 | }
89 |
--------------------------------------------------------------------------------
/vignettes/Database_Tools.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Database Loading and Extracting Baseball Data"
3 | author: "Kris Eberwein"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{Database Loading and Extracting Baseball Data}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 |
12 | The data contained in the Baseball Databank / Lahman Database are small enough to fit directly into memory using R. However, many may find it preferable to store the data in a local database to be accessed at a later time. The following examples use a local PostgreSQL database and the `RPostgreSQL` package. Many of the following methods would also work with other database connections such as the `RMySQL` and `DBI` packages.
13 |
14 | ## Loading Data
15 |
16 | The goal here is to extract the data from source, do the transformations in R, and load them into the database. With the `baseballDBR` and `RPostgreSQL` packages, this is a simple task. The following assumes we are starting with an empty PostgreSQL instance named "lahman."
17 |
18 | ```{r, eval=FALSE}
19 | library(baseballDBR)
20 | library(RPostgreSQL)
21 | library(dplyr)
22 |
23 | get_bbdb(AllTables = TRUE)
24 |
25 | # Collect the names of all data frames in the global environment.
26 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame)))
27 |
28 | # Load the database driver and open a connection.
29 | drv <- dbDriver("PostgreSQL")
30 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD")
31 |
32 | # Do some transformations on our selected data frames.
33 | Batting <- mutate(Batting, BA = BA(Batting), PA = PA(Batting), OBP = OBP(Batting), OPS = OPS(Batting))
34 |
35 | # Write every data frame to our Postgres instance; seq_along() also copes with an empty dbTables.
36 | for (i in seq_along(dbTables)) {
37 |     if (dbExistsTable(con, dbTables[i])) {
38 |         dbRemoveTable(con, dbTables[i])
39 |     }
40 |     dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), row.names = FALSE)
41 | }
42 | # Make sure everything wrote correctly. The table name is quoted because PostgreSQL folds
43 | # unquoted identifiers to lower case (assumes dbWriteTable kept the mixed-case name; confirm).
44 | test <- dbGetQuery(con, 'SELECT * FROM "Batting" LIMIT 5')
45 | test
46 | rm(test)
47 |
48 | # Close the database connection.
49 | dbDisconnect(con)
50 | dbUnloadDriver(drv)
51 | ```
52 |
53 | ## Accessing Data
54 |
55 | Once our data are loaded into a local database instance, working with the data becomes less cumbersome because we have the choice to only load into R the data that we need, or the choice to leverage `dplyr` or `SQL` backends to work with the data without loading into R at all.
56 |
57 | ### Using dplyr Backends
58 |
59 | The `dplyr` package allows us to work with remote or on-disk data stored in databases using a familiar syntax and `dplyr` verbs. For more on database backends, see the dplyr [Databases vignette](https://cran.r-project.org/package=dplyr). A current limitation to this method is, functions from the `baseballDBR` package will not work, and the method is limited to `dplyr` verbs.
60 |
61 | ```{r, eval=FALSE}
62 | library(dplyr)
63 | library(baseballDBR)
64 |
65 | # Use an "src" connection to access the database. This requires the same arguments that RPostgreSQL uses to connect to the database.
66 | # Note: There are also src functions for connecting to various types of databases.
67 | pgcon <- src_postgres(host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD")
68 |
69 | # We're not loading the batting table, rather we're loading a method to access it.
70 | Batting_tbl <- tbl(pgcon, "Batting")
71 |
72 | # Use familiar dplyr verbs to access our data.
73 | Batting_tbl %>% select(playerID, yearID, H, AB) %>% filter(yearID >= 1900)
74 |
75 | # Pull the data into R first, then perform some modifications on the collected data frame.
76 | Batting <- collect(Batting_tbl, n = Inf)
77 | Batting <- mutate(Batting, BABIP = BABIP(Batting))
78 | # Write the modified table back to the database.
79 | db_insert_into( con = pgcon$con, table = "Batting", values = Batting)
80 |
81 | ```
82 |
83 | ### Using SQL Backends
84 |
85 | For those familiar with SQL, the same process can be achieved in our relational database's native language.
86 |
87 | ```{r, eval=FALSE}
88 | library(RPostgreSQL)
89 | library(baseballDBR)
90 |
91 | # Load the database driver and open a connection.
92 | drv <- dbDriver("PostgreSQL")
93 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD")
94 |
95 | # Load the Batting table into R. The name is quoted because PostgreSQL folds unquoted identifiers to lower case.
96 | Batting <- dbGetQuery(con, 'SELECT * FROM "Batting"')
97 |
98 | # Perform some modifications. Base R assignment is used because dplyr is not loaded in this example.
99 | Batting$BABIP <- BABIP(Batting)
100 |
101 | # Write the modified table back to the database, replacing the existing copy.
102 | dbWriteTable(con, name = "Batting", value = Batting, overwrite = TRUE, row.names = FALSE)
103 |
104 | # Close the database connection.
105 | dbDisconnect(con)
106 | dbUnloadDriver(drv)
107 |
108 | ```
109 |
110 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 | md_document:
4 | variant: markdown_github
5 | ---
6 |
7 | ```{r, echo = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "README-",
12 | warning = FALSE,
13 | message=FALSE
14 | )
15 | library(baseballDBR)
16 | ```
17 |
18 | # BaseballDBR
19 |
20 | [](https://travis-ci.org/keberwein/baseballDBR)
21 | [](http://www.r-pkg.org/badges/version/baseballDBR)
22 | [](http://www.repostatus.org/#active)
23 |
24 | # Install
25 |
26 | * Install from CRAN
27 |
28 | ```{r eval=FALSE}
29 | install.packages("baseballDBR")
30 | ```
31 |
32 | * Or, install the latest development version from GitHub:
33 |
34 | ```{r eval=FALSE}
35 | devtools::install_github("keberwein/baseballDBR")
36 | ```
37 |
38 | # Gathering Data
39 |
40 | The `baseballDBR` package requires data that is formatted similar to the [Baseball Databank](https://github.com/chadwickbureau/baseballdatabank) or Sean Lahman's [Baseball Database](http://www.seanlahman.com/baseball-archive/statistics/). The package also contains the `get_bbdb()` function, which allows us to download the most up-to-date tables directly from the Chadwick Bureau's GitHub repository. For example, we can easily load the "Batting" table into our R environment.
41 |
42 | ```{r}
43 | library(baseballDBR)
44 |
45 | get_bbdb(table = "Batting")
46 | head(Batting)
47 | ```
48 |
49 | ### Use with the Lahman Package
50 |
51 | ```{r}
52 | library(Lahman)
53 | library(baseballDBR)
54 |
55 | Batting <- Lahman::Batting
56 | head(Batting)
57 | ```
58 |
59 |
60 | # Adding Basic Metrics
61 |
62 | Simple batting metrics can be easily added to any batting data frame. For example, we can add slugging percentage, on-base percentage and on-base plus slugging. Note that OPS and OBP appear as "NA" for the years before IBB was tracked.
63 |
64 | ```{r}
65 | library(baseballDBR)
66 |
67 | Batting$SLG <- SLG(Batting)
68 |
69 | Batting$OBP <- OBP(Batting)
70 |
71 | head(Batting, 3)
72 | ```
73 |
74 | # Advanced Metrics
75 |
76 | The package includes a suite of advanced metrics such as wOBA, RAA, and FIP, among others. Many of the advanced metrics require multiple tables. For example, the wOBA metric requires the Batting, Pitching, and Fielding tables in order to establish a player's regular defensive position.
77 |
78 | ```{r}
79 | library(baseballDBR)
80 |
81 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
82 |
83 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T)
84 | head(Batting, 3)
85 | ```
86 |
87 | The code above uses [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn) wOBA values. The default behavior is to use Tom Tango's adapted [SQL formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/). Other options include `Sep.Leagues`, which may act as a buffer to any bias created by the designated hitter.
88 |
89 | ```{r}
90 | library(baseballDBR)
91 |
92 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
93 |
94 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F, Sep.Leagues = T)
95 | head(Batting, 3)
96 | ```
97 |
98 | We can also produce a data frame that only shows the wOBA multipliers. Notice the Fangraphs wOBA multipliers slightly differ from the Tango multipliers.
99 |
100 | ```{r}
101 | library(baseballDBR)
102 |
103 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
104 |
105 | fangraphs_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=T)
106 | head(fangraphs_woba, 3)
107 |
108 | tango_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=F)
109 | head(tango_woba, 3)
110 |
111 |
112 | ```
113 |
114 | # Create Local Database
115 |
116 | A relational database is not needed to work with these data. However, we may want to store the data to be called more quickly at a later time. We can download all of the tables at once with the `get_bbdb()` function and then write them to an empty schema in our favorite database. The example uses a newly created PostgreSQL instance, but other database tools can be used assuming an appropriate R package exists.
117 |
118 | ```{r, eval=F}
119 | library(baseballDBR)
120 | library(RPostgreSQL)
121 |
122 | # Load all tables into the Global Environment.
123 | get_bbdb(AllTables = TRUE)
124 |
125 | # Make a list of all data frames.
126 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame)))
127 |
128 | # Load data base drivers and load all data frames in a loop.
129 | drv <- dbDriver("PostgreSQL")
130 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD")
131 |
132 | for (i in 1:length(dbTables)) {
133 | dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), overwrite = TRUE)
134 | }
135 |
136 | # Disconnect from database.
137 | dbDisconnect(con)
138 | rm(con, drv)
139 | ```
140 |
141 |
142 |
143 |
--------------------------------------------------------------------------------
/vignettes/wOBA.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Weighted On-base Average (wOBA)"
3 | author: "Kris Eberwein"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{Weighted On-base Average (wOBA)}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 |
12 | The `baseballDBR` package provides several variations of the wOBA calculation. There are two primary functions that provide the data and calculations. The `wOBA()` function provides the final calculation, while the `wOBA_values()` function provides the season average data that drive the higher level calculation.
13 |
14 | ## Quick Start
15 |
16 | ```{r, eval=FALSE}
17 | library(baseballDBR)
18 | # Load data from Baseball Databank
19 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
20 | # Store the result as a new column, as the other examples do, rather than overwriting Batting.
21 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = TRUE)
22 | head(Batting, 3)
23 | ```
24 |
25 |
26 | ## Understanding wOBA
27 |
28 | Weighted on-base average was a statistic first used by sabermetrician Tom Tango and published in [The Book](http://www.insidethebook.com/). The wOBA metric has been shown to strongly correlate to the number of runs scored. The basic formula is:
29 |
30 | $$\frac{wBB*BB + wHBP*HBP + wX1B*X1B + wX2B*X2B + wX3B*X3B + wHR*HR}{(AB+BB-IBB+SF+SH+HBP)=PA}$$
31 |
32 | The basic formula is simple enough, but first we must find the `w` values, or weighted values. Calculating the weighted values is not as straight forward and is done by applying a system of linear weights to yearly league averages in order to create a "run scoring environment" for the year. The `baseballDBR` package uses [Tom Tango's formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/) to calculate weighted values. Tango's `SQL` has been ported to `R` for our use. The wOBA functions also offer a "Fangraphs" argument, which uses the weights provided by [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn). The Fangraphs algorithm and Tango algorithm produce similar woba values, but can be slightly different.
33 |
34 | ### Fangraphs wOBA vs Tango wOBA
35 |
36 | As we discussed above, the modifiers that Fangraphs produces are slightly different than the modifiers that the Tango algorithm produces, therefore the two produce slightly different wOBA values. The wOBA values are normally within one one-thousandth of one percent.
37 |
38 | **Why are they different?**
39 |
40 | The data from the Baseball Databank does not specify a player's position. Therefore, "fuzzy logic" is used to determine a player's primary position. This may cause instances where a player's statistics are weighted according to a position other than their primary position.
41 |
42 | ```{r, eval=FALSE}
43 | library(baseballDBR)
44 | library(dplyr)
45 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
46 |
47 | Batting$f_wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T)
48 |
49 | Batting$t_wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F)
50 |
51 | # Going to subset for players who had more than 100 at-bats and played in at least eighty games.
52 | # This should eliminate most of the pitchers and minor league call-ups.
53 | Batting_2016 <- subset(Batting, yearID >= 2016 & AB >= 100 & G >= 80) %>%
54 | arrange(desc(t_wOBA))
55 |
56 | head(Batting_2016)
57 |
58 | ```
59 |
60 | ### Arguments
61 |
62 | The `wOBA()` and `wOBA_values()` functions require three data frames: the Batting, Pitching, and Fielding tables. Optional arguments include:
63 |
64 | * Fangraphs: Should the function use Fangraphs wOBA values or the package's native Tango method?
65 |
66 | * NA_to_zero: Should the function apply `0` to statistics that may not have been counted? For example, Babe Ruth's sacrifice fly `SF` metric is NA because that statistic wasn't tracked when he played, so his `wOBA` should be NA. Note, that it is a statistically unsound practice to set NAs to zero. However, the authors of this package recognize the desire to compare past players to current players.
67 |
68 | * Sep.Leagues: Should the function determine separate wOBA values for the National and American leagues. Standard practice would be to use wOBA values that combine both leagues. Note, this function is not possible if `Fangraphs=TRUE`.
69 |
70 | Even though wOBA is a batting metric, the Pitching and Fielding tables are used to determine a player's primary position. The tables should be full tables of entire years, and not a subset, because the wOBA calculation depends on yearly league average values.
71 |
72 | ### The wOBA_values Function
73 |
74 | The higher-level `wOBA()` function relies on `wOBA_values()`. It is not necessary to call the `wOBA_values()` function to use the `wOBA()` function, but it has been exported from the package to give users the opportunity for deeper analysis. Arguments include:
75 |
76 | * Sep.Leagues - If `TRUE`, this will calculate separate wOBA values for the American and National leagues. The default setting is `FALSE` because league separation is not typically performed in wOBA calculations. The advantage to separating the leagues is, the resulting wOBA values will naturally account for the DH and batting pitchers.
77 |
78 | * Fangraphs - If `TRUE` the function will use wOBA values provided by Fangraphs. The default is to use a ported version of Tom Tango's algorithm as applied to the Baseball Databank. The two algorithms produce similar, but slightly different results. The advantage to using the Tango algorithm is, it can be used in conjunction with `Sep.Leagues=TRUE`, whereas the Fangraphs data only provide for the combined leagues.
79 |
80 | ```{r, eval=FALSE}
81 | library(baseballDBR)
82 | # Load data from Baseball Databank
83 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
84 |
85 | # Run wOBA values for separate leagues
86 | w_vals <- wOBA_values(BattingTable = Batting, FieldingTable = Fielding, PitchingTable = Pitching, Sep.Leagues = TRUE)
87 |
88 | ```
89 |
90 | If we look at the data, we notice that the years 1871 to 1875 produce several NAs. This is due to incomplete or untracked data during that time period. We also notice there was only one league in existence during those years. Otherwise, the data are complete. The "league wOBA" for the two leagues is often close, but varies depending on the quality of play across various years.
91 |
92 | ```{r, eval=FALSE}
93 | head(w_vals)
94 | ```
95 |
96 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | BaseballDBR
2 | =======================================================================
3 |
4 | [](https://travis-ci.org/keberwein/baseballDBR) [](http://www.r-pkg.org/badges/version/baseballDBR) [](http://www.repostatus.org/#active)
5 |
6 | Install
7 | =======
8 |
9 | - Install from CRAN
10 |
11 | ``` r
12 | install.packages("baseballDBR")
13 | ```
14 |
15 | - Or, install the latest development version from GitHub:
16 |
17 | ``` r
18 | devtools::install_github("keberwein/baseballDBR")
19 | ```
20 |
21 | Gathering Data
22 | ==============
23 |
24 | The `baseballDBR` package requires data that is formatted similar to the [Baseball Databank](https://github.com/chadwickbureau/baseballdatabank) or Sean Lahman's [Baseball Database](http://www.seanlahman.com/baseball-archive/statistics/). The package also contains the `get_bbdb()` function, which allows us to download the most up-to-date tables directly from the Chadwick Bureau's GitHub repository. For example, we can easily load the "Batting" table into our R environment.
25 |
26 | ``` r
27 | library(baseballDBR)
28 |
29 | get_bbdb(table = "Batting")
30 | head(Batting)
31 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB
32 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0
33 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4
34 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2
35 | #> 4 allisdo01 1871 1 WS3 27 133 28 44 10 2 2 27 1 1 0
36 | #> 5 ansonca01 1871 1 RC1 25 120 29 39 11 3 0 16 6 2 2
37 | #> 6 armstbo01 1871 1 FW1 12 49 9 11 2 1 0 5 0 1 0
38 | #> SO IBB HBP SH SF GIDP
39 | #> 1 0 NA NA NA NA NA
40 | #> 2 0 NA NA NA NA NA
41 | #> 3 5 NA NA NA NA NA
42 | #> 4 2 NA NA NA NA NA
43 | #> 5 1 NA NA NA NA NA
44 | #> 6 1 NA NA NA NA NA
45 | ```
46 |
47 | ### Use with the Lahman Package
48 |
49 | ``` r
50 | library(Lahman)
51 | library(baseballDBR)
52 |
53 | Batting <- Lahman::Batting
54 | head(Batting)
55 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB
56 | #> 1 abercda01 1871 1 TRO NA 1 4 0 0 0 0 0 0 0 0 0
57 | #> 2 addybo01 1871 1 RC1 NA 25 118 30 32 6 0 0 13 8 1 4
58 | #> 3 allisar01 1871 1 CL1 NA 29 137 28 40 4 5 0 19 3 1 2
59 | #> 4 allisdo01 1871 1 WS3 NA 27 133 28 44 10 2 2 27 1 1 0
60 | #> 5 ansonca01 1871 1 RC1 NA 25 120 29 39 11 3 0 16 6 2 2
61 | #> 6 armstbo01 1871 1 FW1 NA 12 49 9 11 2 1 0 5 0 1 0
62 | #> SO IBB HBP SH SF GIDP
63 | #> 1 0 NA NA NA NA NA
64 | #> 2 0 NA NA NA NA NA
65 | #> 3 5 NA NA NA NA NA
66 | #> 4 2 NA NA NA NA NA
67 | #> 5 1 NA NA NA NA NA
68 | #> 6 1 NA NA NA NA NA
69 | ```
70 |
71 | Adding Basic Metrics
72 | ====================
73 |
74 | Simple batting metrics can be easily added to any batting data frame. For example, we can add slugging percentage, on-base percentage and on-base plus slugging. Note that OPS and OBP appears as "NA" for the years before IBB was tracked.
75 |
76 | ``` r
77 | library(baseballDBR)
78 |
79 | Batting$SLG <- SLG(Batting)
80 |
81 | Batting$OBP <- OBP(Batting)
82 |
83 | head(Batting, 3)
84 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB
85 | #> 1 abercda01 1871 1 TRO NA 1 4 0 0 0 0 0 0 0 0 0
86 | #> 2 addybo01 1871 1 RC1 NA 25 118 30 32 6 0 0 13 8 1 4
87 | #> 3 allisar01 1871 1 CL1 NA 29 137 28 40 4 5 0 19 3 1 2
88 | #> SO IBB HBP SH SF GIDP SLG OBP
89 | #> 1 0 NA NA NA NA NA 0.000 NA
90 | #> 2 0 NA NA NA NA NA 0.322 NA
91 | #> 3 5 NA NA NA NA NA 0.394 NA
92 | ```
93 |
94 | Advanced Metrics
95 | ================
96 |
97 | The package includes a suite of advanced metrics such as wOBA, RAA, and FIP, among others. Many of the advanced metrics require multiple tables. For example, the wOBA metric requires the Batting, Pitching, and Fielding tables in order to establish a player's regular defensive position.
98 |
99 | ``` r
100 | library(baseballDBR)
101 |
102 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
103 |
104 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = T)
105 | head(Batting, 3)
106 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB
107 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0
108 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4
109 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2
110 | #> SO IBB HBP SH SF GIDP wOBA
111 | #> 1 0 NA NA NA NA NA 0.0000000
112 | #> 2 0 NA NA NA NA NA 0.2855902
113 | #> 3 5 NA NA NA NA NA 0.3078849
114 | ```
115 |
116 | The code above uses [Fangraphs](http://www.fangraphs.com/guts.aspx?type=cn) wOBA values. The default behavior is to use Tom Tango's adapted [SQL formula](http://www.insidethebook.com/ee/index.php/site/article/woba_year_by_year_calculations/). Other options include `Sep.Leagues`, which may act as a buffer to any bias created by the designated hitter.
117 |
118 | ``` r
119 | library(baseballDBR)
120 |
121 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
122 |
123 | Batting$wOBA <- wOBA(Batting, Pitching, Fielding, Fangraphs = F, Sep.Leagues = T)
124 | head(Batting, 3)
125 | #> playerID yearID stint teamID lgID G AB R H X2B X3B HR RBI SB CS BB
126 | #> 1 abercda01 1871 1 TRO 1 4 0 0 0 0 0 0 0 0 0
127 | #> 2 addybo01 1871 1 RC1 25 118 30 32 6 0 0 13 8 1 4
128 | #> 3 allisar01 1871 1 CL1 29 137 28 40 4 5 0 19 3 1 2
129 | #> SO IBB HBP SH SF GIDP wOBA
130 | #> 1 0 NA NA NA NA NA NA
131 | #> 2 0 NA NA NA NA NA NA
132 | #> 3 5 NA NA NA NA NA NA
133 | ```
134 |
135 | We can also produce a data frame that only shows the wOBA multipliers. Notice the Fangraphs wOBA multipliers slightly differ from the Tango multipliers.
136 |
137 | ``` r
138 | library(baseballDBR)
139 |
140 | get_bbdb(table = c("Batting", "Pitching", "Fielding"))
141 |
142 | fangraphs_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=T)
143 | head(fangraphs_woba, 3)
144 | #> yearID lg_woba woba_scale wBB wHBP w1B w2B w3B wHR runSB
145 | #> 1 2017 0.320 1.192 0.693 0.723 0.878 1.236 1.558 1.989 0.2
146 | #> 2 2016 0.318 1.212 0.691 0.721 0.878 1.242 1.569 2.015 0.2
147 | #> 3 2015 0.313 1.251 0.687 0.718 0.881 1.256 1.594 2.065 0.2
148 | #> runCS lg_r_pa lg_r_w cFIP
149 | #> 1 -0.421 0.121 10.007 3.126
150 | #> 2 -0.410 0.118 9.778 3.147
151 | #> 3 -0.392 0.113 9.421 3.134
152 |
153 | tango_woba <- wOBA_values(Batting, Pitching, Fielding, Fangraphs=F)
154 | head(tango_woba, 3)
155 | #> # A tibble: 3 x 35
156 | #> # Groups: yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR,
157 | #> # runSB, runCS [3]
158 | #> yearID AB R H X2B X3B HR SB CS BB SO IBB
159 | #>
160 | #> 1 1871 23179 5659 6616 950 495 101 948 270 817 371 0
161 | #> 2 1872 34755 7487 10003 1212 293 88 536 264 477 532 0
162 | #> 3 1873 40346 8487 11832 1308 472 102 395 253 747 552 0
163 | #> # ... with 23 more variables: HBP , SF , RperOut ,
164 | #> # runBB , runHBP , run1B , run2B , run3B ,
165 | #> # runHR , runSB , runCS , runMinus , runPlus ,
166 | #> # lg_woba , woba_scale , wBB , wHBP , w1B ,
167 | #> # w2B , w3B , wHR , wSB , wCS
168 | ```
169 |
170 | Create Local Database
171 | =====================
172 |
173 | A relational database is not needed to work with these data. However, we may want to store the data to be called more quickly at a later time. We can download all of the tables at once with the `get_bbdb()` function and then write them to an empty schema in our favorite database. The example uses a newly created PostgreSQL instance, but other database tools can be used assuming an appropriate R package exists.
174 |
175 | ``` r
176 | library(baseballDBR)
177 | library(RPostgreSQL)
178 |
179 | # Load all tables into the Global Environment.
180 | get_bbdb(AllTables = TRUE)
181 |
182 | # Make a list of all data frames.
183 | dbTables <- names(Filter(isTRUE, eapply(.GlobalEnv, is.data.frame)))
184 |
185 | # Load database drivers and load all data frames in a loop.
186 | drv <- dbDriver("PostgreSQL")
187 | con <- dbConnect(drv, host= "localhost", dbname= "lahman", user= "YOUR_USERNAME", password = "YOUR_PASSWORD")
188 |
189 | for (i in 1:length(dbTables)) {
190 | dbWriteTable(con, name = dbTables[i], value = get0(dbTables[i]), overwrite = TRUE)
191 | }
192 |
193 | # Disconnect from database.
194 | dbDisconnect(con)
195 | rm(con, drv)
196 | ```
197 |
--------------------------------------------------------------------------------
/R/pitchingStats.R:
--------------------------------------------------------------------------------
1 |
#' @title Pitching: Calculate walks per nine innings
#' @description Find walks allowed per nine innings for pitchers with one or more innings pitched
#' (more than two outs recorded). Rows with fewer outs, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are "IPouts" and "BB."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords BB BB_9 BB9 bb/9
#' @family Pitching functions
#' @export BB_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$BB_9 <- BB_9(Pitching2016)
#'
BB_9 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("BB", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'BB' and 'IPouts'",
             call. = FALSE)
    }

    # Return the ifelse() result itself. Assigning inside the `yes` argument kept the
    # unmasked vector (or nothing at all when no row qualified), so the NA guard was lost.
    ifelse(dat$IPouts > 2,
           round(dat$BB * 9 / (dat$IPouts / 3), 3),
           NA_real_)
}
28 |
#' @title Pitching: Fielding Independent Pitching (FIP)
#' @description Find the FIP for all pitchers with one or more strikeouts in a particular season.
#' Pitchers with zero strikeouts return \code{NA}.
#' Required fields from the Pitching table are "yearID", "HR", "BB", "HBP", "SO", and "IPouts"
#' ("lgID" is also needed when \code{Sep.Leagues} is TRUE).
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
#' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
#' park factors between the American and National leagues. The default for this argument is FALSE.
#' @param NA_to_zero If TRUE, missing hit-by-pitch (HBP) values are replaced with 0 before the calculation, so that
#' seasons in which HBP was not recorded still produce a result. The default is set to TRUE. Even though this is bad practice
#' mathematically, many in the sabermetrics community accept the practice.
#' If FALSE, rows with a missing HBP return NA.
#' @param Fangraphs Passed to \code{fip_values}. If TRUE the function will download wOBA values from Fangraphs. If FALSE the
#' function will use the internal formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically
#' identical to Fangraphs and does not require an external download. If not specified, the default is set to FALSE.
#' It is reset to FALSE when combined with \code{Sep.Leagues=TRUE}.
#' @return A numeric vector of FIP values, one per row of the data after joining the season constants.
#' @keywords FIP fielding independent pitching
#' @family Pitching functions
#' @export FIP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$FIP <- FIP(Pitching2016, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE)
#'
FIP <- function (dat=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("yearID", "HR", "BB", "HBP", "SO", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'yearID', 'HR', 'BB', 'HBP', 'SO', and 'IPouts'",
             call. = FALSE)
    }

    # Both flags are scalars, so use the short-circuit operator.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not separate FIP by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }

    # Season (and optionally league) FIP constants come from the package helper.
    fip_constants <- fip_values(dat=dat, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    if (isTRUE(NA_to_zero)){
        dat$HBP[is.na(dat$HBP)] <- 0
    }

    # Attach the matching cFIP constant to every pitching row.
    if (isTRUE(Sep.Leagues)){
        fip_constants <- fip_constants[, c("yearID", "lgID", "cFIP")]
        dat <- dplyr::left_join(dat, fip_constants, by=c("yearID", "lgID"))
    } else {
        fip_constants <- fip_constants[, c("yearID", "cFIP")]
        dat <- dplyr::left_join(dat, fip_constants, by="yearID")
    }

    # The former "+ IBB ... - IBB" pair cancelled out arithmetically but turned every row
    # with a missing IBB into NA, so it is dropped. The ifelse() result is returned directly;
    # assigning inside `yes` discarded the NA guard and could hand back the constants table.
    ifelse(dat$SO > 0,
           ((dat$HR * 13) + ((dat$BB + dat$HBP) * 3) - (dat$SO * 2)) / (dat$IPouts / 3) + dat$cFIP,
           NA_real_)
}
85 |
86 |
#' @title Pitching: Calculate Hits per Nine innings
#' @description Find the number of hits a pitcher allows per nine innings pitched.
#' Rows with fewer than three outs recorded, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are "H" and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords hits per nine innings
#' @family Pitching functions
#' @export H_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$H_9 <- H_9(Pitching2016)
#'
H_9 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    # Only the columns the formula reads are required ("BB" is not used here).
    if (!all(c("H", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H' and 'IPouts'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$IPouts > 2,
           round((dat$H * 9) / (dat$IPouts / 3), 3),
           NA_real_)
}
114 |
#' @title Pitching: Calculate Home Runs per Nine innings
#' @description Find the number of home runs a pitcher allows per nine innings pitched.
#' Rows with fewer than three outs recorded, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are "HR" and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords home runs per nine innings
#' @family Pitching functions
#' @export HR_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$HR_9 <- HR_9(Pitching2016)
#'
HR_9 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("HR", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'HR' and 'IPouts'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$IPouts > 2,
           round((dat$HR * 9) / (dat$IPouts / 3), 3),
           NA_real_)
}
140 |
#' @title Pitching: Calculate the innings pitched
#' @description Find the number of innings a player has pitched for a season, computed as
#' outs recorded divided by three. A missing \code{IPouts} returns \code{NA}.
#' Required fields from the Pitching table are; "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords innings pitched
#' @family Pitching functions
#' @export IP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$IP <- IP(Pitching2016)
#'
IP <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!("IPouts" %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'IPouts'",
             call. = FALSE)
    }

    # Innings are well defined for any number of outs, so every row gets a value. The old
    # `ifelse(IPouts > 2, IP <- ..., NA)` form never applied its guard to the returned
    # vector, and returned this function itself when no row had more than two outs.
    round(dat$IPouts / 3, 3)
}
166 |
#' @title Pitching: Calculate Strikeouts per Nine innings
#' @description Find the number of strikeouts a pitcher records per nine innings pitched.
#' Rows with fewer than three outs recorded, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are "SO" and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords strikeouts per nine innings
#' @family Pitching functions
#' @export K_9
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$K_9 <- K_9(Pitching2016)
#'
K_9 <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    # Only the columns the formula reads are required ("H" and "BB" are not used here).
    if (!all(c("SO", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'SO' and 'IPouts'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$IPouts > 2,
           round((dat$SO * 9) / (dat$IPouts / 3), 3),
           NA_real_)
}
192 |
#' @title Pitching: Calculate the left on base percentage
#' @description Find the percentage of base runners that a pitcher leaves on base over the course of a season.
#' Rows with fewer than three outs recorded, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are "H", "BB", "HBP", "R", "HR", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords LOB_pct LOB LOB percentage
#' @family Pitching functions
#' @export LOB_pct
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$LOB_pct <- LOB_pct(Pitching2016)
#'
LOB_pct <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    # "IPouts" drives the guard below, so it is required alongside the formula inputs.
    if (!all(c("H", "BB", "HBP", "R", "HR", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H', 'BB', 'HBP', 'R', 'HR', and 'IPouts'",
             call. = FALSE)
    }

    runners <- dat$H + dat$BB + dat$HBP

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$IPouts > 2,
           round((runners - dat$R) / (runners - (1.4 * dat$HR)), 3),
           NA_real_)
}
219 |
#' @title Pitching: Calculate Walks plus Hits per Innings Pitched
#' @description Find the number of walks plus hits a pitcher allows per inning pitched.
#' Rows with fewer than three outs recorded, or with a missing \code{IPouts}, return \code{NA}.
#' Required fields from the Pitching table are; "H", "BB", and "IPouts."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords Walks plus Hits per Innings Pitched WHIP
#' @family Pitching functions
#' @export WHIP
#' @examples
#'
#' data("Pitching2016")
#' head(Pitching2016)
#'
#' Pitching2016$WHIP <- WHIP(Pitching2016)
#'
WHIP <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("H", "BB", "IPouts") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H', 'BB', and 'IPouts'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$IPouts > 2,
           round((dat$BB + dat$H) / (dat$IPouts / 3), 3),
           NA_real_)
}
246 |
247 |
248 |
249 |
250 |
--------------------------------------------------------------------------------
/R/woba_values.R:
--------------------------------------------------------------------------------
#' @title Return wOBA values per season
#' @description Get wOBA values for each year in your database. This calculation requires all fields of
#' the Pitching, Fielding and Batting tables from the Lahman package, or a comparable data set. The function uses
#' a version of Tom Tango's wOBA formula by default, but can also return Fangraphs wOBA values.
#' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param Sep.Leagues If TRUE, this will split the calculation and return unique wOBA values for the various leagues. This can be
#' helpful in handling Designated Hitters and National League pitchers. It also isolates the park factors to their respective leagues.
#' @param Fangraphs if TRUE the function will return the Fangraphs wOBA values. By default the function uses a method adapted from
#' Tom Tango. These values are often very close to Fangraphs, but are not the same due to Fangraphs using a different algorithm.
#' This can not be used in conjunction with the \code{Sep.Leagues} argument because Fangraphs does not separate FIP constants by league.
#' @return A data frame with one row per season (per season and league when \code{Sep.Leagues} is TRUE). With
#' \code{Fangraphs=TRUE} it is the scraped Fangraphs table renamed to \code{yearID}, \code{lg_woba}, \code{woba_scale},
#' the \code{w*} weights, \code{runSB}, \code{runCS}, \code{lg_r_pa}, \code{lg_r_w} and \code{cFIP}. Otherwise it is a grouped
#' table holding the yearly batting totals, the run values and the derived wOBA weights.
#' @keywords woba, wOBA, on base average, fangraphs
#' @importFrom rvest html_node html_table
#' @importFrom xml2 read_html
#' @importFrom stats setNames
#' @import dplyr
#' @export wOBA_values
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#' data("Pitching2016")
#' head(Pitching2016)
#' data("Fielding2016")
#' head(Fielding2016)
#'
#' woba_df <- wOBA_values(Batting2016, Pitching2016, Fielding2016, Sep.Leagues=FALSE, Fangraphs=FALSE)
#'

wOBA_values <- function(BattingTable, PitchingTable, FieldingTable, Sep.Leagues=FALSE, Fangraphs=FALSE){
    # Declare values for Rcheck so it won't throw a note.
    POS=yearID=postf=playerID=teamID=lgID=G=IPouts=R=RperOut=runBB=run1B=run2B=runHBP=run3B=
        runHR=runSB=runCS=AB=H=X2B=X3B=HR=SB=CS=BB=SO=IBB=HBP=SF=runPlus=runMinus=wOBAscale=NULL
    # Make sure users don't contradict themselves: the league split wins and the
    # Fangraphs download is switched off.
    if(isTRUE(Sep.Leagues) & isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs=FALSE
    }

    if(isTRUE(Fangraphs)){
        # If user wants to use Fangraphs, grab it from the website.
        # Needs network access; the table is located by its element id and renamed
        # positionally, so it relies on the page keeping these 14 columns in this order.
        runsBatting <- xml2::read_html("https://www.fangraphs.com/guts.aspx?type=cn") %>%
            html_node(xpath = '//*[(@id = "GutsBoard1_dg1_ctl00")]') %>%
            html_table() %>%
            setNames(c("yearID", "lg_woba", "woba_scale", "wBB", "wHBP", "w1B", "w2B",
                       "w3B", "wHR", "runSB", "runCS", "lg_r_pa", "lg_r_w", "cFIP"))
    }

    if(!isTRUE(Fangraphs)){
        # Find primary positions
        fielding <- FieldingTable
        # The "postf" field below is to filter out Natl. League players who may have
        # played as DH in inter-league games, and may have multiple entries at diff. positions.
        # Concretely: rows with POS == "OF" from seasons after 1995 are flagged and dropped,
        # then games are summed per player, season, team, league and position.
        PrimPos <- dplyr::mutate(fielding, postf=ifelse(POS=="OF" & yearID>1995, 1,0)) %>%
            subset(postf==0,
                   select=c("playerID", "yearID", "teamID", "lgID","G", "POS")) %>%
            group_by(playerID, yearID, teamID, lgID, POS) %>%
            summarise(G = sum(G))

        # Find a run environment for each season, including pitchers.
        pitching <- PitchingTable %>% subset(select=c("yearID", "playerID", "lgID","R", "IPouts"))

        # Keep only fielding rows at pitcher and attach their runs allowed and outs recorded.
        pitchersPOS <- subset(PrimPos, POS=="P")

        pitchingLRPO <- inner_join(pitchersPOS, pitching, by=c("yearID", "playerID", "lgID"))


        LeagueRunsPerOut <- subset(pitchingLRPO, select=c("yearID", "lgID", "R", "IPouts")) %>%
            # Set NA to 0 so the sums will work.
            mutate(IPouts=ifelse(is.na(IPouts),0,IPouts))

        # Check to see if user wants the AL and NL split.
        if(isTRUE(Sep.Leagues)){
            # Runs per out for each season and league.
            LeagueRunsPerOut <- group_by(LeagueRunsPerOut, yearID, lgID) %>%
                summarise(R=sum(R), IPouts=sum(IPouts)) %>%
                mutate(RperOut=R/IPouts) %>%
                rename(totR=R, totOuts=IPouts)

            # Same linear-weight offsets as the combined branch below, keyed by league as well.
            RunValues <- subset(LeagueRunsPerOut, select=c("yearID", "lgID", "RperOut")) %>%
                group_by(yearID, lgID) %>%
                mutate(runBB=RperOut+0.14, runHBP=runBB+0.025, run1B=runBB+0.155, run2B=run1B+0.3,
                       run3B=run2B+0.27, runHR=1.4, runSB=0.2, runCS=(2*RperOut)+0.075) %>%
                group_by(yearID, lgID ,RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS)
        } else {
            # Runs per out for each season, all leagues combined.
            LeagueRunsPerOut <- group_by(LeagueRunsPerOut, yearID) %>%
                summarise(R=sum(R), IPouts=sum(IPouts)) %>%
                mutate(RperOut=R/IPouts) %>%
                rename(totR=R, totOuts=IPouts)

            # Calculate the Run Values for each event using Tom Tango's linear weights.
            # More info from Tango can be found here:
            # http://www.insidethebook.com/ee/index.php/site/comments/woba_year_by_year_calculations/
            # Note that HR and SB are static values. Tango admits this isn't perfect but is close.
            RunValues <- subset(LeagueRunsPerOut, select=c("yearID", "RperOut")) %>% group_by(yearID) %>%
                mutate(runBB=RperOut+0.14, runHBP=runBB+0.025, run1B=runBB+0.155, run2B=run1B+0.3,
                       run3B=run2B+0.27, runHR=1.4, runSB=0.2, runCS=(2*RperOut)+0.075) %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS)
        }


        # Use Position Players table to find the runsPlus and runsMinus values to use in the wOBA multiplier.
        batting <- BattingTable
        # Drop the batting "G" column before joining; PrimPos carries its own "G".
        batting <- batting[, !names(batting) %in% c("G")]
        # NOTE(review): PrimPos holds one row per player/season/team/league/position, so a
        # batting row matches every position (and team) row sharing its playerID, yearID and
        # lgID and may be repeated in the sums below - confirm this is intended.
        batting <- inner_join(batting, PrimPos, by=c("playerID", "yearID", "lgID"))
        # Replace NA with 0, otherwise our runsMinus and runsPlus calculations will throw NA.
        batting[is.na(batting)] <- 0

        if(isTRUE(Sep.Leagues)){
            # Summarize values by year and league.
            yearbatting <- subset(batting, select=c("yearID", "lgID", "AB", "R", "H", "X2B", "X3B", "HR",
                                                    "SB", "CS", "BB", "SO", "IBB", "HBP", "SF")) %>%
                group_by(yearID, lgID) %>%
                summarise(AB=sum(AB), R=sum(R), H=sum(H), X2B=sum(X2B), X3B=sum(X3B), HR=sum(HR),
                          SB=sum(SB), CS=sum(CS), BB=sum(BB), SO=sum(SO), IBB=sum(IBB), HBP=sum(HBP),
                          SF=sum(SF))

            # Join yearly aggregates with the RunValues modifiers.
            # The grouping applied after the join does not include lgID; the mutate() steps
            # that follow are plain row-wise arithmetic.
            runsBatting <- left_join(yearbatting, RunValues, by= c("yearID", "lgID")) %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS) %>%
                # Calculate modifiers for wOBA events and wOBA scale.
                # runMinus: total run value of all events divided by outs made (AB - H + SF).
                mutate(runMinus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                       (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (AB-H+SF)) %>%
                # Calculate modifier for wOBA scale.
                # runPlus: the same total divided by times on base (BB - IBB + HBP + H).
                mutate(runPlus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                      (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (BB-IBB+HBP+H)) %>%
                # Calculate league wOBA.
                # NOTE(review): the numerator adds IBB while the denominator subtracts it - confirm the sign.
                mutate(lg_woba = (H+BB+IBB+HBP) / (AB+BB-IBB+HBP+SF)) %>%
                # Calculate wOBA scale.
                mutate(woba_scale = 1/(runPlus+runMinus)) %>%
                # wOBA hit-event modifiers.
                mutate(wBB = (runBB+runMinus)*woba_scale, wHBP = (runHBP+runMinus)*woba_scale,
                       w1B = (run1B+runMinus)*woba_scale, w2B = (run2B+runMinus)*woba_scale,
                       w3B = (run3B+runMinus)*woba_scale, wHR = (runHR+runMinus)*woba_scale,
                       wSB = runSB*woba_scale, wCS = runCS*woba_scale)
        } else {
            # Summarize values by year.
            yearbatting <- subset(batting, select=c("yearID", "AB", "R", "H", "X2B", "X3B", "HR",
                                                    "SB", "CS", "BB", "SO", "IBB", "HBP", "SF")) %>%
                group_by(yearID) %>%
                summarise(AB=sum(AB), R=sum(R), H=sum(H), X2B=sum(X2B), X3B=sum(X3B), HR=sum(HR),
                          SB=sum(SB), CS=sum(CS), BB=sum(BB), SO=sum(SO), IBB=sum(IBB), HBP=sum(HBP),
                          SF=sum(SF))

            # Join yearly aggregates with the RunValues modifiers.
            runsBatting <- left_join(yearbatting, RunValues, by="yearID") %>%
                group_by(yearID, RperOut, runBB, runHBP, run1B, run2B, run3B, runHR, runSB, runCS) %>%
                # Calculate modifiers for wOBA events and wOBA scale.
                # runMinus: total run value of all events divided by outs made (AB - H + SF).
                mutate(runMinus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                       (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (AB-H+SF)) %>%
                # Calculate modifier for wOBA scale.
                # runPlus: the same total divided by times on base (BB - IBB + HBP + H).
                mutate(runPlus = ((runBB*(BB-IBB)) + (runHBP*HBP) + (run1B*(H-X2B-X3B-HR)) +
                                      (run2B*X2B) + (run3B*X3B) + (1.4*HR) + (runSB*SB) - (runCS*CS)) / (BB-IBB+HBP+H)) %>%
                # Calculate league wOBA.
                # NOTE(review): the numerator adds IBB while the denominator subtracts it - confirm the sign.
                mutate(lg_woba = (H+BB+IBB+HBP) / (AB+BB-IBB+HBP+SF)) %>%
                # Calculate wOBA scale.
                mutate(woba_scale = 1/(runPlus+runMinus)) %>%
                # wOBA hit-event modifiers.
                mutate(wBB = (runBB+runMinus)*woba_scale, wHBP = (runHBP+runMinus)*woba_scale,
                       w1B = (run1B+runMinus)*woba_scale, w2B = (run2B+runMinus)*woba_scale,
                       w3B = (run3B+runMinus)*woba_scale, wHR = (runHR+runMinus)*woba_scale,
                       wSB = runSB*woba_scale, wCS = runCS*woba_scale)
        }
    }
    # Either the scraped Fangraphs table or the locally computed weights.
    return(runsBatting)
}
170 |
--------------------------------------------------------------------------------
/R/battingStats.R:
--------------------------------------------------------------------------------
1 |
#' @title Batting: Calculate batting average
#' @description Find batting average for batters with more than zero at bats.
#' Rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the Batting table are "AB" and "H."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords BA batting average
#' @family Batting functions
#' @export BA
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BA <- BA(Batting2016)
#'
BA <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("AB", "H") %in% colnames(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H' and 'AB'",
             call. = FALSE)
    }

    # Return the ifelse() result itself. Assigning inside the `yes` argument kept the
    # unmasked vector (or nothing at all when no row qualified), so the NA guard was lost.
    ifelse(dat$AB > 0, round(dat$H / dat$AB, 3), NA_real_)
}
26 |
#' @title Batting: Calculate batting average on balls in play (BABIP)
#' @description Find BABIP for batters with more than zero at bats.
#' Rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the Batting table are "AB", "H", "HR", "SO", and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords BABIP batting average on balls in play
#' @family Batting functions
#' @export BABIP
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BABIP <- BABIP(Batting2016)
#'
BABIP <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    # "HR" is part of the formula, so it is required together with the other inputs.
    if (!all(c("AB", "SO", "H", "HR", "SF") %in% colnames(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'SO', 'H', 'HR' and 'SF'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$AB > 0,
           round((dat$H - dat$HR) / (dat$AB - dat$SO - dat$HR + dat$SF), 3),
           NA_real_)
}
53 |
#' @title Batting: Calculate base on ball percentage
#' @description Find base on ball percentage for batters with more than zero at bats.
#' Rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the Batting table are "AB", "BB", "HBP", "SF", and "SH."
#' Intentional base on balls (IBB) are not handled separately by this calculation.
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector of percentages (0-100), one per row of \code{dat}, rounded to three decimal places.
#' @keywords BBpct base on ball percentage bb
#' @family Batting functions
#' @export BBpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$BBpct <- BBpct(Batting2016)
#'
BBpct <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("AB", "BB", "HBP", "SF", "SH") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'BB', 'HBP', 'SF', and 'SH'",
             call. = FALSE)
    }

    plate_appearances <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$AB > 0,
           round((dat$BB / plate_appearances) * 100, 3),
           NA_real_)
}
80 |
#' @title Batting: Calculate a batter's contact rate
#' @description Find the contact rate for batters: the percentage of at bats that did not end in a strikeout.
#' Rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the batting table are "AB" and "SO."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector of percentages (0-100), one per row of \code{dat}, rounded to three decimal places.
#' @keywords CTpct contact rate
#' @family Batting functions
#' @export CTpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$CTpct <- CTpct(Batting2016)
#'
CTpct <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("AB", "SO") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB' and 'SO'",
             call. = FALSE)
    }

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$AB > 0,
           round(((dat$AB - dat$SO) / dat$AB) * 100, 3),
           NA_real_)
}
106 |
#' @title Batting: Calculate home run percentage
#' @description Find home run percentage for batters with more than zero at bats.
#' Batters with at bats but no home runs return 0; rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the Batting table are "AB" and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector of percentages (0-100), one per row of \code{dat}, rounded to three decimal places.
#' @keywords HRpct home run percentage
#' @family Batting functions
#' @export HRpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$HRpct <- HRpct(Batting2016)
#'
HRpct <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("AB", "HR") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB' and 'HR'",
             call. = FALSE)
    }

    # Guard on at bats (the documented condition and the only division hazard) rather than
    # on home runs, so a batter without a home run gets 0 instead of NA. The ifelse()
    # result is returned directly; assigning inside `yes` discarded the guard.
    ifelse(dat$AB > 0,
           round(dat$HR / dat$AB * 100, 3),
           NA_real_)
}
132 |
#' @title Batting: Calculate ISO for batters
#' @description Find isolated power (ISO) for batters with more than zero at bats, computed as
#' slugging percentage minus batting average. Rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the batting table are "AB", "H", "X2B", "X3B", and "HR."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords ISO isolated power
#' @family Batting functions
#' @export ISO
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$ISO <- ISO(Batting2016)
#'
ISO <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    # "H" is part of the formula, so it is required together with the other inputs.
    if (!all(c("AB", "H", "X2B", "X3B", "HR") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'H', 'X2B', 'X3B' and 'HR'",
             call. = FALSE)
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + (dat$X2B * 2) + (dat$X3B * 3) + (dat$HR * 4)

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$AB > 0,
           round((total_bases / dat$AB) - dat$H / dat$AB, 3),
           NA_real_)
}
159 |
#' @title Batting: Calculate strikeout percentage
#' @description Find strikeout percentage for batters with more than zero at bats.
#' Batters with at bats but no strikeouts return 0; rows with zero (or missing) at bats return \code{NA}.
#' Required fields from the Batting table are; "AB", "SO", "BB", "HBP", "SF", and "SH."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector of percentages (0-100), one per row of \code{dat}, rounded to three decimal places.
#' @keywords Kpct strikeout percentage
#' @family Batting functions
#' @export Kpct
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$Kpct <- Kpct(Batting2016)
#'
Kpct <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("AB", "SO", "BB", "HBP", "SF", "SH") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'AB', 'SO', 'BB', 'HBP', 'SF', and 'SH'",
             call. = FALSE)
    }

    plate_appearances <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH

    # Guard on at bats (the documented condition) rather than on strikeouts, so a batter
    # who never struck out gets 0 instead of NA. The ifelse() result is returned directly;
    # assigning inside `yes` discarded the guard.
    ifelse(dat$AB > 0,
           round((dat$SO / plate_appearances) * 100, 3),
           NA_real_)
}
186 |
#' @title Batting: Calculate on base percentage (OBP)
#' @description Find the OBP for batters with more than zero hits.
#' Rows with zero (or missing) hits return \code{NA}.
#' Required fields from the batting table are "H", "BB", "HBP", "AB", and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords OBP on base percentage
#' @family Batting functions
#' @export OBP
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$OBP <- OBP(Batting2016)
#'
OBP <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("H", "BB", "HBP", "AB", "SF") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H', 'AB', 'BB', 'HBP' and 'SF'",
             call. = FALSE)
    }

    times_on_base <- dat$H + dat$BB + dat$HBP

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$H > 0,
           round(times_on_base / (dat$AB + dat$BB + dat$HBP + dat$SF), 3),
           NA_real_)
}
213 |
#' @title Batting: Calculate on base percentage plus slugging (OPS)
#' @description Find the OPS for batters with more than zero hits.
#' Rows with zero (or missing) hits return \code{NA}.
#' Required fields from the batting table are "H", "X2B", "X3B", "HR", "BB", "HBP", "AB" and "SF."
#' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
#' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @return A numeric vector with one element per row of \code{dat}, rounded to three decimal places.
#' @keywords OPS on base percentage plus slugging
#' @family Batting functions
#' @export OPS
#' @examples
#'
#' data("Batting2016")
#' head(Batting2016)
#'
#' Batting2016$OPS <- OPS(Batting2016)
#'
OPS <- function (dat=NULL){
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call. = FALSE)
    }
    if (!all(c("H", "BB", "HBP", "AB", "SF", "X2B", "X3B", "HR") %in% names(dat))){
        stop("Not enough data to calculate. Please make sure your data includes 'H', 'AB', 'BB', 'HBP', 'SF', 'X2B', 'X3B', and 'HR'",
             call. = FALSE)
    }

    # On-base component.
    on_base <- (dat$H + dat$BB + dat$HBP) / (dat$AB + dat$BB + dat$HBP + dat$SF)
    # Slugging component: total bases per at bat.
    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    slugging <- (singles + (dat$X2B * 2) + (dat$X3B * 3) + (dat$HR * 4)) / dat$AB

    # Return the masked vector; assigning inside ifelse() dropped the NA guard.
    ifelse(dat$H > 0, round(on_base + slugging, 3), NA_real_)
}
241 |
242 | #' @title Batting: Calculate plate appearances for batters
243 | #' @description Find the plate appearances (PA) for batters.
244 | #' Required fields from the batting table are "AB", "BB", "HBP", "SH", and "SF."
245 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
246 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
247 | #' @keywords PA on base percentage
248 | #' @family Batting functions
249 | #' @export PA
250 | #' @examples
251 | #'
252 | #' data("Batting2016")
253 | #' head(Batting2016)
254 | #'
255 | #' Batting2016$PA <- PA(Batting2016)
256 | #'
PA <- function (dat=NULL){
    # Returns plate appearances per row of `dat`, computed as
    # AB + BB + HBP + SF + SH. Rows whose AB is negative are NA. Stops when
    # `dat` is NULL or a required column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    # SH is part of a plate appearance and is listed as required in the
    # function's documentation, so it is validated and summed here.
    required <- c("AB", "BB", "HBP", "SH", "SF")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    appearances <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH

    # Supply an explicit `no` value: without one ifelse() errors as soon as
    # any row fails the test.
    return(ifelse(dat$AB >= 0, appearances, NA))
}
268 |
269 | #' @title Batting: Calculate extra base percentage
270 | #' @description Find extra base percentage for batters with more than zero at bats.
271 | #' Required fields from the batting table are "AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", "HR"."
272 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
273 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
274 | #' @keywords XBHpct extra base percentage
275 | #' @family Batting functions
276 | #' @export XBHpct
277 | #' @examples
278 | #'
279 | #' data("Batting2016")
280 | #' head(Batting2016)
281 | #'
282 | #' Batting2016$XBHpct <- XBHpct(Batting2016)
283 | #'
XBHpct <- function (dat=NULL){
    # Returns extra-base hits (doubles, triples, home runs) as a percentage of
    # AB + BB + HBP + SF + SH per row of `dat`, rounded to three decimals.
    # Rows with zero at bats are NA. Stops when `dat` is NULL or a required
    # column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("AB", "BB", "HBP", "SF", "SH", "X2B", "X3B", "HR")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    extra_base_hits <- dat$X2B + dat$X3B + dat$HR
    appearances <- dat$AB + dat$BB + dat$HBP + dat$SF + dat$SH

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has an at bat.
    return(ifelse(dat$AB > 0, round((extra_base_hits / appearances) * 100, 3), NA))
}
295 |
296 | #' @title Batting: Calculate extra base per hit
297 | #' @description Find the average extra bases per hit for batters with more than zero hits.
298 | #' Required fields from the batting table are "H", "X2B", "X3B", "HR"."
299 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
300 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
301 | #' @keywords XBperH extra base per hit
302 | #' @family Batting functions
303 | #' @export XBperH
304 | #' @examples
305 | #'
306 | #' data("Batting2016")
307 | #' head(Batting2016)
308 | #'
309 | #' Batting2016$XBperH <- XBperH(Batting2016)
310 | #'
XBperH <- function (dat=NULL){
    # Returns extra-base hits (doubles, triples, home runs) as a percentage of
    # all hits per row of `dat`, rounded to three decimals. Rows with zero
    # hits are NA. Stops when `dat` is NULL or a required column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("H", "X2B", "X3B", "HR")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    extra_base_hits <- dat$X2B + dat$X3B + dat$HR

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has a hit.
    return(ifelse(dat$H > 0, round((extra_base_hits / dat$H) * 100, 3), NA))
}
323 |
324 | #' @title Batting: Calculate Runs Created using the basic formula.
325 | #' @description Find the runs created using the basic formula presented by Bill James in the late 1970s.
326 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", and "HR."
327 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
328 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @keywords RCbasic runs created
330 | #' @family Batting functions
331 | #' @export RCbasic
332 | #' @examples
333 | #'
334 | #' data("Batting2016")
335 | #' head(Batting2016)
336 | #'
337 | #' Batting2016$RCbasic <- RCbasic(Batting2016)
338 | #'
RCbasic <- function (dat=NULL){
    # Returns basic Runs Created per row of `dat`:
    # (H + BB) * total bases / (AB + BB). Rows with zero at bats are NA.
    # Stops when `dat` is NULL or a required column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    # H already counts doubles, triples and home runs once, so total bases
    # must start from singles only; starting from H over-counts every
    # extra-base hit by one base.
    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + 2*dat$X2B + 3*dat$X3B + 4*dat$HR

    runs_created <- (dat$H + dat$BB) * total_bases / (dat$AB + dat$BB)

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has an at bat.
    return(ifelse(dat$AB > 0, runs_created, NA))
}
350 |
351 | #' @title Batting: Calculate Runs Created using the technical formula.
352 | #' @description The "Technical Version" is the most well-known formula for RC. It adds several factors to the
353 | #' basic formula such as sacrifice hits, stolen bases and intentional base on balls.
354 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
355 | #' "SF" and "SH," and "IBB."
356 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
357 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @keywords RCtech runs created
359 | #' @family Batting functions
360 | #' @export RCtech
361 | #' @examples
362 | #'
363 | #' data("Batting2016")
364 | #' head(Batting2016)
365 | #'
366 | #' Batting2016$RCtech <- RCtech(Batting2016)
367 | #'
RCtech <- function (dat=NULL){
    # Returns "technical" Runs Created per row of `dat`:
    #   A = H + BB - CS + HBP - GIDP
    #   B = TB + 0.26 * (BB - IBB + HBP) + 0.52 * (SH + SF + SB)
    #   C = AB + BB + HBP + SH + SF
    #   RC = A * B / C
    # Rows with zero at bats are NA. Stops when `dat` is NULL or a required
    # column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP",
                  "SB", "CS", "SF", "SH", "IBB")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + 2*dat$X2B + 3*dat$X3B + 4*dat$HR

    on_base <- dat$H + dat$BB - dat$CS + dat$HBP - dat$GIDP
    # The 0.52 term is part of the advancement factor, so it must be
    # multiplied by the on-base factor rather than added to the product.
    advancement <- total_bases + 0.26*(dat$BB - dat$IBB + dat$HBP) +
        0.52*(dat$SH + dat$SF + dat$SB)
    opportunities <- dat$AB + dat$BB + dat$HBP + dat$SH + dat$SF

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has an at bat.
    return(ifelse(dat$AB > 0, on_base * advancement / opportunities, NA))
}
384 |
385 | #' @title Batting: Calculate Runs Created using the updated 2002 formula.
386 | #' @description The "2002 Version" is an updated version of the "Technical Version" by Bill James.
387 | #' The 2002 RC uses the same counting stats as the Technical Version but applies weights to many of the raw stats.
388 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP", "SB", "CS",
389 | #' "SF" and "SH," "SO", and "IBB."
390 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
391 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
#' @keywords RC2002 runs created
393 | #' @family Batting functions
394 | #' @export RC2002
395 | #' @examples
396 | #'
397 | #' data("Batting2016")
398 | #' head(Batting2016)
399 | #'
400 | #' Batting2016$RC2002 <- RC2002(Batting2016)
401 | #'
RC2002 <- function (dat=NULL){
    # Returns the 2002 version of Runs Created per row of `dat`:
    #   A = H + BB - CS + HBP - GIDP
    #   B = 1.125*1B + 1.69*2B + 3.02*3B + 3.73*HR + 0.29*(BB - IBB + HBP) +
    #       0.492*(SH + SF + SB) - 0.04*SO
    #   C = AB + BB + HBP + SH + SF
    #   RC = ((2.4*C + A) * (3*C + B)) / (9*C) - 0.9*C
    # Rows with zero at bats are NA. Stops when `dat` is NULL or a required
    # column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "GIDP", "HBP",
                  "SB", "CS", "SF", "SH", "IBB", "SO")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    on_base <- dat$H + dat$BB - dat$CS + dat$HBP - dat$GIDP
    # NOTE(review): the singles weight is 1.125 in the published 2002 formula;
    # this block previously used 1.25. Confirm against the cited reference.
    advancement <- (1.125*singles) + (1.69*dat$X2B) + (3.02*dat$X3B) + (3.73*dat$HR) +
        0.29*(dat$BB - dat$IBB + dat$HBP) +
        0.492*(dat$SH + dat$SF + dat$SB) - (0.04*dat$SO)
    opportunities <- dat$AB + dat$BB + dat$HBP + dat$SH + dat$SF

    runs_created <- (((2.4*opportunities) + on_base) * ((3*opportunities) + advancement)) /
        (9*opportunities) - (0.9*opportunities)

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has an at bat.
    return(ifelse(dat$AB > 0, runs_created, NA))
}
421 |
422 | #' @title Batting: Calculate slugging percentage (SLG)
423 | #' @description Find the SLG for batters with more than zero hits.
#' Required fields from the batting table are "H", "AB", "X2B", "X3B" and "HR."
425 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
426 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
427 | #' @keywords SLG on base percentage
428 | #' @family Batting functions
429 | #' @export SLG
430 | #' @examples
431 | #'
432 | #' data("Batting2016")
433 | #' head(Batting2016)
434 | #'
435 | #' Batting2016$SLG <- SLG(Batting2016)
436 | #'
SLG <- function (dat=NULL){
    # Returns slugging percentage (total bases / AB) per row of `dat`, rounded
    # to three decimals. Rows with zero hits are NA. Stops when `dat` is NULL
    # or a required column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    required <- c("H", "X2B", "X3B", "AB", "HR")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + (dat$X2B*2) + (dat$X3B*3) + (dat$HR*4)

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has a hit.
    return(ifelse(dat$H > 0, round(total_bases / dat$AB, 3), NA))
}
448 |
449 | #' @title Batting: Calculate a batter's total bases
450 | #' @description Find total bases.
451 | #' Required fields from the batting table are "AB","H", "X2B", "X3B" and "HR."
452 | #' @param dat A data frame you would wish to calculate. The data frame must have the same column names found in
453 | #' The \code{Lahman} package or the Chadwick Bureau GitHub repository.
454 | #' @keywords TBs total bases
455 | #' @family Batting functions
456 | #' @export TBs
457 | #' @examples
458 | #'
459 | #' data("Batting2016")
460 | #' head(Batting2016)
461 | #'
462 | #' Batting2016$TBs <- TBs(Batting2016)
463 | #'
TBs <- function (dat=NULL){
    # Returns total bases per row of `dat`:
    # singles + 2 * doubles + 3 * triples + 4 * home runs.
    # Rows with zero at bats are NA. Stops when `dat` is NULL or a required
    # column is absent.
    if (is.null(dat)){
        stop("Please supply a valid data frame.", call.=FALSE)
    }

    # AB is required because it drives the NA guard below.
    required <- c("AB", "H", "X2B", "X3B", "HR")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    # H already counts doubles, triples and home runs once, so start from
    # singles; starting from H over-counts every extra-base hit by one base.
    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    total_bases <- singles + (2*dat$X2B) + (3*dat$X3B) + (4*dat$HR)

    # Return the value of ifelse() itself so the NA guard is actually applied
    # and a result exists even when no row has an at bat.
    return(ifelse(dat$AB > 0, round(total_bases, 3), NA))
}
475 |
476 | #' @title Batting: Calculate Weighted On-Base Average (wOBA)
477 | #' @description Find the wOBA for all players with one or more hits for a particular season.
478 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
479 | #' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
480 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
481 | #' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
482 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
484 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
485 | #' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
486 | #' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
487 | #' park factors between the American and National leagues. The default for this argument is FALSE.
488 | #' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
489 | #' were not a counted statistic until 1954, therefore we are technically unable to calculate wOBA for any player prior to 1954.
490 | #' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
491 | #' If FALSE, the wOBA calculation will return NaN for years with missing data.
492 | #' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. If FALSE the function will use the internal
493 | #' formula adapted from Tom Tango's original wOBA formula. Note, the internal formula is typically identical to Fangraphs and
494 | #' does not require an external download. If not specified, the default is set to FALSE.
495 | #' @keywords wOBA Weighted On-Base Average
496 | #' @family Batting functions
497 | #' @import dplyr
498 | #' @export wOBA
499 | #' @examples
500 | #'
501 | #' data("Batting2016")
502 | #' head(Batting2016)
503 | #' data("Pitching2016")
504 | #' head(Pitching2016)
505 | #' data("Fielding2016")
506 | #' head(Fielding2016)
507 | #'
508 | #' Batting2016$wOBA <- wOBA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
509 | #' NA_to_zero=TRUE, Sep.Leagues=FALSE)
510 | #'
wOBA <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Returns one wOBA value per row of BattingTable. Rows with zero hits are
    # NA. Stops when a required batting column is absent.

    # Per the notice printed below, the Fangraphs weights are not split by
    # league, so a per-league request falls back to the internal calculation.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }

    dat <- BattingTable

    # Validate before computing the weights so bad input fails with a clear
    # message instead of an error from deeper in the calculation.
    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }

    # Attach the season (and optionally league) weights to every batting row.
    keys <- if (isTRUE(Sep.Leagues)) c("yearID", "lgID") else "yearID"
    weights <- weights[, c(keys, "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR")]
    dat <- left_join(dat, weights, by=keys)

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    weighted_events <- dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
        dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR
    chances <- dat$AB + unintentional_bb + dat$SF + dat$HBP

    # Return the value of ifelse() itself. Assigning inside its `yes` argument
    # throws the NA branch away and leaves nothing to return when no row
    # passes the test.
    return(ifelse(dat$H > 0, weighted_events / chances, NA))
}
544 |
545 |
546 | #' @title Batting: Calculate Weighted Runs Above Average (wRAA)
547 | #' @description Find the wRAA for all players with one or more hits for a particular season.
548 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
549 | #' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
550 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
551 | #' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
552 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
#' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
554 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
555 | #' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
556 | #' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
557 | #' park factors between the American and National leagues. The default for this argument is FALSE.
558 | #' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
559 | #' were not a counted statistic until 1954, therefore we are technically unable to calculate wRAA for any player prior to 1954.
560 | #' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
561 | #' If FALSE, the wRAA calculation will return NaN for years with missing data.
562 | #' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRAA
563 | #' calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula.
564 | #' This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRAA should be expected.
565 | #' The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.
566 | #' @keywords wRAA Weighted Runs Above Average
567 | #' @family Batting functions
568 | #' @import dplyr
569 | #' @export wRAA
570 | #' @examples
571 | #'
572 | #' data("Batting2016")
573 | #' head(Batting2016)
574 | #' data("Pitching2016")
575 | #' head(Pitching2016)
576 | #' data("Fielding2016")
577 | #' head(Fielding2016)
578 | #'
579 | #' Batting2016$wRAA <- wRAA(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
580 | #' NA_to_zero=TRUE, Sep.Leagues=FALSE)
581 | #'
wRAA <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Returns one wRAA value per row of BattingTable:
    # (wOBA - league wOBA) / wOBA scale * (AB + BB + HBP + SF).
    # Rows with zero hits are NA. Stops when a required batting column is
    # absent.

    # Per the notice printed below, the Fangraphs weights are not split by
    # league, so a per-league request falls back to the internal calculation.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }
    dat <- BattingTable

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }

    # Attach the season (and optionally league) weights to every batting row.
    keys <- if (isTRUE(Sep.Leagues)) c("yearID", "lgID") else "yearID"
    weights <- weights[, c(keys, "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba")]
    dat <- left_join(dat, weights, by=keys)

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    woba <- (dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
                 dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR) /
        (dat$AB + unintentional_bb + dat$SF + dat$HBP)

    runs_above_avg <- (woba - dat$lg_woba) / dat$woba_scale * (dat$AB + dat$BB + dat$HBP + dat$SF)

    # Return the value of ifelse() itself. Assigning inside its `yes` argument
    # throws the NA branch away and leaves nothing to return when no row
    # passes the test.
    return(ifelse(dat$H > 0, runs_above_avg, NA))
}
616 |
617 |
618 | #' @title Batting: Calculate Weighted Runs Created (wRC)
619 | #' @description Find the wRC for all players with one or more hits for a particular season.
620 | #' Required fields from the batting table are "AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB."
621 | #' @param BattingTable A full batting table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
622 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
623 | #' @param PitchingTable A full pitching table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
624 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
625 | #' @param FieldingTable A full fielding table from the \code{Lahman} package or the Chadwick Bureau GitHub repository.
626 | #' Any subsetting or removal of players will affect your results. All players for each year are recommended.
627 | #' @param Sep.Leagues If TRUE the algorithm will calculate different run environments for the National and American leagues. Grouping
628 | #' the leagues can solve problems introduced by the designated hitter and hitting pitchers. It also serves to further isolate for
629 | #' park factors between the American and National leagues. The default for this argument is FALSE.
630 | #' @param NA_to_zero If TRUE this will replace NAs with 0 for years that certain stats were not counted. For example, sacrifice hits
631 | #' were not a counted statistic until 1954, therefore we are technically unable to calculate wRC for any player prior to 1954.
632 | #' The default is set to TRUE. Even though this is bad practice mathematically, many in the sabermetrics community accept the practice.
633 | #' If FALSE, the wRC calculation will return NaN for years with missing data.
634 | #' @param Fangraphs If TRUE the function will download wOBA values from Fangraphs. Both wOBA scale and league wOBA are used in the wRC
635 | #' calculation. If FALSE the function will use the internal wOBA algorithm, which is adapted from Tom Tango's original wOBA formula.
636 | #' This algorithm produces a slightly different wOBA scale than the Fangraphs wOBA scale, so variations in wRC should be expected.
637 | #' The default internal method does not require an external download from Fangraphs. If not specified, the default is set to FALSE.
#' @keywords wRC Weighted Runs Created
639 | #' @family Batting functions
640 | #' @import dplyr
641 | #' @export wRC
642 | #' @examples
643 | #'
644 | #' data("Batting2016")
645 | #' head(Batting2016)
646 | #' data("Pitching2016")
647 | #' head(Pitching2016)
648 | #' data("Fielding2016")
649 | #' head(Fielding2016)
650 | #'
651 | #' Batting2016$wRC <- wRC(Batting2016, Pitching2016, Fielding2016, Fangraphs=FALSE,
652 | #' NA_to_zero=TRUE, Sep.Leagues=FALSE)
653 | #'
wRC <- function (BattingTable=NULL, PitchingTable=NULL, FieldingTable=NULL, Fangraphs=FALSE, NA_to_zero=TRUE, Sep.Leagues=FALSE){
    # Returns one wRC value per row of BattingTable:
    # ((wOBA - league wOBA) / wOBA scale + league runs per PA) *
    # (AB + BB + HBP + SF).
    # Rows with zero hits are NA. Stops when a required batting column is
    # absent.

    # Per the notice printed below, the Fangraphs weights are not split by
    # league, so a per-league request falls back to the internal calculation.
    if (isTRUE(Sep.Leagues) && isTRUE(Fangraphs)){
        print("The Fangraphs Guts table does not sperate wOBA by league. Applying the default calculation...")
        Fangraphs <- FALSE
    }
    dat <- BattingTable

    required <- c("AB", "H", "BB", "X2B", "X3B", "HR", "HBP", "SF", "IBB")
    missing_cols <- setdiff(required, names(dat))
    if (length(missing_cols) > 0){
        stop("Not enough data to calculate. Please make sure your data includes ",
             paste0("'", missing_cols, "'", collapse=", "), ".", call.=FALSE)
    }

    weights <- wOBA_values(BattingTable, PitchingTable, FieldingTable, Fangraphs=Fangraphs, Sep.Leagues=Sep.Leagues)

    # The internal weights table carries league totals, from which runs per
    # plate appearance are derived here.
    # NOTE(review): with Fangraphs=TRUE the table is presumably expected to
    # already provide `lg_r_pa` - confirm against wOBA_values().
    if (!isTRUE(Fangraphs)) {
        weights$lg_r_pa <- weights$R / (weights$AB + weights$BB + weights$HBP + weights$SF)
    }
    if (isTRUE(NA_to_zero)){
        dat <- mutate(dat, SF=ifelse(is.na(SF),0,SF), IBB=ifelse(is.na(IBB),0,IBB), HBP=ifelse(is.na(HBP),0,HBP))
    }

    # Attach the season (and optionally league) weights to every batting row.
    keys <- if (isTRUE(Sep.Leagues)) c("yearID", "lgID") else "yearID"
    weights <- weights[, c(keys, "wBB", "wHBP", "w1B", "w2B", "w3B", "wHR", "woba_scale", "lg_woba", "lg_r_pa")]
    dat <- left_join(dat, weights, by=keys)

    singles <- dat$H - dat$X2B - dat$X3B - dat$HR
    unintentional_bb <- dat$BB - dat$IBB
    woba <- (dat$wBB*unintentional_bb + dat$wHBP*dat$HBP + dat$w1B*singles +
                 dat$w2B*dat$X2B + dat$w3B*dat$X3B + dat$wHR*dat$HR) /
        (dat$AB + unintentional_bb + dat$SF + dat$HBP)

    runs_created <- (((woba - dat$lg_woba) / dat$woba_scale) + dat$lg_r_pa) *
        (dat$AB + dat$BB + dat$HBP + dat$SF)

    # Return the value of ifelse() itself. Assigning inside its `yes` argument
    # throws the NA branch away and leaves nothing to return when no row
    # passes the test.
    return(ifelse(dat$H > 0, runs_created, NA))
}
691 |
692 |
693 |
694 |
695 |
--------------------------------------------------------------------------------