├── .github
    ├── .gitignore
    └── workflows
    │   ├── pkgdown.yaml
    │   └── R-CMD-check.yaml
├── R
    ├── ucidata.R
    ├── car_eval_docs.R
    ├── autompg_docs.R
    ├── bridges_docs.R
    ├── glass_docs.R
    ├── hepatitis_docs.R
    ├── wine_docs.R
    ├── bcw_original_docs.R
    ├── abalone_docs.R
    ├── forest_fires_docs.R
    ├── adult_docs.R
    ├── heart_disease_processed_docs.R
    ├── autoimports_docs.R
    └── bike_sharing_daily_docs.R
├── NAMESPACE
├── data
    ├── wine.rda
    ├── adult.rda
    ├── glass.rda
    ├── abalone.rda
    ├── autompg.rda
    ├── bridges.rda
    ├── car_eval.rda
    ├── hepatitis.rda
    ├── autoimports.rda
    ├── bcw_original.rda
    ├── forest_fires.rda
    ├── heart_disease_ch.rda
    ├── heart_disease_cl.rda
    ├── heart_disease_hu.rda
    ├── heart_disease_va.rda
    └── bike_sharing_daily.rda
├── .gitignore
├── _pkgdown.yml
├── .Rbuildignore
├── data-raw
    ├── forest_fires_build.R
    ├── autompg_build.R
    ├── car_eval_build.R
    ├── abalone_build.R
    ├── adult_build.R
    ├── bridges_build.R
    ├── wine_build.R
    ├── autoimports_build.R
    ├── glass_build.R
    ├── hepatitis_build.R
    ├── bcw_original_build.R
    ├── bike_sharing_daily_build.R
    └── heart_disease_build.R
├── ucidata.Rproj
├── DESCRIPTION
├── cran-comments.md
├── man
    ├── ucidata-package.Rd
    ├── car_eval.Rd
    ├── autompg.Rd
    ├── glass.Rd
    ├── bridges.Rd
    ├── wine.Rd
    ├── bcw_original.Rd
    ├── hepatitis.Rd
    ├── abalone.Rd
    ├── forest_fires.Rd
    ├── adult.Rd
    ├── heart_disease.Rd
    ├── autoimports.Rd
    └── bike_sharing_daily.Rd
├── NEWS.md
├── README.md
└── README.Rmd


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/R/ucidata.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | 


--------------------------------------------------------------------------------
/data/wine.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/wine.rda


--------------------------------------------------------------------------------
/data/adult.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/adult.rda


--------------------------------------------------------------------------------
/data/glass.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/glass.rda


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | docs
7 | 


--------------------------------------------------------------------------------
/data/abalone.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/abalone.rda


--------------------------------------------------------------------------------
/data/autompg.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/autompg.rda


--------------------------------------------------------------------------------
/data/bridges.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/bridges.rda


--------------------------------------------------------------------------------
/data/car_eval.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/car_eval.rda


--------------------------------------------------------------------------------
/data/hepatitis.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/hepatitis.rda


--------------------------------------------------------------------------------
/data/autoimports.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/autoimports.rda


--------------------------------------------------------------------------------
/data/bcw_original.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/bcw_original.rda


--------------------------------------------------------------------------------
/data/forest_fires.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/forest_fires.rda


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: http://r-pkg.thecoatlessprofessor.com/ucidata/
2 | template:
3 |   bootstrap: 5
4 | 
5 | 


--------------------------------------------------------------------------------
/data/heart_disease_ch.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/heart_disease_ch.rda


--------------------------------------------------------------------------------
/data/heart_disease_cl.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/heart_disease_cl.rda


--------------------------------------------------------------------------------
/data/heart_disease_hu.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/heart_disease_hu.rda


--------------------------------------------------------------------------------
/data/heart_disease_va.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/heart_disease_va.rda


--------------------------------------------------------------------------------
/data/bike_sharing_daily.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coatless-rpkg/ucidata/HEAD/data/bike_sharing_daily.rda


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^data-raw$
 4 | ^README\.Rmd$
 5 | ^README-.*\.png$
 6 | ^\.travis\.yml$
 7 | ^cran-comments\.md$
 8 | ^\.github$
 9 | ^_pkgdown\.yml$
10 | ^docs$
11 | ^pkgdown$
12 | 


--------------------------------------------------------------------------------
/data-raw/forest_fires_build.R:
--------------------------------------------------------------------------------
1 | ### UC Irvine Machine Learning Repository
2 | ## Forest Fires data set: https://archive.ics.uci.edu/ml/datasets/Forest+Fires
3 | 
4 | # Download the CSV (its first row carries the column names).
5 | forest_fires_csv = "https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv"
6 | forest_fires = read.csv(file = forest_fires_csv, header = TRUE)
7 | 
8 | usethis::use_data(forest_fires, overwrite = TRUE)
9 | 


--------------------------------------------------------------------------------
/ucidata.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/data-raw/autompg_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | ## Auto MPG data set: https://archive.ics.uci.edu/ml/datasets/auto+mpg
 3 | 
 4 | # The raw file is whitespace-delimited, has no header row and quotes the car
 5 | # names with double quotes. Unknown values are written as "?"; declaring that
 6 | # marker through `na.strings` lets `horsepower` be read as a numeric column
 7 | # containing NA rather than as character, in line with the "continuous"
 8 | # description in R/autompg_docs.R and with the other scripts in data-raw/.
 9 | # Re-run this script to refresh data/autompg.rda after changing it.
10 | autompg = read.table(
11 |   "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
12 |   quote = "\"",
13 |   comment.char = "",
14 |   na.strings = "?",
15 |   stringsAsFactors = FALSE,
16 |   header = FALSE)
17 | 
18 | # Attribute names, in file order.
19 | colnames(autompg) = c("mpg", "cylinders", "displacement", "horsepower",
20 |                       "weight", "acceleration", "model_year", "origin", "car_name")
21 | 
22 | usethis::use_data(autompg, overwrite = TRUE)
23 | 


--------------------------------------------------------------------------------
/data-raw/car_eval_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | ## Car Evaluation data set: https://archive.ics.uci.edu/ml/datasets/Car+Evaluation
 3 | 
 4 | # Attribute names, in file order (the raw file has no header row).
 5 | car_eval_columns = c("buying", "maint", "doors", "persons",
 6 |                      "lug_boot", "safety", "class_value")
 7 | 
 8 | car_eval = read.csv(
 9 |   "https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data",
10 |   header = FALSE
11 | )
12 | 
13 | # Swap the placeholder names assigned by read.csv() for the real ones.
14 | colnames(car_eval) = car_eval_columns
15 | 
16 | usethis::use_data(car_eval, overwrite = TRUE)
17 | 


--------------------------------------------------------------------------------
/data-raw/abalone_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | ## Abalone data set: https://archive.ics.uci.edu/ml/datasets/Abalone
 3 | 
 4 | # Attribute names, in file order (the raw file has no header row).
 5 | abalone_columns = c("sex", "length", "diameter", "height",
 6 |                     "whole_weight", "shucked_weight", "viscera_weight",
 7 |                     "shell_weight", "rings")
 8 | 
 9 | abalone = read.csv(
10 |   "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data",
11 |   header = FALSE
12 | )
13 | 
14 | # Swap the placeholder names assigned by read.csv() for the real ones.
15 | colnames(abalone) = abalone_columns
16 | 
17 | # Store the data set in the package
18 | usethis::use_data(abalone, overwrite = TRUE)
19 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: ucidata
 2 | Title: Collection of Datasets from the UC Irvine Machine Learning Repository
 3 | Version: 0.0.3
 4 | Authors@R: person("James", "Balamuta", email = "balamut2@illinois.edu", role = c("aut", "cre"))
 5 | Description: Select datasets from the UC Irvine 
 6 |     Machine Learning Repository that conform to being reasonable in 
 7 |     size while also allowing for a wide variety of visualizations and models
 8 |     to be formed.
 9 | Depends: R (>= 4.1.0)
10 | License: GPL (>= 2)
11 | URL: https://github.com/coatless-rpkg/ucidata, http://r-pkg.thecoatlessprofessor.com/ucidata/
12 | BugReports: https://github.com/coatless-rpkg/ucidata/issues
13 | Encoding: UTF-8
14 | LazyData: true
15 | Roxygen: list(markdown = TRUE)
16 | RoxygenNote: 7.2.3
17 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | 
 3 | - local OS X install, R 3.4.3
 4 | - ubuntu 12.04 (on travis-ci), R 3.4.3
 5 | - win-builder (devel and release)
 6 | 
 7 | ## R CMD check results
 8 | 
 9 | 0 errors | 0 warnings | 1 note
10 | 
11 | Possibly mis-spelled words in DESCRIPTION:
12 |   UC (2:40, 6:66)
13 | 
14 | Found the following (possibly) invalid URLs:
15 |   URL: http://www.r-pkg.org/pkg/ucidata (moved to https://www.r-pkg.org:443/pkg/ucidata)
16 |     From: README.md
17 |     Status: 404
18 |     Message: Not Found
19 | 
20 | - This is a new release of a data package. As a result, the `r-pkg.org` link
21 |   included in the readme has yet to be created. Once approved, this link will
22 |   become active.
23 | - `UC` is the acronym for the University of California network of schools.
24 | 
25 | ## Reverse dependencies
26 | 
27 | This is a new release, so there are no reverse dependencies.
28 | 


--------------------------------------------------------------------------------
/man/ucidata-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ucidata.R
 3 | \docType{package}
 4 | \name{ucidata-package}
 5 | \alias{ucidata}
 6 | \alias{ucidata-package}
 7 | \title{ucidata: Collection of Datasets from the UC Irvine Machine Learning Repository}
 8 | \description{
 9 | Select datasets from the UC Irvine Machine Learning Repository that conform to being reasonable in size while also allowing for a wide variety of visualizations and models to be formed.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/coatless-rpkg/ucidata}
15 |   \item \url{http://r-pkg.thecoatlessprofessor.com/ucidata/}
16 |   \item Report bugs at \url{https://github.com/coatless-rpkg/ucidata/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: James Balamuta \email{balamut2@illinois.edu}
22 | 
23 | }
24 | \keyword{internal}
25 | 


--------------------------------------------------------------------------------
/data-raw/adult_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | # Adult data set: https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
 3 | 
 4 | # Attribute names, in file order (the raw file has no header row).
 5 | adult_columns = c(
 6 |   'age', 'workclass', 'fnlwgt', 'education', 'education_num',
 7 |   'marital_status', 'occupation', 'relationship', 'race', 'sex',
 8 |   'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
 9 |   'income'
10 | )
11 | 
12 | # "?" marks a missing value; surrounding blanks are stripped from every field
13 | # and short rows are not padded.
14 | adult = read.csv(
15 |   'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
16 |   header = FALSE,
17 |   strip.white = TRUE,
18 |   fill = FALSE,
19 |   na.strings = "?"
20 | )
21 | 
22 | colnames(adult) = adult_columns
23 | 
24 | usethis::use_data(adult, overwrite = TRUE)
25 | 


--------------------------------------------------------------------------------
/data-raw/bridges_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | # Pittsburgh Bridges data set: https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges
 3 | 
 4 | # Attribute names as listed on the repository page, in file order.
 5 | # They were pulled from the page with the regex search
 6 | #   [0-9]{1,2}\. (.*) / .* / .* / .*
 7 | # and the replacement "\1",
 8 | bridges_attributes = c(
 9 |   "IDENTIF", "RIVER", "LOCATION", "ERECTED", "PURPOSE", "LENGTH", "LANES",
10 |   "CLEAR-G", "T-OR-D", "MATERIAL", "SPAN", "REL-L", "TYPE"
11 | )
12 | 
13 | # The raw file has no header row; "?" is treated as a missing value.
14 | bridges = read.csv(
15 |   "https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1",
16 |   na.strings = "?",
17 |   header = FALSE
18 | )
19 | 
20 | # Lower-case the names first, then turn hyphens into underscores.
21 | lowered_names = tolower(bridges_attributes)
22 | colnames(bridges) = gsub("-", "_", lowered_names)
23 | 
24 | # Store the lane count as a factor rather than a number.
25 | bridges$lanes = factor(bridges$lanes)
26 | 
27 | usethis::use_data(bridges, overwrite = TRUE)
28 | 


--------------------------------------------------------------------------------
/data-raw/wine_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | ## Wine Quality data set: https://archive.ics.uci.edu/ml/datasets/wine
 3 | 
 4 | # Both files use `;` (not `,`) as the field separator and start with a
 5 | # header row.
 6 | wine_red = read.csv(
 7 |   "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
 8 |   header = TRUE,
 9 |   sep = ";"
10 | )
11 | wine_white = read.csv(
12 |   "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",
13 |   header = TRUE,
14 |   sep = ";"
15 | )
16 | 
17 | # Tag every row with the wine color it came from
18 | wine_red[["color"]]   = "Red"
19 | wine_white[["color"]] = "White"
20 | 
21 | # Stack red on top of white, then store the color tag as a factor
22 | wine = rbind(wine_red, wine_white)
23 | wine[["color"]] = as.factor(wine[["color"]])
24 | 
25 | # Column names: replace every period with an underscore
26 | colnames(wine) = gsub("\\.", "_", colnames(wine))
27 | 
28 | usethis::use_data(wine, overwrite = TRUE)
29 | 


--------------------------------------------------------------------------------
/R/car_eval_docs.R:
--------------------------------------------------------------------------------
 1 | #' Car Evaluation Data Set
 2 | #'
 3 | #' Car Evaluation Database was derived from a simple hierarchical decision model
 4 | #' originally developed for the demonstration of DEX.
 5 | #'
 6 | #' @format A data frame with 1728 observations on the following 7 variables.
 7 | #' - `buying`
 8 | #'     - vhigh, high, med, low.
 9 | #' - `maint`
10 | #'     - vhigh, high, med, low.
11 | #' - `doors`
12 | #'      - 2, 3, 4, 5more.
13 | #' - `persons`
14 | #'     - 2, 4, more.
15 | #' - `lug_boot`
16 | #'     - small, med, big.
17 | #' - `safety`
18 | #'     - low, med, high.
19 | #' - `class_value`
20 | #'     - unacc, acc, good, vgood
21 | #' @source
22 | #' Marko Bohanec (marko.bohanec '@' ijs.si)
23 | #' Blaz Zupan (blaz.zupan '@' ijs.si)
24 | #' @references
25 | #' M. Bohanec, V. Rajkovic: Expert system for decision making. Sistemica 1(1), pp. 145-157, 1990.
26 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data>
27 | #' <https://archive.ics.uci.edu/ml/datasets/Car+Evaluation>
28 | "car_eval"
29 | 


--------------------------------------------------------------------------------
/R/autompg_docs.R:
--------------------------------------------------------------------------------
 1 | #' Autompg Data Set
 2 | #'
 3 | #' This dataset is a slightly modified version of the dataset provided in
 4 | #' the StatLib library.  In line with the use by Ross Quinlan (1993) in
 5 | #' predicting the attribute "mpg", 8 of the original instances were removed
 6 | #' because they had unknown values for the "mpg" attribute.
 7 | #' @format A data frame with 398 observations on the following 9 variables.
 8 | #' - `mpg`: continuous
 9 | #' - `cylinders`:     multi-valued discrete
10 | #' - `displacement`:  continuous
11 | #' - `horsepower`:    continuous
12 | #' - `weight`:        continuous
13 | #' - `acceleration`:  continuous
14 | #' - `model_year`:    multi-valued discrete
15 | #' - `origin`:        multi-valued discrete
16 | #' - `car_name`:      string (unique for each instance)
17 | #' @source This dataset was taken from the StatLib library which is
18 | #' maintained at Carnegie Mellon University. The dataset was
19 | #' used in the 1983 American Statistical Association Exposition.
20 | #' @references
21 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.names>
22 | #' <https://archive.ics.uci.edu/ml/datasets/auto+mpg>
23 | "autompg"
24 | 


--------------------------------------------------------------------------------
/data-raw/autoimports_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | # Automobile (Imports) data set: https://archive.ics.uci.edu/ml/datasets/Automobile
 3 | 
 4 | # Attribute names as listed on the repository page, in file order.
 5 | # They were pulled from the page with the regex search
 6 | #   [0-9]{1,2}\. (.*):.*
 7 | # and the replacement "\1",
 8 | autoimports_attributes = c(
 9 |   "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
10 |   "num-of-doors", "body-style", "drive-wheels", "engine-location",
11 |   "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
12 |   "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
13 |   "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
14 |   "price"
15 | )
16 | 
17 | # The raw file has no header row; "?" is treated as a missing value.
18 | autoimports = read.csv(
19 |   "http://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
20 |   na.strings = "?",
21 |   header = FALSE
22 | )
23 | 
24 | # Hyphens become underscores in the column names.
25 | colnames(autoimports) = gsub("-", "_", autoimports_attributes)
26 | 
27 | usethis::use_data(autoimports, overwrite = TRUE)
28 | 


--------------------------------------------------------------------------------
/data-raw/glass_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | ## Glass Identification data set: https://archive.ics.uci.edu/ml/datasets/Glass+Identification
 3 | 
 4 | url_glass = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
 5 | 
 6 | # The raw file has no header row.
 7 | glass = read.csv(url_glass, header = FALSE)
 8 | 
 9 | # Columns taken verbatim from ML page
10 | # Regex search with: [0-9]{1,2}\. (.*) / .* / .* / .*
11 | # Replacement: "\1",
12 | var_names =  c("ID",
13 |                "RI",
14 |                "Na",
15 |                "Mg",
16 |                "Al",
17 |                "Si",
18 |                "K",
19 |                "Ca",
20 |                "Ba",
21 |                "Fe",
22 |                "Type")
23 | 
24 | # Label column names
25 | colnames(glass) = var_names
26 | 
27 | # Class labels indexed by the numeric class code (1-7) documented in
28 | # R/glass_docs.R.
29 | glass_type_labels = c(
30 |   "building_windows_float_processed",      # 1
31 |   "building_windows_non_float_processed",  # 2
32 |   "vehicle_windows_float_processed",       # 3
33 |   "vehicle_windows_non_float_processed",   # 4 (none in dataset)
34 |   "containers",                            # 5
35 |   "tableware",                             # 6
36 |   "headlamps"                              # 7
37 | )
38 | 
39 | # Pin every code to its label through explicit `levels`, so the mapping does
40 | # not depend on which codes happen to occur in the download. droplevels()
41 | # then removes labels with no rows, leaving the same levels as before for
42 | # the current data.
43 | glass$Type = droplevels(
44 |   factor(glass$Type, levels = 1:7, labels = glass_type_labels)
45 | )
46 | 
47 | # Save dataset
48 | usethis::use_data(glass, overwrite = TRUE)
49 | 


--------------------------------------------------------------------------------
/R/bridges_docs.R:
--------------------------------------------------------------------------------
 1 | #' Pittsburgh Bridges Data Set
 2 | #'
 3 | #' Data containing examples of Pittsburgh bridges and the relevant surrounding
 4 | #' area.
 5 | #'
 6 | #' @format A data frame with 108 observations on the following 13 variables.
 7 | #' - `identif`
 8 | #'     - identifier of the examples
 9 | #' - `river`
10 | #'     - A, M, O, Y
11 | #' - `location`
12 | #'     - Location of Bridge
13 | #' - `erected`
14 | #'     - Year built
15 | #' - `purpose`
16 | #'     - WALK, AQUEDUCT, RR, HIGHWAY
17 | #' - `length`
18 | #'     - 804 - 4558
19 | #' - `lanes`
20 | #'     - 1, 2, 4, 6
21 | #' - `clear_g`
22 | #'     - N, G
23 | #' - `t_or_d`
24 | #'     - THROUGH, DECK
25 | #' - `material`
26 | #'     - WOOD, IRON, STEEL
27 | #' - `span`
28 | #'     - SHORT, MEDIUM, LONG
29 | #' - `rel_l`
30 | #'     - S, S-F, F
31 | #' - `type`
32 | #'     - WOOD, SUSPEN, SIMPLE-T, ARCH, CANTILEV, CONT-T
33 | #' @details
34 | #' This data set is non-discretized, meaning the numeric properties
35 | #' were left intact.
36 | #' @source
37 | #' Yoram Reich & Steven J. Fenves
38 | #' Department of Civil Engineering
39 | #' and
40 | #' Engineering Design Research Center
41 | #' Carnegie Mellon University
42 | #' Pittsburgh, PA 15213
43 | #' @references
44 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1>
45 | #' <https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges>
46 | "bridges"
47 | 


--------------------------------------------------------------------------------
/man/car_eval.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/car_eval_docs.R
 3 | \docType{data}
 4 | \name{car_eval}
 5 | \alias{car_eval}
 6 | \title{Car Evaluation Data Set}
 7 | \format{
 8 | A data frame with 1728 observations on the following 7 variables.
 9 | \itemize{
10 | \item \code{buying}
11 | \itemize{
12 | \item vhigh, high, med, low.
13 | }
14 | \item \code{maint}
15 | \itemize{
16 | \item vhigh, high, med, low.
17 | }
18 | \item \code{doors}
19 | \itemize{
20 | \item 2, 3, 4, 5more.
21 | }
22 | \item \code{persons}
23 | \itemize{
24 | \item 2, 4, more.
25 | }
26 | \item \code{lug_boot}
27 | \itemize{
28 | \item small, med, big.
29 | }
30 | \item \code{safety}
31 | \itemize{
32 | \item low, med, high.
33 | }
34 | \item \code{class_value}
35 | \itemize{
36 | \item unacc, acc, good, vgood
37 | }
38 | }
39 | }
40 | \source{
41 | Marko Bohanec (marko.bohanec '@' ijs.si)
42 | Blaz Zupan (blaz.zupan '@' ijs.si)
43 | }
44 | \usage{
45 | car_eval
46 | }
47 | \description{
48 | Car Evaluation Database was derived from a simple hierarchical decision model
49 | originally developed for the demonstration of DEX.
50 | }
51 | \references{
52 | M. Bohanec, V. Rajkovic: Expert system for decision making. Sistemica 1(1), pp. 145-157, 1990.
53 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data}
54 | \url{https://archive.ics.uci.edu/ml/datasets/Car+Evaluation}
55 | }
56 | \keyword{datasets}
57 | 


--------------------------------------------------------------------------------
/R/glass_docs.R:
--------------------------------------------------------------------------------
 1 | #' Glass Identification Data Set
 2 | #'
 3 | #' The study of classification of types of glass was motivated by criminological
 4 | #' investigation. At the scene of the crime, the glass left can be used as
 5 | #' evidence...if it is correctly identified!
 6 | #'
 7 | #' @format A data frame with 214 observations on the following 11 variables.
 8 | #' - `ID`: 1 to 214
 9 | #' - `RI`: refractive index
10 | #' - `Na`: Sodium (unit measurement: weight percent in corresponding oxide, as are attributes 4-10)
11 | #' - `Mg`: Magnesium
12 | #' - `Al`: Aluminum
13 | #' - `Si`: Silicon
14 | #' - `K`: Potassium
15 | #' - `Ca`: Calcium
16 | #' - `Ba`: Barium
17 | #' - `Fe`: Iron
18 | #' - `Type`: Class attribute
19 | #'    - 1: building_windows_float_processed
20 | #'    - 2: building_windows_non_float_processed
21 | #'    - 3: vehicle_windows_float_processed
22 | #'    - 4: vehicle_windows_non_float_processed (none in this database)
23 | #'    - 5: containers
24 | #'    - 6: tableware
25 | #'    - 7: headlamps
26 | #' @source
27 | #'
28 | #' B. German
29 | #' Central Research Establishment
30 | #' Home Office Forensic Science Service
31 | #' Aldermaston, Reading, Berkshire RG7 4PN
32 | #'
33 | #' Vina Spiehler, Ph.D., DABFT
34 | #' Diagnostic Products Corporation
35 | #' (213) 776-0180 (ext 3014)
36 | #'
37 | #' @references
38 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data>
39 | #' <https://archive.ics.uci.edu/ml/datasets/Glass+Identification>
40 | "glass"
41 | 


--------------------------------------------------------------------------------
/man/autompg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/autompg_docs.R
 3 | \docType{data}
 4 | \name{autompg}
 5 | \alias{autompg}
 6 | \title{Autompg Data Set}
 7 | \format{
 8 | A data frame with 398 observations on the following 9 variables.
 9 | \itemize{
10 | \item \code{mpg}: continuous
11 | \item \code{cylinders}:     multi-valued discrete
12 | \item \code{displacement}:  continuous
13 | \item \code{horsepower}:    continuous
14 | \item \code{weight}:        continuous
15 | \item \code{acceleration}:  continuous
16 | \item \code{model_year}:    multi-valued discrete
17 | \item \code{origin}:        multi-valued discrete
18 | \item \code{car_name}:      string (unique for each instance)
19 | }
20 | }
21 | \source{
22 | This dataset was taken from the StatLib library which is
23 | maintained at Carnegie Mellon University. The dataset was
24 | used in the 1983 American Statistical Association Exposition.
25 | }
26 | \usage{
27 | autompg
28 | }
29 | \description{
30 | This dataset is a slightly modified version of the dataset provided in
31 | the StatLib library.  In line with the use by Ross Quinlan (1993) in
32 | predicting the attribute "mpg", 8 of the original instances were removed
33 | because they had unknown values for the "mpg" attribute.
34 | }
35 | \references{
36 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.names}
37 | \url{https://archive.ics.uci.edu/ml/datasets/auto+mpg}
38 | }
39 | \keyword{datasets}
40 | 


--------------------------------------------------------------------------------
/data-raw/hepatitis_build.R:
--------------------------------------------------------------------------------
 1 | ### UC Irvine Machine Learning Repository
 2 | # Hepatitis data set: https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis/hepatitis.data
 3 | 
 4 | url_hepatitis = "https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis/hepatitis.data"
 5 | 
 6 | # The raw file has no header row; "?" is treated as a missing value.
 7 | hepatitis = read.csv(
 8 |   url_hepatitis,
 9 |   header = FALSE, na.strings = "?"
10 | )
11 | 
12 | # Columns taken verbatim from ML page
13 | # Regex search with: [0-9]{1,2}\. (.*):.*
14 | # Replacement: "\1",
15 | var_names = c(
16 |   "Class",
17 |   "AGE",
18 |   "SEX",
19 |   "STEROID",
20 |   "ANTIVIRALS",
21 |   "FATIGUE",
22 |   "MALAISE",
23 |   "ANOREXIA",
24 |   "LIVER BIG",
25 |   "LIVER FIRM",
26 |   "SPLEEN PALPABLE",
27 |   "SPIDERS",
28 |   "ASCITES",
29 |   "VARICES",
30 |   "BILIRUBIN",
31 |   "ALK PHOSPHATE",
32 |   "SGOT",
33 |   "ALBUMIN",
34 |   "PROTIME",
35 |   "HISTOLOGY"
36 | )
37 | 
38 | var_names_safe = gsub("[[:space:]]", "_", var_names)
39 | 
40 | # Label columns
41 | colnames(hepatitis) = tolower(var_names_safe)
42 | 
43 | # Every categorical attribute is coded 1/2 in the raw file. The codes are
44 | # passed as explicit `levels` so each label is tied to its code instead of to
45 | # whichever values happen to occur in a column; a column holding a single
46 | # observed code would otherwise fail or be mislabelled.
47 | binary_codes = 1:2
48 | 
49 | # Dichotomous No/Yes attributes: steroid through varices, plus histology
50 | yes_no_cols = c(4:14, 20)
51 | hepatitis[, yes_no_cols] = lapply(hepatitis[, yes_no_cols], factor,
52 |                                   levels = binary_codes,
53 |                                   labels = c("No", "Yes"))
54 | 
55 | # Outcome and sex are stored as labelled factors as well
56 | hepatitis = within(hepatitis,{
57 |   class = factor(class, levels = binary_codes, labels = c("Die", "Live"))
58 |   sex   = factor(sex, levels = binary_codes, labels = c("Male", "Female"))
59 | })
60 | 
61 | usethis::use_data(hepatitis, overwrite = TRUE)
62 | 
63 | ## output colnames
64 | cat(paste0(colnames(hepatitis),"\n"), sep="")
65 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:  # allows the workflow to be started manually
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs: PR runs fall through to the unique run_id, so each gets its own group
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     permissions:
23 |       contents: write  # NOTE(review): presumably needed so the deploy step can push to gh-pages - confirm
24 |     steps:
25 |       - uses: actions/checkout@v3
26 | 
27 |       - uses: r-lib/actions/setup-pandoc@v2
28 | 
29 |       - uses: r-lib/actions/setup-r@v2
30 |         with:
31 |           use-public-rspm: true
32 | 
33 |       - uses: r-lib/actions/setup-r-dependencies@v2
34 |         with:
35 |           extra-packages: any::pkgdown, local::.  # pkgdown itself plus the package in this checkout
36 |           needs: website
37 | 
38 |       - name: Build site
39 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
40 |         shell: Rscript {0}
41 | 
42 |       - name: Deploy to GitHub pages 🚀
43 |         if: github.event_name != 'pull_request'  # pull requests build the site but never deploy it
44 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
45 |         with:
46 |           clean: false
47 |           branch: gh-pages
48 |           folder: docs  # NOTE(review): assumed to be the output directory of the build step above - confirm
49 | 


--------------------------------------------------------------------------------
/R/hepatitis_docs.R:
--------------------------------------------------------------------------------
 1 | #' Hepatitis Data Set
 2 | #'
 3 | #' This data set contains information on folks that suffer from hepatitis.
 4 | #'
 5 | #' @format A data frame with 155 observations on the following 20 variables.
 6 | #' - `class`
 7 | #'     - Die or Live
 8 | #' - `age`
 9 | #'     - Integer
10 | #' - `sex`
11 | #'     - Male, Female
12 | #' - `steroid`
13 | #'     - No, Yes
14 | #' - `antivirals`
15 | #'     - No, Yes
16 | #' - `fatigue`
17 | #'     - No, Yes
18 | #' - `malaise`
19 | #'     - No, Yes
20 | #' - `anorexia`
21 | #'     - No, Yes
22 | #' - `liver_big`
23 | #'     - No, Yes
24 | #' - `liver_firm`
25 | #'     - No, Yes
26 | #' - `spleen_palpable`
27 | #'     - No, Yes
28 | #' - `spiders`
29 | #'     - No, Yes
30 | #' - `ascites`
31 | #'     - No, Yes
32 | #' - `varices`
33 | #'     - No, Yes
34 | #' - `bilirubin`
35 | #'     - Numeric
36 | #'     - This can also be treated as a factor
37 | #' - `alk_phosphate`
38 | #'     - Integer
39 | #' - `sgot`
40 | #'     - Integer
41 | #' - `albumin`
42 | #'     - Numeric
43 | #' - `protime`
44 | #'     - Integer
45 | #' - `histology`
46 | #'     - No, Yes
47 | #' @source
48 | #' G.Gong (Carnegie-Mellon University) via
49 | #' Bojan Cestnik
50 | #' Jozef Stefan Institute
51 | #' Jamova 39
52 | #' 61000 Ljubljana
53 | #' Yugoslavia (tel.: (38)(+61) 214-399 ext.287)
54 | #'
55 | #' @references
56 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis/hepatitis.data>
57 | #' <https://archive.ics.uci.edu/ml/datasets/hepatitis>
58 | "hepatitis"
59 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false  # let the remaining matrix jobs finish even when one of them fails
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}  # only the 'devel' matrix entry defines this key
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/data-raw/bcw_original_build.R:
--------------------------------------------------------------------------------
 1 | ### UCI Irvine
 2 | ## Breast Cancer Wisconsin (Original) Data https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)
 3 | 
 4 | url_breast_cancer = "http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
 5 | 
 6 | # Read the raw comma-separated file straight from the repository.
 7 | breast_cancer_wis_data = read.csv(url_breast_cancer,
 8 |                                   header = FALSE,            # No header
 9 |                                   na.strings = "?",          # NA strings are `?` in the data (~16)
10 |                                   stringsAsFactors = FALSE)
11 | 
12 | # Column names, in file order
13 | colnames(breast_cancer_wis_data) = c("sample_code_number",
14 |                                      "clump_thickness",
15 |                                      "uniformity_of_cell_size",
16 |                                      "uniformity_of_cell_shape",
17 |                                      "marginal_adhesion",
18 |                                      "single_epithelial_cell_size",
19 |                                      "bare_nuclei",
20 |                                      "bland_chromatin",
21 |                                      "normal_nucleoli",
22 |                                      "mitoses",
23 |                                      "class")
24 | 
25 | # Recode `class` (2 = benign, 4 = malignant) as a labelled factor.
26 | # The result of factor() has to be assigned back to `class`: within() only
27 | # writes back objects that were assigned inside the braces, so a bare
28 | # factor(...) call is evaluated and thrown away, leaving the column as-is.
29 | # The levels are spelled out so the label order does not depend on which
30 | # codes happen to be present in the download.
31 | # NOTE: after a rebuild `class` is a factor rather than an integer column;
32 | # keep the data set documentation in step with that.
33 | breast_cancer_wis_data = within(breast_cancer_wis_data, {
34 |   class = factor(class, levels = c(2, 4), labels = c("benign", "malignant"))
35 | })
36 | 
37 | bcw_original = breast_cancer_wis_data
38 | 
39 | # Drop the intermediate object so only `bcw_original` is left behind
40 | rm(list="breast_cancer_wis_data")
41 | 
42 | usethis::use_data(bcw_original, overwrite = TRUE)
43 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # ucidata 0.0.3
 2 | 
 3 | ## Features
 4 | 
 5 | - Select datasets from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/index.php)
 6 |   as `data.frame` objects with appropriate type-casts.
 7 | - Accompanying documentation is available via `?dataset`.
 8 | 
 9 | ## Data Sets
10 | 
11 | - [`abalone`](https://archive.ics.uci.edu/ml/datasets/abalone)
12 | - [`adult`](https://archive.ics.uci.edu/ml/datasets/adult)
13 | - [`autoimports`](https://archive.ics.uci.edu/ml/datasets/Automobile)
14 | - [`autompg`](https://archive.ics.uci.edu/ml/datasets/auto+mpg)
15 | - Breast Cancer Wisconsin:
16 |     - [`bcw_original` (Breast Cancer Wisconsin Original)](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original))
17 | - Heart Disease
18 |     - [`heart_disease_cl`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
19 |     - [`heart_disease_hu`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
20 |     - [`heart_disease_va`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
21 |     - [`heart_disease_ch`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
22 | - [`bike_sharing_daily`](https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset)
23 | - [`bridges`](https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges)
24 | - [`car_eval`](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
25 | - [`forest_fires`](https://archive.ics.uci.edu/ml/datasets/Forest+Fires)
26 | - [`glass`](https://archive.ics.uci.edu/ml/datasets/Glass+Identification)
27 | - [`hepatitis`](https://archive.ics.uci.edu/ml/datasets/hepatitis)
28 | - [`wine`](https://archive.ics.uci.edu/ml/datasets/wine)
29 | 
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/R/wine_docs.R:
--------------------------------------------------------------------------------
 1 | #' Wine Data Set
 2 | #'
 3 | #' This data set is the combination of two datasets that were created, using red and white wine samples.
 4 | #' The inputs include objective tests (e.g. PH values) and the output is based on sensory data
 5 | #' (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality
 6 | #' between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model
 7 | #' these datasets under a regression approach. The support vector machine model achieved the
 8 | #' best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerance (T),
 9 | #' etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity
10 | #' analysis procedure).
11 | #' @format A data frame with 6497 observations (1599 Red and 4898 White) on the following 13 variables.
12 | #' - fixed acidity
13 | #' - volatile acidity
14 | #' - citric acid
15 | #' - residual sugar
16 | #' - chlorides
17 | #' - free sulfur dioxide
18 | #' - total sulfur dioxide
19 | #' - density
20 | #' - pH
21 | #' - sulphates
22 | #' - alcohol
23 | #' - quality
24 | #'     - Score between 0 and 10 based on sensor reading
25 | #' - color
26 | #'     - `"White"` or `"Red"`
27 | #' @source P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
28 | #' Modeling wine preferences by data mining from physicochemical properties.
29 | #' In Decision Support Systems, Elsevier, 47(4):547-553. ISSN: 0167-9236.
30 | #' @references
31 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality.names>
32 | #' <https://archive.ics.uci.edu/ml/datasets/Wine+Quality>
33 | "wine"
34 | 


--------------------------------------------------------------------------------
/man/glass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/glass_docs.R
 3 | \docType{data}
 4 | \name{glass}
 5 | \alias{glass}
 6 | \title{Glass Identification Data Set}
 7 | \format{
 8 | A data frame with 214 observations on the following 11 variables.
 9 | \itemize{
10 | \item \code{ID}: 1 to 214
11 | \item \code{RI}: refractive index
12 | \item \code{Na}: Sodium (unit measurement: weight percent in corresponding oxide, as are attributes 4-10)
13 | \item \code{Mg}: Magnesium
14 | \item \code{Al}: Aluminum
15 | \item \code{Si}: Silicon
16 | \item \code{K}: Potassium
17 | \item \code{Ca}: Calcium
18 | \item \code{Ba}: Barium
19 | \item \code{Fe}: Iron
20 | \item \code{Type}: Class attribute
21 | \itemize{
22 | \item 1: building_windows_float_processed
23 | \item 2: building_windows_non_float_processed
24 | \item 3: vehicle_windows_float_processed
25 | \item 4: vehicle_windows_non_float_processed (none in this database)
26 | \item 5: containers
27 | \item 6: tableware
28 | \item 7: headlamps
29 | }
30 | }
31 | }
32 | \source{
33 | B. German
34 | Central Research Establishment
35 | Home Office Forensic Science Service
36 | Aldermaston, Reading, Berkshire RG7 4PN
37 | 
38 | Vina Spiehler, Ph.D., DABFT
39 | Diagnostic Products Corporation
40 | (213) 776-0180 (ext 3014)
41 | }
42 | \usage{
43 | glass
44 | }
45 | \description{
46 | The study of classification of types of glass was motivated by criminological
47 | investigation. At the scene of the crime, the glass left can be used as
48 | evidence...if it is correctly identified!
49 | }
50 | \references{
51 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data}
52 | \url{https://archive.ics.uci.edu/ml/datasets/Glass+Identification}
53 | }
54 | \keyword{datasets}
55 | 


--------------------------------------------------------------------------------
/R/bcw_original_docs.R:
--------------------------------------------------------------------------------
 1 | #' Breast Cancer Wisconsin (Original) Data Set
 2 | #'
 3 | #' Samples arrive periodically as Dr. Wolberg reports his clinical cases.
 4 | #' The database therefore reflects this chronological grouping of the data.
 5 | #'
 6 | #' @format A data frame with 699 observations on the following 11 variables.
 7 | #' - `sample_code_number`: id number
 8 | #' - `clump_thickness`: 1 - 10
 9 | #' - `uniformity_of_cell_size`:  1 - 10
10 | #' - `uniformity_of_cell_shape`: 1 - 10
11 | #' - `marginal_adhesion`: 1 - 10
12 | #' - `single_epithelial_cell_size`: 1 - 10
13 | #' - `bare_nuclei`:  1 - 10
14 | #' - `bland_chromatin`: 1 - 10
15 | #' - `normal_nucleoli`: 1 - 10
16 | #' - `mitoses`: 1 - 10
17 | #' - `class`: 2 for benign, 4 for malignant
18 | #' @source
19 | #' Dr. William H. Wolberg - Physician
20 | #' University of Wisconsin Hospitals
21 | #' Madison, Wisconsin, USA
22 | #'
23 | #' @references
24 | #'
25 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data>
26 | #'
27 | #' @details
28 | #' This grouping information appears immediately below, having been removed from the data itself:
29 | #'
30 | #' \tabular{rrr}{
31 | #'  Group  \tab Instances  \tab Date of Collection\cr
32 | #'    1    \tab 367        \tab January 1989\cr
33 | #'    2    \tab 70         \tab October 1989\cr
34 | #'    3    \tab 31         \tab February 1990\cr
35 | #'    4    \tab 17         \tab April 1990\cr
36 | #'    5    \tab 48         \tab August 1990\cr
37 | #'    6    \tab 49         \tab Updated January 1991\cr
38 | #'    7    \tab 31         \tab June 1991\cr
39 | #'    8    \tab 86         \tab November 1991\cr
40 | #'   Total \tab 699 points \tab 15 July 1992
41 | #' }
42 | #'
43 | #' Note that the results summarized above in Past Usage refer to a dataset of
44 | #' size 369, while Group 1 has only 367 instances. This is because it
45 | #' originally contained 369 instances; 2 were removed.
46 | "bcw_original"
47 | 


--------------------------------------------------------------------------------
/man/bridges.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bridges_docs.R
 3 | \docType{data}
 4 | \name{bridges}
 5 | \alias{bridges}
 6 | \title{Pittsburgh Bridges Data Set}
 7 | \format{
 8 | A data frame with 108 observations on the following 13 variables.
 9 | \itemize{
10 | \item \code{identif}
11 | \itemize{
12 | \item identifier of the examples
13 | }
14 | \item \code{river}
15 | \itemize{
16 | \item A, M, O, Y
17 | }
18 | \item \code{location}
19 | \itemize{
20 | \item Location of Bridge
21 | }
22 | \item \code{erected}
23 | \itemize{
24 | \item Year built
25 | }
26 | \item \code{purpose}
27 | \itemize{
28 | \item WALK, AQUEDUCT, RR, HIGHWAY
29 | }
30 | \item \code{length}
31 | \itemize{
32 | \item 804 - 4558
33 | }
34 | \item \code{lanes}
35 | \itemize{
36 | \item 1, 2, 4, 6
37 | }
38 | \item \code{clear_g}
39 | \itemize{
40 | \item N, G
41 | }
42 | \item \code{t_or_d}
43 | \itemize{
44 | \item THROUGH, DECK
45 | }
46 | \item \code{material}
47 | \itemize{
48 | \item WOOD, IRON, STEEL
49 | }
50 | \item \code{span}
51 | \itemize{
52 | \item SHORT, MEDIUM, LONG
53 | }
54 | \item \code{rel_l}
55 | \itemize{
56 | \item S, S-F, F
57 | }
58 | \item \code{type}
59 | \itemize{
60 | \item WOOD, SUSPEN, SIMPLE-T, ARCH, CANTILEV, CONT-T
61 | }
62 | }
63 | }
64 | \source{
65 | Yoram Reich & Steven J. Fenves
66 | Department of Civil Engineering
67 | and
68 | Engineering Design Research Center
69 | Carnegie Mellon University
70 | Pittsburgh, PA 15213
71 | }
72 | \usage{
73 | bridges
74 | }
75 | \description{
76 | Data containing examples of Pittsburgh bridges and the relevant surrounding
77 | area.
78 | }
79 | \details{
80 | This data set is non-discretized, meaning the numeric properties
81 | were left intact.
82 | }
83 | \references{
84 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1}
85 | \url{https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges}
86 | }
87 | \keyword{datasets}
88 | 


--------------------------------------------------------------------------------
/man/wine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wine_docs.R
 3 | \docType{data}
 4 | \name{wine}
 5 | \alias{wine}
 6 | \title{Wine Data Set}
 7 | \format{
 8 | A data frame with 6497 observations (1599 Red and 4898 White) on the following 13 variables.
 9 | \itemize{
10 | \item fixed acidity
11 | \item volatile acidity
12 | \item citric acid
13 | \item residual sugar
14 | \item chlorides
15 | \item free sulfur dioxide
16 | \item total sulfur dioxide
17 | \item density
18 | \item pH
19 | \item sulphates
20 | \item alcohol
21 | \item quality
22 | \itemize{
23 | \item Score between 0 and 10 based on sensor reading
24 | }
25 | \item color
26 | \itemize{
27 | \item \code{"White"} or \code{"Red"}
28 | }
29 | }
30 | }
31 | \source{
32 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
33 | Modeling wine preferences by data mining from physicochemical properties.
34 | In Decision Support Systems, Elsevier, 47(4):547-553. ISSN: 0167-9236.
35 | }
36 | \usage{
37 | wine
38 | }
39 | \description{
40 | This data set is the combination of two datasets that were created, using red and white wine samples.
41 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data
42 | (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality
43 | between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model
44 | these datasets under a regression approach. The support vector machine model achieved the
45 | best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerance (T),
46 | etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity
47 | analysis procedure).
48 | }
49 | \references{
50 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality.names}
51 | \url{https://archive.ics.uci.edu/ml/datasets/Wine+Quality}
52 | }
53 | \keyword{datasets}
54 | 


--------------------------------------------------------------------------------
/R/abalone_docs.R:
--------------------------------------------------------------------------------
 1 | #' Abalone Data Set
 2 | #'
 3 | #' Predicting the age of abalone from physical measurements. The age of abalone
 4 | #' is determined by cutting the shell through the cone, staining it, and
 5 | #' counting the number of rings through a microscope -- a boring and
 6 | #' time-consuming task. Other measurements, which are easier to obtain, are
 7 | #' used to predict the age. Further information, such as weather patterns and
 8 | #' location (hence food availability) may be required to solve the problem.
 9 | #'
10 | #' @format A data frame with 4177 observations on the following 9 variables.
11 | #' - `sex`: Factor
12 | #'    - `M` (Male), `F` (Female), and `I` (Infant)
13 | #' - `length`: Numeric
14 | #'    - Longest shell measurement (mm)
15 | #' - `diameter`: Numeric
16 | #'    - Perpendicular to length (mm)
17 | #' - `height`: Numeric
18 | #'    - With meat in shell (mm)
19 | #' - `whole_weight`: Numeric
20 | #'    - Whole abalone weight (grams)
21 | #' - `shucked_weight`: Numeric
22 | #'    - Weight of meat (grams)
23 | #' - `viscera_weight`: Numeric
24 | #'    - Gut weight after bleeding (grams)
25 | #' - `shell_weight`: Numeric
26 | #'    - Shell weight after being dried (grams)
27 | #' - `rings`: Integer
28 | #'    - Adding 1.5 gives the age in years
29 | #' @references
30 | #' Warwick J Nash, Tracy L Sellers, Simon R Talbot, Andrew J Cawthorn and Wes B Ford (1994)
31 | #' "The Population Biology of Abalone (_Haliotis_ species) in Tasmania. I. Blacklip Abalone (_H. rubra_) from the North Coast and Islands of Bass Strait",
32 | #' Sea Fisheries Division, Technical Report No. 48 (ISSN 1034-3288)
33 | #'
34 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/>
35 | #'
36 | #' <https://archive.ics.uci.edu/ml/datasets/abalone>
37 | #' @source
38 | #' Marine Resources Division
39 | #' Marine Research Laboratories - Taroona
40 | #' Department of Primary Industry and Fisheries, Tasmania
41 | #' GPO Box 619F, Hobart, Tasmania 7001, Australia
42 | #' (contact: Warwick Nash +61 02 277277, wnash '@' dpi.tas.gov.au)
43 | "abalone"
44 | 


--------------------------------------------------------------------------------
/R/forest_fires_docs.R:
--------------------------------------------------------------------------------
 1 | #' Forest Fires Data Set
 2 | #'
 3 | #' The aim is to predict the burned area of forest fires, in the northeast
 4 | #' region of Portugal, by using meteorological and other data
 5 | #'
 6 | #' @format A data frame with 517 observations on the following 13 variables.
 7 | #' - `X`
 8 | #'     - x-axis spatial coordinate within the Montesinho park map: 1 to 9
 9 | #' - `Y`
10 | #'     - y-axis spatial coordinate within the Montesinho park map: 2 to 9
11 | #' - `month`
12 | #'     - month of the year: "jan" to "dec"
13 | #' - `day`
14 | #'     - day of the week: "mon" to "sun"
15 | #' - `FFMC`
16 | #'     - FFMC index from the FWI system: 18.7 to 96.20
17 | #' - `DMC`
18 | #'     - DMC index from the FWI system: 1.1 to 291.3
19 | #' - `DC`
20 | #'     - DC index from the FWI system: 7.9 to 860.6
21 | #' - `ISI`
22 | #'     - ISI index from the FWI system: 0.0 to 56.10
23 | #' - `temp`
24 | #'     - temperature in Celsius degrees: 2.2 to 33.30
25 | #' - `RH`
26 | #'     - relative humidity in %: 15.0 to 100
27 | #' - `wind`
28 | #'     - wind speed in km/h: 0.40 to 9.40
29 | #' - `rain`
30 | #'     - outside rain in mm/m2 : 0.0 to 6.4
31 | #' - `area`
32 | #'     - the burned area of the forest (in ha): 0.00 to 1090.84
33 | #' @source
34 | #' Paulo Cortez, pcortez '@' dsi.uminho.pt, Department of Information Systems, University of Minho, Portugal.
35 | #' Aníbal Morais, araimorais '@' gmail.com, Department of Information Systems, University of Minho, Portugal.
36 | #' @references
37 | #' [ P. Cortez and A. Morais. A Data Mining Approach to Predict Forest Fires using Meteorological Data. In J. Neves, M. F. Santos and J. Machado Eds., New Trends in Artificial Intelligence, Proceedings of the 13th EPIA 2007 - Portuguese Conference on Artificial Intelligence, December, Guimarães, Portugal, pp. 512-523, 2007. APPIA, ISBN-13 978-989-95618-0-9](http://www.dsi.uminho.pt/~pcortez/fires.pdf)
38 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv>
39 | #' <https://archive.ics.uci.edu/ml/datasets/Forest+Fires>
40 | "forest_fires"
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/man/bcw_original.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bcw_original_docs.R
 3 | \docType{data}
 4 | \name{bcw_original}
 5 | \alias{bcw_original}
 6 | \title{Breast Cancer Wisconsin (Original) Data Set}
 7 | \format{
 8 | A data frame with 699 observations on the following 11 variables.
 9 | \itemize{
10 | \item \code{sample_code_number}: id number
11 | \item \code{clump_thickness}: 1 - 10
12 | \item \code{uniformity_of_cell_size}:  1 - 10
13 | \item \code{uniformity_of_cell_shape}: 1 - 10
14 | \item \code{single_epithelial_cell_size}: 1 - 10
15 | \item \code{bare_nuclei}:  1 - 10
16 | \item \code{bland_chromatin}: 1 - 10
17 | \item \code{normal_nucleoli}: 1 - 10
18 | \item \code{mitoses}: 1 - 10
19 | \item \code{class}: 2 for benign, 4 for malignant
20 | }
21 | }
22 | \source{
23 | Dr. William H. Wolberg - Physician
24 | University of Wisconsin Hospitals
25 | Madison, Wisconsin, USA
26 | }
27 | \usage{
28 | bcw_original
29 | }
30 | \description{
31 | Samples arrive periodically as Dr. Wolberg reports his clinical cases.
32 | The database therefore reflects this chronological grouping of the data.
33 | }
34 | \details{
35 | This grouping information appears immediately below, having been removed from the data itself:
36 | 
37 | \tabular{rrr}{
38 | Group  \tab Instances  \tab Date of Collection\cr
39 | 1    \tab 367        \tab January 1989\cr
40 | 2    \tab 70         \tab October 1989\cr
41 | 3    \tab 31         \tab February 1990\cr
42 | 4    \tab 17         \tab April 1990\cr
43 | 5    \tab 48         \tab August 1990\cr
44 | 6    \tab 49         \tab Updated January 1991\cr
45 | 7    \tab 31         \tab June 1991\cr
46 | 8    \tab 86         \tab November 1991\cr
47 | Total \tab 699 points \tab 15 July 1992
48 | }
49 | 
50 | Note that the results summarized above in Past Usage refer to a dataset of
51 | size 369, while Group 1 has only 367 instances. This is because it
52 | originally contained 369 instances; 2 were removed.
53 | }
54 | \references{
55 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data}
56 | }
57 | \keyword{datasets}
58 | 


--------------------------------------------------------------------------------
/man/hepatitis.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hepatitis_docs.R
  3 | \docType{data}
  4 | \name{hepatitis}
  5 | \alias{hepatitis}
  6 | \title{Hepatitis Data Set}
  7 | \format{
  8 | A data frame with 155 observations on the following 20 variables.
  9 | \itemize{
 10 | \item \code{class}
 11 | \itemize{
 12 | \item Die or Live
 13 | }
 14 | \item \code{age}
 15 | \itemize{
 16 | \item Integer
 17 | }
 18 | \item \code{sex}
 19 | \itemize{
 20 | \item Male, Female
 21 | }
 22 | \item \code{steroid}
 23 | \itemize{
 24 | \item No, Yes
 25 | }
 26 | \item \code{antivirals}
 27 | \itemize{
 28 | \item No, Yes
 29 | }
 30 | \item \code{fatigue}
 31 | \itemize{
 32 | \item No, Yes
 33 | }
 34 | \item \code{malaise}
 35 | \itemize{
 36 | \item No, Yes
 37 | }
 38 | \item \code{anorexia}
 39 | \itemize{
 40 | \item No, Yes
 41 | }
 42 | \item \code{liver_big}
 43 | \itemize{
 44 | \item No, Yes
 45 | }
 46 | \item \code{liver_firm}
 47 | \itemize{
 48 | \item No, Yes
 49 | }
 50 | \item \code{spleen_palpable}
 51 | \itemize{
 52 | \item No, Yes
 53 | }
 54 | \item \code{spiders}
 55 | \itemize{
 56 | \item No, Yes
 57 | }
 58 | \item \code{ascites}
 59 | \itemize{
 60 | \item No, Yes
 61 | }
 62 | \item \code{varices}
 63 | \itemize{
 64 | \item No, Yes
 65 | }
 66 | \item \code{bilirubin}
 67 | \itemize{
 68 | \item Numeric
 69 | \item This can also be treated as a factor
 70 | }
 71 | \item \code{alk_phosphate}
 72 | \itemize{
 73 | \item Integer
 74 | }
 75 | \item \code{sgot}
 76 | \itemize{
 77 | \item Integer
 78 | }
 79 | \item \code{albumin}
 80 | \itemize{
 81 | \item Numeric
 82 | }
 83 | \item \code{protime}
 84 | \itemize{
 85 | \item Integer
 86 | }
 87 | \item \code{histology}
 88 | \itemize{
 89 | \item No, Yes
 90 | }
 91 | }
 92 | }
 93 | \source{
 94 | G.Gong (Carnegie-Mellon University) via
 95 | Bojan Cestnik
 96 | Jozef Stefan Institute
 97 | Jamova 39
 98 | 61000 Ljubljana
 99 | Yugoslavia (tel.: (38)(+61) 214-399 ext.287)
100 | }
101 | \usage{
102 | hepatitis
103 | }
104 | \description{
105 | This data set contains information on folks that suffer from hepatitis.
106 | }
107 | \references{
108 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis/hepatitis.data}
109 | \url{https://archive.ics.uci.edu/ml/datasets/hepatitis}
110 | }
111 | \keyword{datasets}
112 | 


--------------------------------------------------------------------------------
/man/abalone.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/abalone_docs.R
 3 | \docType{data}
 4 | \name{abalone}
 5 | \alias{abalone}
 6 | \title{Abalone Data Set}
 7 | \format{
 8 | A data frame with 4177 observations on the following 9 variables.
 9 | \itemize{
10 | \item \code{sex}: Factor
11 | \itemize{
12 | \item \code{M} (Male), \code{F} (Female), and \code{I} (Infant)
13 | }
14 | \item \code{length}: Numeric
15 | \itemize{
16 | \item Longest shell measurement (mm)
17 | }
18 | \item \code{diameter}: Numeric
19 | \itemize{
20 | \item Perpendicular to length (mm)
21 | }
22 | \item \code{height}: Numeric
23 | \itemize{
24 | \item With meat in shell (mm)
25 | }
26 | \item \code{whole_weight}: Numeric
27 | \itemize{
28 | \item Whole abalone weight (grams)
29 | }
30 | \item \code{shucked_weight}: Numeric
31 | \itemize{
32 | \item Weight of meat (grams)
33 | }
34 | \item \code{viscera_weight}: Numeric
35 | \itemize{
36 | \item Gut weight after bleeding (grams)
37 | }
38 | \item \code{shell_weight}: Numeric
39 | \itemize{
40 | \item Shell weight after being dried (grams)
41 | }
42 | \item \code{rings}: Integer
43 | \itemize{
44 | \item Adding 1.5 gives the age in years
45 | }
46 | }
47 | }
48 | \source{
49 | Marine Resources Division
50 | Marine Research Laboratories - Taroona
51 | Department of Primary Industry and Fisheries, Tasmania
52 | GPO Box 619F, Hobart, Tasmania 7001, Australia
53 | (contact: Warwick Nash +61 02 277277, wnash '@' dpi.tas.gov.au)
54 | }
55 | \usage{
56 | abalone
57 | }
58 | \description{
59 | Predicting the age of abalone from physical measurements. The age of abalone
60 | is determined by cutting the shell through the cone, staining it, and
61 | counting the number of rings through a microscope -- a boring and
62 | time-consuming task. Other measurements, which are easier to obtain, are
63 | used to predict the age. Further information, such as weather patterns and
64 | location (hence food availability) may be required to solve the problem.
65 | }
66 | \references{
67 | Warwick J Nash, Tracy L Sellers, Simon R Talbot, Andrew J Cawthorn and Wes B Ford (1994)
68 | "The Population Biology of Abalone (\emph{Haliotis} species) in Tasmania. I. Blacklip Abalone (\emph{H. rubra}) from the North Coast and Islands of Bass Strait",
69 | Sea Fisheries Division, Technical Report No. 48 (ISSN 1034-3288)
70 | 
71 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/}
72 | 
73 | \url{https://archive.ics.uci.edu/ml/datasets/abalone}
74 | }
75 | \keyword{datasets}
76 | 


--------------------------------------------------------------------------------
/data-raw/bike_sharing_daily_build.R:
--------------------------------------------------------------------------------
 1 | ### UCI Irvine
 2 | ## Bike Sharing (Daily) Data https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset
 3 | 
 4 | # Download the zip file
 5 | download.file("http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
 6 |               "data-raw/Bike-Sharing-Dataset.zip")
 7 | 
 8 | # Read `day.csv` directly out of the downloaded archive through an unz()
 9 | # connection. The file ships with a header row, so column names come from it.
10 | #
11 | # Column classes, in file order:
12 | #   instant                                   -> character
13 | #   dteday                                    -> Date
14 | #   season, yr, mnth, holiday, weekday,
15 | #   workingday, weathersit                    -> factor  (7 columns)
16 | #   temp, atemp, hum, windspeed               -> numeric (4 columns)
17 | #   casual, registered, cnt                   -> integer (3 columns)
18 | bike_sharing_daily = read.csv(
19 |   file = unz(
20 |     description = "data-raw/Bike-Sharing-Dataset.zip",
21 |     filename    = "day.csv"
22 |   ),
23 |   header = TRUE,
24 |   colClasses = c(
25 |     "character",
26 |     "Date",
27 |     rep("factor",  times = 7),
28 |     rep("numeric", times = 4),
29 |     rep("integer", times = 3)
30 |   )
31 | )
32 | 
33 | # Relabel the factor columns one at a time. New labels are positional, so
34 | # each vector must follow the order of the column's existing levels.
35 | levels(bike_sharing_daily$season)     = c("Winter", "Spring", "Summer", "Fall")
36 | levels(bike_sharing_daily$yr)         = c(2011, 2012)
37 | bike_sharing_daily$mnth               = ordered(bike_sharing_daily$mnth, 1:12) # Order temporally
38 | levels(bike_sharing_daily$mnth)       = c(month.abb)
39 | levels(bike_sharing_daily$holiday)    = c("No", "Yes")
40 | levels(bike_sharing_daily$weekday)    = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
41 | levels(bike_sharing_daily$workingday) = c("No", "Yes")
42 | levels(bike_sharing_daily$weathersit) = c(
43 |   "Clear, Few clouds, Partly cloudy, Partly cloudy",
44 |   "Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist",
45 |   "Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds",
46 |   "Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog")
47 | 
48 | ## Add in normalized variables
49 | # bike_sharing_daily = within(bike_sharing_daily, {
50 | #   actual_temp_celsius = denormalize_temp(temp, -8, 39)    # Not sure if accurate
51 | #   actual_atemp_celsius = denormalize_temp(atemp, -16, 50) # Not sure if accurate
52 | #   actual_hum = hum * 100
53 | #   actual_windspeed = windspeed * 67
54 | # })
55 | 
56 | # Write the bike_sharing_daily data set via usethis, overwriting any existing copy
57 | usethis::use_data(bike_sharing_daily, overwrite = TRUE)
58 | 
59 | # Delete the downloaded zip archive now that the data has been read in.
60 | file.remove("data-raw/Bike-Sharing-Dataset.zip")
61 | 


--------------------------------------------------------------------------------
/data-raw/heart_disease_build.R:
--------------------------------------------------------------------------------
 1 | ### UCI Irvine
 2 | ## Heart Disease Data https://archive.ics.uci.edu/ml/datasets/Heart+Disease
 3 | 
 4 | # Site lookup table.
 5 | #   name  -> suffix of the exported data set (heart_disease_<suffix>)
 6 | #   value -> site name appended to the download URL prefix
 7 | heart_disease_locs = c(
 8 |   cl = "cleveland", hu = "hungarian",
 9 |   ch = "switzerland", va = "va"
10 | )
11 | 
12 | # Names of the exported objects, one per site, in the same order as
13 | # `heart_disease_locs`.
14 | heart_disease_names = sprintf("heart_disease_%s", names(heart_disease_locs))
15 | 
16 | # Read one processed heart-disease file: `loc` is pasted between the `url` prefix and ".data".
17 | read_heart_disease_data = function(loc, url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.") {
18 |   data_url = paste0(url, loc, ".data")
19 | 
20 |   # Comma-separated, no header row; "?" is the missing-value marker
21 |   site_data = read.csv(data_url, header = FALSE, sep = ",", na.strings = "?")
22 |   site_data
23 | }
24 | 
25 | 
26 | # Name the 14 columns of a processed heart-disease table and convert the
27 | # coded categorical columns to labelled factors.
28 | #
29 | # `data` is a data frame with 14 columns in the order listed below. The same
30 | # data frame is returned with named columns; `sex`, `cp`, `restecg`, `exang`,
31 | # `slope` and `thal` become factors, every other column is left untouched.
32 | #
33 | # Each factor() call spells out `levels` so a label is always tied to its
34 | # numeric code; inferring levels from the data makes factor() fail whenever
35 | # a site file lacks one of the codes. Values outside `levels` become NA.
36 | cast_heart_disease_data = function(data) {
37 |   names(data) = c(
38 |     "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
39 |     "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num"
40 |   )
41 | 
42 |   data = within(data, {
43 |     sex = factor(sex, levels = c(0, 1), labels = c("Female", "Male"))
44 |     cp  = factor(
45 |       cp,
46 |       levels = 1:4,
47 |       labels = c(
48 |         "typical angina",
49 |         "atypical angina",
50 |         "non-anginal pain",
51 |         "asymptomatic"
52 |       )
53 |     )
54 |     restecg = factor(
55 |       restecg,
56 |       levels = 0:2,
57 |       labels = c(
58 |         "normal",
59 |         "ST-T wave abnormality",
60 |         "probable/definite hypertrophy"
61 |       )
62 |     )
63 |     exang = factor(exang, levels = c(0, 1), labels = c("No", "Yes"))
64 |     slope = factor(slope, levels = 1:3,
65 |                    labels = c("upsloping", "flat", "downsloping"))
66 |     # thal is coded 3 / 6 / 7 rather than with consecutive integers
67 |     thal = factor(thal, levels = c(3, 6, 7),
68 |                   labels = c("normal",
69 |                              "fixed defect",
70 |                              "reversable defect"))
71 |   })
72 | 
73 |   data
74 | }
75 | 
76 | 
77 | # Read each site's file and cast it; the result is one data frame per site
78 | heart_disease_data = lapply(
79 |   heart_disease_locs,
80 |   function(site) cast_heart_disease_data(read_heart_disease_data(site))
81 | )
82 | names(heart_disease_data) = heart_disease_names
83 | 
84 | # Copy every list element into the global environment under its own name
85 | # See https://stackoverflow.com/questions/30516325/converting-a-list-of-data-frames-into-individual-data-frames-in-r
86 | list2env(heart_disease_data, envir = .GlobalEnv)
87 | 
88 | # Poor man's devtools::use_data: write each data set to data/<name>.rda
89 | sapply(heart_disease_names,
90 |        FUN = function(rda_name) save(list = rda_name, file = paste0("data/", rda_name, ".rda")))
91 | 


--------------------------------------------------------------------------------
/man/forest_fires.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/forest_fires_docs.R
 3 | \docType{data}
 4 | \name{forest_fires}
 5 | \alias{forest_fires}
 6 | \title{Forest Fires Data Set}
 7 | \format{
 8 | A data frame with 517 observations on the following 13 variables.
 9 | \itemize{
10 | \item \code{X}
11 | \itemize{
12 | \item x-axis spatial coordinate within the Montesinho park map: 1 to 9
13 | }
14 | \item \code{Y}
15 | \itemize{
16 | \item y-axis spatial coordinate within the Montesinho park map: 2 to 9
17 | }
18 | \item \code{month}
19 | \itemize{
20 | \item month of the year: "jan" to "dec"
21 | }
22 | \item \code{day}
23 | \itemize{
24 | \item day of the week: "mon" to "sun"
25 | }
26 | \item \code{FFMC}
27 | \itemize{
28 | \item FFMC index from the FWI system: 18.7 to 96.20
29 | }
30 | \item \code{DMC}
31 | \itemize{
32 | \item DMC index from the FWI system: 1.1 to 291.3
33 | }
34 | \item \code{DC}
35 | \itemize{
36 | \item DC index from the FWI system: 7.9 to 860.6
37 | }
38 | \item \code{ISI}
39 | \itemize{
40 | \item ISI index from the FWI system: 0.0 to 56.10
41 | }
42 | \item \code{temp}
43 | \itemize{
44 | \item temperature in Celsius degrees: 2.2 to 33.30
45 | }
46 | \item \code{RH}
47 | \itemize{
48 | \item relative humidity in \%: 15.0 to 100
49 | }
50 | \item \code{wind}
51 | \itemize{
52 | \item wind speed in km/h: 0.40 to 9.40
53 | }
54 | \item \code{rain}
55 | \itemize{
56 | \item outside rain in mm/m2 : 0.0 to 6.4
57 | }
58 | \item \code{area}
59 | \itemize{
60 | \item the burned area of the forest (in ha): 0.00 to 1090.84
61 | }
62 | }
63 | }
64 | \source{
65 | Paulo Cortez, pcortez '@' dsi.uminho.pt, Department of Information Systems, University of Minho, Portugal.
66 | Aníbal Morais, araimorais '@' gmail.com, Department of Information Systems, University of Minho, Portugal.
67 | }
68 | \usage{
69 | forest_fires
70 | }
71 | \description{
72 | The aim is to predict the burned area of forest fires, in the northeast
73 | region of Portugal, by using meteorological and other data
74 | }
75 | \references{
76 | \href{http://www.dsi.uminho.pt/~pcortez/fires.pdf}{ P. Cortez and A. Morais. A Data Mining Approach to Predict Forest Fires using Meteorological Data. In J. Neves, M. F. Santos and J. Machado Eds., New Trends in Artificial Intelligence, Proceedings of the 13th EPIA 2007 - Portuguese Conference on Artificial Intelligence, December, Guimarães, Portugal, pp. 512-523, 2007. APPIA, ISBN-13 978-989-95618-0-9}
77 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv}
78 | \url{https://archive.ics.uci.edu/ml/datasets/Forest+Fires}
79 | }
80 | \keyword{datasets}
81 | 


--------------------------------------------------------------------------------
/R/adult_docs.R:
--------------------------------------------------------------------------------
 1 | #' Adult Data Set
 2 | #'
 3 | #' Extraction was done by Barry Becker from the 1994 Census database.
 4 | #' A set of reasonably clean records was extracted using the following
 5 | #' conditions: ((AAGE > 16) && (AGI > 100) && (AFNLWGT > 1) && (HRSWK > 0))
 6 | #'
 7 | #' @format A data frame with 32561 observations on the following 15 variables.
 8 | #' - `age`: Integer
 9 | #'    - Number of years alive
10 | #' - `workclass`: Factor
11 | #'    - Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov,
12 | #'      State-gov, Without-pay, Never-worked.
13 | #' - `fnlwgt`: Numeric
14 | #'    - The variable represents the final weight, which is effectively a sampling weight.
15 | #'      See the names file listed in references for more details.
16 | #' - `education`: Factor
17 | #'    - Highest level of education attained
18 | #'    - Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm,
19 | #'      Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate,
20 | #'      5th-6th, Preschool.
21 | #' - `education_num`: Numeric
22 | #'    - Number of years of education
23 | #' - `marital_status`: Factor
24 | #'    - Married-civ-spouse, Divorced, Never-married, Separated, Widowed,
25 | #'      Married-spouse-absent, Married-AF-spouse
26 | #' - `occupation`: Factor
27 | #'    - Tech-support, Craft-repair, Other-service, Sales, Exec-managerial,
28 | #'      Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical,
29 | #'      Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv,
30 | #'      Armed-Forces.
31 | #' - `relationship`: Factor
32 | #'    - Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
33 | #' - `race`: Factor
34 | #'    - White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
35 | #' - `sex`: Factor
36 | #'    - Female, Male
37 | #' - `capital_gain`: Integer
38 | #'    - Income from investment sources, apart from wages/salary
39 | #' - `capital_loss`: Integer
40 | #'    - Losses from investment sources, apart from wages/salary
41 | #' - `hours_per_week`: Integer
42 | #'    - Amount of hours worked per week
43 | #' - `native_country`: Factor
44 | #'    - Country of origin
45 | #'    - United-States, Cambodia, England, Puerto-Rico, Canada, Germany,
46 | #'      Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba,
47 | #'      Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico,
48 | #'      Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan,
49 | #'      Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand,
50 | #'      Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
51 | #' - `income`: Factor
52 | #'    - Whether the income is greater than $50,000 or not.
53 | #'    - <=50K, >50K
54 | #' @details
55 | #' Prediction task is to determine whether a person makes over 50K a year.
56 | #' @references
57 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/adult/>
58 | #'
59 | #' <http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names>
60 | #'
61 | #' <https://archive.ics.uci.edu/ml/datasets/adult>
62 | #' @source
63 | #' Ronny Kohavi and Barry Becker
64 | #' Data Mining and Visualization
65 | #' Silicon Graphics.
66 | #' e-mail: ronnyk '@' live.com for questions.
67 | "adult"
68 | 


--------------------------------------------------------------------------------
/R/heart_disease_processed_docs.R:
--------------------------------------------------------------------------------
 1 | #' Heart Disease Processed Data Sets
 2 | #'
 3 | #' Detecting the presence of heart disease in patients.
 4 | #'
 5 | #' @format Four `data.frames` with a varying number of observations that contain
 6 | #'  the following 14 variables.
 7 | #' - `age`: age in years
 8 | #' - `sex`: sex (1 = male; 0 = female)
 9 | #' - `cp`: chest pain type
10 | #'     - Value 1: typical angina
11 | #'     - Value 2: atypical angina
12 | #'     - Value 3: non-anginal pain
13 | #'     - Value 4: asymptomatic
14 | #' - `trestbps`: resting blood pressure (in mm Hg on admission to the hospital)
15 | #' - `chol`: serum cholesterol in mg/dl
16 | #' - `fbs`: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
17 | #' - `restecg`: resting electrocardiographic results
18 | #'     - Value 0: normal
19 | #'     - Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
20 | #'     - Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
21 | #' - `thalach`: maximum heart rate achieved
22 | #' - `exang`: exercise induced angina (1 = yes; 0 = no)
23 | #' - `oldpeak`: ST depression induced by exercise relative to rest
24 | #' - `slope`: the slope of the peak exercise ST segment
25 | #'     - Value 1: upsloping
26 | #'     - Value 2: flat
27 | #'     - Value 3: downsloping
28 | #' - `ca`: number of major vessels (0-3) colored by fluoroscopy
29 | #' - `thal`: See below
30 | #'     - Value 3: normal
31 | #'     - Value 6: fixed defect
32 | #'     - Value 7: reversable defect
33 | #' - `num`: diagnosis of heart disease (angiographic disease status)
34 | #'     - Value 0: < 50% diameter narrowing
35 | #'     - Value 1: > 50% diameter narrowing
36 | #'
37 | #' @rdname heart_disease
38 | #' @source
39 | #' 1. Hungarian Institute of Cardiology. Budapest: Andras Janosi, M.D.
40 | #' 2. University Hospital, Zurich, Switzerland: William Steinbrunn, M.D.
41 | #' 3. University Hospital, Basel, Switzerland: Matthias Pfisterer, M.D.
42 | #' 4. V.A. Medical Center, Long Beach and Cleveland Clinic Foundation: Robert Detrano, M.D., Ph.D.
43 | #'
44 | #' @references
45 | #'
46 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data>
47 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data>
48 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.switzerland.data>
49 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.va.data>
50 | #' <https://archive.ics.uci.edu/ml/datasets/Heart+Disease>
51 | #'
52 | #' @details
53 | #' The data was collected from the four following locations:
54 | #'
55 | #' 1. `heart_disease_cl`: Cleveland Clinic Foundation
56 | #' 2. `heart_disease_hu`: Hungarian Institute of Cardiology, Budapest
57 | #' 3. `heart_disease_va`: V.A. Medical Center, Long Beach, CA
58 | #' 4. `heart_disease_ch`: University Hospital, Zurich, Switzerland
59 | #'
60 | #' \tabular{rr}{
61 | #'  Database        \tab Instances  \cr
62 | #'    Cleveland     \tab 303        \cr
63 | #'    Hungarian     \tab 294        \cr
64 | #'    Switzerland   \tab 123        \cr
65 | #'    Long Beach VA \tab 200        \cr
66 | #' }
67 | #'
68 | "heart_disease_cl"
69 | 
70 | #' @rdname heart_disease
71 | "heart_disease_hu"
72 | 
73 | #' @rdname heart_disease
74 | "heart_disease_va"
75 | 
76 | #' @rdname heart_disease
77 | "heart_disease_ch"
78 | 


--------------------------------------------------------------------------------
/R/autoimports_docs.R:
--------------------------------------------------------------------------------
 1 | #' Autoimports Dataset
 2 | #'
 3 | #' This data set consists of three types of entities:
 4 | #' (a) the specification of an auto in terms of various characteristics,
 5 | #' (b) its assigned insurance risk rating,
 6 | #' (c) its normalized losses in use as compared to other cars.
 7 | #' The second rating corresponds to the degree to which the auto is more risky
 8 | #' than its price indicates. Cars are initially assigned a risk factor symbol
 9 | #' associated with its price. Then, if it is more risky (or less), this symbol
10 | #' is adjusted by moving it up (or down) the scale. Actuaries call this
11 | #' process "symboling". A value of +3 indicates that the auto is risky, -3
12 | #' that it is probably pretty safe. The third factor is the relative average
13 | #' loss payment per insured vehicle year. This value is normalized for all autos
14 | #' within a particular size classification (two-door small, station wagons,
15 | #' sports/speciality, etc...), and represents the average loss per car per year.
16 | #'
17 | #' @format A data frame with 205 observations on the following 26 variables.
18 | #' - `symboling`:
19 | #'     - -3, -2, -1, 0, 1, 2, 3.
20 | #' - `normalized_losses`:
21 | #'     - continuous from 65 to 256.
22 | #' - `make`:
23 | #'     - alfa-romero, audi, bmw, chevrolet, dodge, honda, isuzu,
24 | #'       jaguar, mazda, mercedes-benz, mercury, mitsubishi, nissan,
25 | #'       peugot, plymouth, porsche, renault, saab, subaru, toyota, volkswagen,
26 | #'       volvo
27 | #' - `fuel_type`:
28 | #'     - diesel, gas.
29 | #' - `aspiration`:
30 | #'     - std, turbo.
31 | #' - `num_of_doors`:
32 | #'     - four, two.
33 | #' - `body_style`:
34 | #'     - hardtop, wagon, sedan, hatchback, convertible.
35 | #' - `drive_wheels`:
36 | #'     - 4wd, fwd, rwd.
37 | #' - `engine_location`:
38 | #'     - front, rear.
39 | #' - `wheel_base`:
40 | #'     - continuous from 86.6 to 120.9.
41 | #' - `length`:
42 | #'     - continuous from 141.1 to 208.1.
43 | #' - `width`:
44 | #'     - continuous from 60.3 to 72.3.
45 | #' - `height`:
46 | #'     - continuous from 47.8 to 59.8.
47 | #' - `curb_weight`:
48 | #'     - continuous from 1488 to 4066.
49 | #' - `engine_type`:
50 | #'     - dohc, dohcv, l, ohc, ohcf, ohcv, rotor.
51 | #' - `num_of_cylinders`:
52 | #'     - eight, five, four, six, three, twelve, two.
53 | #' - `engine_size`:
54 | #'     - continuous from 61 to 326.
55 | #' - `fuel_system`:
56 | #'     - 1bbl, 2bbl, 4bbl, idi, mfi, mpfi, spdi, spfi.
57 | #' - `bore`:
58 | #'     - continuous from 2.54 to 3.94.
59 | #' - `stroke`:
60 | #'     - continuous from 2.07 to 4.17.
61 | #' - `compression_ratio`:
62 | #'     - continuous from 7 to 23.
63 | #' - `horsepower`:
64 | #'     - continuous from 48 to 288.
65 | #' - `peak_rpm`:
66 | #'     - continuous from 4150 to 6600.
67 | #' - `city_mpg`:
68 | #'     - continuous from 13 to 49.
69 | #' - `highway_mpg`:
70 | #'     - continuous from 16 to 54.
71 | #' - `price`:
72 | #'     - continuous from 5118 to 45400.
73 | #'
74 | #' @source
75 | #'
76 | #' Donor: Jeffrey C. Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
77 | #'
78 | #' 1985 Model Import Car and Truck Specifications, 1985 Ward's Automotive Yearbook.
79 | #'
80 | #' Personal Auto Manuals, Insurance Services Office, 160 Water Street, New York, NY 10038
81 | #'
82 | #' Insurance Collision Report, Insurance Institute for Highway Safety, Watergate 600, Washington, DC 20037
83 | #'
84 | #' @references
85 | #' <https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data>
86 | #' <https://archive.ics.uci.edu/ml/datasets/Automobile>
87 | "autoimports"
88 | 


--------------------------------------------------------------------------------
/man/adult.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/adult_docs.R
  3 | \docType{data}
  4 | \name{adult}
  5 | \alias{adult}
  6 | \title{Adult Data Set}
  7 | \format{
  8 | A data frame with 32561 observations on the following 15 variables.
  9 | \itemize{
 10 | \item \code{age}: Integer
 11 | \itemize{
 12 | \item Number of years alive
 13 | }
 14 | \item \code{workclass}: Factor
 15 | \itemize{
 16 | \item Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov,
 17 | State-gov, Without-pay, Never-worked.
 18 | }
 19 | \item \code{fnlwgt}: Numeric
 20 | \itemize{
 21 | \item The variable represents the Final Weight, which is more so a sampling weight.
 22 | See the names file listed in references for more details.
 23 | }
 24 | \item \code{education}: Factor
 25 | \itemize{
 26 | \item Highest level of education attained
 27 | \item Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm,
 28 | Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate,
 29 | 5th-6th, Preschool.
 30 | }
 31 | \item \code{education_num}: Numeric
 32 | \itemize{
 33 | \item Number of years of education
 34 | }
 35 | \item \code{marital_status}: Factor
 36 | \itemize{
 37 | \item Married-civ-spouse, Divorced, Never-married, Separated, Widowed,
 38 | Married-spouse-absent, Married-AF-spouse
 39 | }
 40 | \item \code{occupation}: Factor
 41 | \itemize{
 42 | \item Tech-support, Craft-repair, Other-service, Sales, Exec-managerial,
 43 | Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical,
 44 | Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv,
 45 | Armed-Forces.
 46 | }
 47 | \item \code{relationship}: Factor
 48 | \itemize{
 49 | \item Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
 50 | }
 51 | \item \code{race}: Factor
 52 | \itemize{
 53 | \item White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
 54 | }
 55 | \item \code{sex}: Factor
 56 | \itemize{
 57 | \item Female, Male
 58 | }
 59 | \item \code{capital_gain}: Integer
 60 | \itemize{
 61 | \item Income from investment sources, apart from wages/salary
 62 | }
 63 | \item \code{capital_loss}: Integer
 64 | \itemize{
 65 | \item Losses from investment sources, apart from wages/salary
 66 | }
 67 | \item \code{hours_per_week}: Integer
 68 | \itemize{
 69 | \item Amount of hours worked per week
 70 | }
 71 | \item \code{native_country}: Factor
 72 | \itemize{
 73 | \item Country of origin
 74 | \item United-States, Cambodia, England, Puerto-Rico, Canada, Germany,
 75 | Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba,
 76 | Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico,
 77 | Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan,
 78 | Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand,
 79 | Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
 80 | }
 81 | \item \code{income}: Factor
 82 | \itemize{
 83 | \item Whether the income is greater than $50,000 or not.
 84 | \item <=50K, >50K
 85 | }
 86 | }
 87 | }
 88 | \source{
 89 | Ronny Kohavi and Barry Becker
 90 | Data Mining and Visualization
 91 | Silicon Graphics.
 92 | e-mail: ronnyk '@' live.com for questions.
 93 | }
 94 | \usage{
 95 | adult
 96 | }
 97 | \description{
 98 | Extraction was done by Barry Becker from the 1994 Census database.
 99 | A set of reasonably clean records was extracted using the following
100 | conditions: ((AAGE > 16) && (AGI > 100) && (AFNLWGT > 1) && (HRSWK > 0))
101 | }
102 | \details{
103 | Prediction task is to determine whether a person makes over 50K a year.
104 | }
105 | \references{
106 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/adult/}
107 | 
108 | \url{http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names}
109 | 
110 | \url{https://archive.ics.uci.edu/ml/datasets/adult}
111 | }
112 | \keyword{datasets}
113 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # `ucidata` - Data Sets from UC Irvine’s ML Library
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![R-CMD-check](https://github.com/coatless-rpkg/ucidata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/coatless-rpkg/ucidata/actions/workflows/R-CMD-check.yaml)
  9 | <!-- badges: end -->
 10 | 
 11 | The following is an *R* data package that features certain data sets
 12 | from the [Machine Learning Library at UC
 13 | Irvine](https://archive.ics.uci.edu/ml/). These data sets have been
 14 | cleaned up and provide documentation via *R*’s help system.
 15 | 
 16 | > \[!NOTE\]
 17 | >
 18 | > Want to easily access data sets not included in this package?
 19 | >
 20 | > Check out the
 21 | > [`{ucimlrepo}`](https://github.com/coatless-rpkg/ucimlrepo) R package!
 22 | > The package provides an interface to download and automatically load
 23 | > data sets from the UC Irvine Machine Learning Repository.
 24 | 
 25 | ## Installation
 26 | 
 27 | You can install `ucidata` from github with:
 28 | 
 29 | ``` r
 30 | # install.packages("remotes")
 31 | remotes::install_github("coatless-rpkg/ucidata")
 32 | ```
 33 | 
 34 | ## Using data in the package
 35 | 
 36 | There are two ways to access the data contained within this package.
 37 | 
 38 | The first is to load the package itself and type the name of a data set.
 39 | This approach takes advantage of *R*’s lazy loading mechanism, which
 40 | avoids loading the data until it is used in an *R* session. For details on
 41 | how lazy loading works, please see [Section 1.17: Lazy
 42 | Loading](https://cran.r-project.org/doc/manuals/r-release/R-ints.html#Lazy-loading)
 43 | of the [R
 44 | Internals](https://cran.r-project.org/doc/manuals/r-release/R-ints.html)
 45 | manual.
 46 | 
 47 | ``` r
 48 | # Load the `ucidata` package
 49 | library("ucidata")
 50 | 
 51 | # See the first 6 observations of the `autompg` dataset
 52 | head(autompg)
 53 | 
 54 | # View the help documentation for `autompg`
 55 | ?autompg
 56 | ```
 57 | 
 58 | The second approach is to use the `data()` command to load a data set on
 59 | the fly without attaching the package, and then type the name of the data set.
 60 | 
 61 | ``` r
 62 | # Loading `autompg` without a `library(ucidata)` call
 63 | data("autompg", package = "ucidata")
 64 | 
 65 | # See the first 6 observations of the `autompg` dataset
 66 | head(autompg)
 67 | 
 68 | # View the help documentation for `autompg`
 69 | ?autompg
 70 | ```
 71 | 
 72 | ## Included Data Sets
 73 | 
 74 | The following data sets are included in the `ucidata` package:
 75 | 
 76 | - [`abalone`](https://archive.ics.uci.edu/ml/datasets/abalone)
 77 | - [`adult`](https://archive.ics.uci.edu/ml/datasets/adult)
 78 | - [`autoimports`](https://archive.ics.uci.edu/ml/datasets/Automobile)
 79 | - [`autompg`](https://archive.ics.uci.edu/ml/datasets/auto+mpg)
 80 | - Breast Cancer Wisconsin:
 81 |   - [`bcw_original` (Breast Cancer Wisconsin
 82 |     Original)](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original))
 83 | - Heart Disease
 84 |   - [`heart_disease_cl`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 85 |   - [`heart_disease_hu`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 86 |   - [`heart_disease_va`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 87 |   - [`heart_disease_ch`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 88 | - [`bike_sharing_daily`](https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset)
 89 | - [`bridges`](https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges)
 90 | - [`car_eval`](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
 91 | - [`forest_fires`](https://archive.ics.uci.edu/ml/datasets/Forest+Fires)
 92 | - [`glass`](https://archive.ics.uci.edu/ml/datasets/Glass+Identification)
 93 | - [`hepatitis`](https://archive.ics.uci.edu/ml/datasets/hepatitis)
 94 | - [`wine`](https://archive.ics.uci.edu/ml/datasets/wine)
 95 | 
 96 | ## Build Scripts
 97 | 
 98 | Want to see how each data set was imported? Check out the
 99 | [`data-raw`](https://github.com/coatless-rpkg/ucidata/tree/master/data-raw)
100 | folder!
101 | 


--------------------------------------------------------------------------------
/man/heart_disease.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/heart_disease_processed_docs.R
  3 | \docType{data}
  4 | \name{heart_disease_cl}
  5 | \alias{heart_disease_cl}
  6 | \alias{heart_disease_hu}
  7 | \alias{heart_disease_va}
  8 | \alias{heart_disease_ch}
  9 | \title{Heart Disease Processed Data Sets}
 10 | \format{
 11 | Four \code{data.frames} with a varying number of observations that contain
 12 | the following 14 variables.
 13 | \itemize{
 14 | \item \code{age}: age in years
 15 | \item \code{sex}: sex (1 = male; 0 = female)
 16 | \item \code{cp}: chest pain type
 17 | \itemize{
 18 | \item Value 1: typical angina
 19 | \item Value 2: atypical angina
 20 | \item Value 3: non-anginal pain
 21 | \item Value 4: asymptomatic
 22 | }
 23 | \item \code{trestbps}: resting blood pressure (in mm Hg on admission to the hospital)
 24 | \item \code{chol}: serum cholesterol in mg/dl
 25 | \item \code{fbs}: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
 26 | \item \code{restecg}: resting electrocardiographic results
 27 | \itemize{
 28 | \item Value 0: normal
 29 | \item Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
 30 | \item Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
 31 | }
 32 | \item \code{thalach}: maximum heart rate achieved
 33 | \item \code{exang}: exercise induced angina (1 = yes; 0 = no)
 34 | \item \code{oldpeak}: ST depression induced by exercise relative to rest
 35 | \item \code{slope}: the slope of the peak exercise ST segment
 36 | \itemize{
 37 | \item Value 1: upsloping
 38 | \item Value 2: flat
 39 | \item Value 3: downsloping
 40 | }
 41 | \item \code{ca}: number of major vessels (0-3) colored by fluoroscopy
 42 | \item \code{thal}: See below
 43 | \itemize{
 44 | \item Value 3: normal
 45 | \item Value 6: fixed defect
 46 | \item Value 7: reversable defect
 47 | }
 48 | \item \code{num}: diagnosis of heart disease (angiographic disease status)
 49 | \itemize{
 50 | \item Value 0: < 50\% diameter narrowing
 51 | \item Value 1: > 50\% diameter narrowing
 52 | }
 53 | }
 54 | 
 55 | An object of class \code{data.frame} with 294 rows and 14 columns.
 56 | 
 57 | An object of class \code{data.frame} with 200 rows and 14 columns.
 58 | 
 59 | An object of class \code{data.frame} with 123 rows and 14 columns.
 60 | }
 61 | \source{
 62 | \enumerate{
 63 | \item Hungarian Institute of Cardiology. Budapest: Andras Janosi, M.D.
 64 | \item University Hospital, Zurich, Switzerland: William Steinbrunn, M.D.
 65 | \item University Hospital, Basel, Switzerland: Matthias Pfisterer, M.D.
 66 | \item V.A. Medical Center, Long Beach and Cleveland Clinic Foundation: Robert Detrano, M.D., Ph.D.
 67 | }
 68 | }
 69 | \usage{
 70 | heart_disease_cl
 71 | 
 72 | heart_disease_hu
 73 | 
 74 | heart_disease_va
 75 | 
 76 | heart_disease_ch
 77 | }
 78 | \description{
 79 | Detecting the presence of heart disease in patients.
 80 | }
 81 | \details{
 82 | The data was collected from the four following locations:
 83 | \enumerate{
 84 | \item \code{heart_disease_cl}: Cleveland Clinic Foundation
 85 | \item \code{heart_disease_hu}: Hungarian Institute of Cardiology, Budapest
 86 | \item \code{heart_disease_va}: V.A. Medical Center, Long Beach, CA
 87 | \item \code{heart_disease_ch}: University Hospital, Zurich, Switzerland
 88 | }
 89 | 
 90 | \tabular{rrr}{
 91 | Database        \tab Instances  \cr
 92 | Cleveland     \tab 303        \cr
 93 | Hungarian     \tab 294        \cr
 94 | Switzerland   \tab 123        \cr
 95 | Long Beach VA \tab 200        \cr
 96 | }
 97 | }
 98 | \references{
 99 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data}
100 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data}
101 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.switzerland.data}
102 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.va.data}
103 | \url{https://archive.ics.uci.edu/ml/datasets/Heart+Disease}
104 | }
105 | \keyword{datasets}
106 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, echo = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "README-"
 12 | )
 13 | ```
 14 | 
 15 | # `ucidata` - Data Sets from UC Irvine's ML Library 
 16 | 
 17 | <!-- badges: start -->
 18 | [![R-CMD-check](https://github.com/coatless-rpkg/ucidata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/coatless-rpkg/ucidata/actions/workflows/R-CMD-check.yaml)
 19 | <!-- badges: end -->
 20 | 
 21 | The following is an _R_ data package that features certain data sets from 
 22 | the [Machine Learning Library at UC Irvine](https://archive.ics.uci.edu/ml/).
 23 | These data sets have been cleaned up and provide documentation via _R_'s help system. 
 24 | 
 25 | > [!NOTE]
 26 | >
 27 | > Want to easily access data sets not included in this package? 
 28 | > 
 29 | > Check out the [`{ucimlrepo}`](https://github.com/coatless-rpkg/ucimlrepo) R package!
 30 | > The package provides an interface to download and automatically load data 
 31 | > sets from the UC Irvine Machine Learning Repository.
 32 | 
 33 | ## Installation
 34 | 
 35 | You can install `ucidata` from GitHub with:
 36 | 
 37 | ```{r gh-installation, eval = FALSE}
 38 | # install.packages("remotes")
 39 | remotes::install_github("coatless-rpkg/ucidata")
 40 | ```
 41 | 
 42 | ## Using data in the package
 43 | 
 44 | There are two ways to access the data contained within this package. 
 45 | 
 46 | The first is to load the package itself and type the name of a data set.
 47 | This approach takes advantage of _R_'s lazy loading mechanism, which avoids
 48 | loading the data until it is used in an _R_ session. For details on 
 49 | how lazy loading works, please see [Section 1.17: Lazy Loading](https://cran.r-project.org/doc/manuals/r-release/R-ints.html#Lazy-loading)
 50 | of the [R Internals](https://cran.r-project.org/doc/manuals/r-release/R-ints.html)
 51 | manual.
 52 | 
 53 | ```{r use-data-package, eval = FALSE}
 54 | # Load the `ucidata` package
 55 | library("ucidata")
 56 | 
 57 | # See the first 6 observations of the `autompg` dataset
 58 | head(autompg)
 59 | 
 60 | # View the help documentation for `autompg`
 61 | ?autompg
 62 | ```
 63 | 
 64 | The second approach is to use the `data()` command to load data on the 
 65 | fly without loading the package and then type the name of a data set.
 66 | 
 67 | ```{r use-data-call, eval = FALSE}
 68 | # Loading `autompg` without a `library(ucidata)` call
 69 | data("autompg", package = "ucidata")
 70 | 
 71 | # See the first 6 observations of the `autompg` dataset
 72 | head(autompg)
 73 | 
 74 | # View the help documentation for `autompg`
 75 | ?autompg
 76 | ```
 77 | 
 78 | ## Included Data Sets
 79 | 
 80 | The following data sets are included in the `ucidata` package:
 81 | 
 82 | - [`abalone`](https://archive.ics.uci.edu/ml/datasets/abalone)
 83 | - [`adult`](https://archive.ics.uci.edu/ml/datasets/adult)
 84 | - [`autoimports`](https://archive.ics.uci.edu/ml/datasets/Automobile)
 85 | - [`autompg`](https://archive.ics.uci.edu/ml/datasets/auto+mpg)
 86 | - Breast Cancer Wisconsin:
 87 |     - [`bcw_original` (Breast Cancer Wisconsin Original)](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original))
 88 | - Heart Disease
 89 |     - [`heart_disease_cl`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 90 |     - [`heart_disease_hu`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 91 |     - [`heart_disease_va`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 92 |     - [`heart_disease_ch`](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)
 93 | - [`bike_sharing_daily`](https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset)
 94 | - [`bridges`](https://archive.ics.uci.edu/ml/datasets/Pittsburgh+Bridges)
 95 | - [`car_eval`](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
 96 | - [`forest_fires`](https://archive.ics.uci.edu/ml/datasets/Forest+Fires)
 97 | - [`glass`](https://archive.ics.uci.edu/ml/datasets/Glass+Identification)
 98 | - [`hepatitis`](https://archive.ics.uci.edu/ml/datasets/hepatitis)
 99 | - [`wine`](https://archive.ics.uci.edu/ml/datasets/wine)
100 | 
101 | ## Build Scripts
102 | 
103 | Want to see how each data set was imported? Check out the [`data-raw`](https://github.com/coatless-rpkg/ucidata/tree/master/data-raw) folder!
104 | 


--------------------------------------------------------------------------------
/man/autoimports.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/autoimports_docs.R
  3 | \docType{data}
  4 | \name{autoimports}
  5 | \alias{autoimports}
  6 | \title{Autoimports Dataset}
  7 | \format{
  8 | A data frame with 205 observations on the following 26 variables.
  9 | \itemize{
 10 | \item \code{symboling}:
 11 | \itemize{
 12 | \item -3, -2, -1, 0, 1, 2, 3.
 13 | }
 14 | \item \code{normalized_losses}:
 15 | \itemize{
 16 | \item continuous from 65 to 256.
 17 | }
 18 | \item \code{make}:
 19 | \itemize{
 20 | \item alfa-romero, audi, bmw, chevrolet, dodge, honda, isuzu,
 21 | jaguar, mazda, mercedes-benz, mercury, mitsubishi, nissan,
 22 | peugot, plymouth, porsche, renault, saab, subaru, toyota, volkswagen,
 23 | volvo
 24 | }
 25 | \item \code{fuel_type}:
 26 | \itemize{
 27 | \item diesel, gas.
 28 | }
 29 | \item \code{aspiration}:
 30 | \itemize{
 31 | \item std, turbo.
 32 | }
 33 | \item \code{num_of_doors}:
 34 | \itemize{
 35 | \item four, two.
 36 | }
 37 | \item \code{body_style}:
 38 | \itemize{
 39 | \item hardtop, wagon, sedan, hatchback, convertible.
 40 | }
 41 | \item \code{drive_wheels}:
 42 | \itemize{
 43 | \item 4wd, fwd, rwd.
 44 | }
 45 | \item \code{engine_location}:
 46 | \itemize{
 47 | \item front, rear.
 48 | }
 49 | \item \code{wheel_base}:
 50 | \itemize{
 51 | \item continuous from 86.6 to 120.9.
 52 | }
 53 | \item \code{length}:
 54 | \itemize{
 55 | \item continuous from 141.1 to 208.1.
 56 | }
 57 | \item \code{width}:
 58 | \itemize{
 59 | \item continuous from 60.3 to 72.3.
 60 | }
 61 | \item \code{height}:
 62 | \itemize{
 63 | \item continuous from 47.8 to 59.8.
 64 | }
 65 | \item \code{curb_weight}:
 66 | \itemize{
 67 | \item continuous from 1488 to 4066.
 68 | }
 69 | \item \code{engine_type}:
 70 | \itemize{
 71 | \item dohc, dohcv, l, ohc, ohcf, ohcv, rotor.
 72 | }
 73 | \item \code{num_of_cylinders}:
 74 | \itemize{
 75 | \item eight, five, four, six, three, twelve, two.
 76 | }
 77 | \item \code{engine_size}:
 78 | \itemize{
 79 | \item continuous from 61 to 326.
 80 | }
 81 | \item \code{fuel_system}:
 82 | \itemize{
 83 | \item 1bbl, 2bbl, 4bbl, idi, mfi, mpfi, spdi, spfi.
 84 | }
 85 | \item \code{bore}:
 86 | \itemize{
 87 | \item continuous from 2.54 to 3.94.
 88 | }
 89 | \item \code{stroke}:
 90 | \itemize{
 91 | \item continuous from 2.07 to 4.17.
 92 | }
 93 | \item \code{compression_ratio}:
 94 | \itemize{
 95 | \item continuous from 7 to 23.
 96 | }
 97 | \item \code{horsepower}:
 98 | \itemize{
 99 | \item continuous from 48 to 288.
100 | }
101 | \item \code{peak_rpm}:
102 | \itemize{
103 | \item continuous from 4150 to 6600.
104 | }
105 | \item \code{city_mpg}:
106 | \itemize{
107 | \item continuous from 13 to 49.
108 | }
109 | \item \code{highway_mpg}:
110 | \itemize{
111 | \item continuous from 16 to 54.
112 | }
113 | \item \code{price}:
114 | \itemize{
115 | \item continuous from 5118 to 45400.
116 | }
117 | }
118 | }
119 | \source{
120 | Donor: Jeffrey C. Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
121 | 
122 | 1985 Model Import Car and Truck Specifications, 1985 Ward's Automotive Yearbook.
123 | 
124 | Personal Auto Manuals, Insurance Services Office, 160 Water Street, New York, NY 10038
125 | 
126 | Insurance Collision Report, Insurance Institute for Highway Safety, Watergate 600, Washington, DC 20037
127 | }
128 | \usage{
129 | autoimports
130 | }
131 | \description{
132 | This data set consists of three types of entities:
133 | (a) the specification of an auto in terms of various characteristics,
134 | (b) its assigned insurance risk rating,
135 | (c) its normalized losses in use as compared to other cars.
136 | The second rating corresponds to the degree to which the auto is more risky
137 | than its price indicates. Cars are initially assigned a risk factor symbol
138 | associated with its price. Then, if it is more risky (or less), this symbol
139 | is adjusted by moving it up (or down) the scale. Actuarians call this
140 | process "symboling". A value of +3 indicates that the auto is risky, -3
141 | that it is probably pretty safe. The third factor is the relative average
142 | loss payment per insured vehicle year. This value is normalized for all autos
143 | within a particular size classification (two-door small, station wagons,
144 | sports/speciality, etc...), and represents the average loss per car per year.
145 | }
146 | \references{
147 | \url{https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data}
148 | \url{https://archive.ics.uci.edu/ml/datasets/Automobile}
149 | }
150 | \keyword{datasets}
151 | 


--------------------------------------------------------------------------------
/R/bike_sharing_daily_docs.R:
--------------------------------------------------------------------------------
  1 | #' Bike Sharing (Daily) Data Set
  2 | #'
  3 | #' Bike sharing systems are a new generation of traditional bike rentals where
  4 | #' the whole process from membership, rental and return has become automatic.
  5 | #' Through these systems, a user is able to easily rent a bike from a particular
  6 | #' position and return it at another position. Currently, there are over
  7 | #' 500 bike-sharing programs around the world, which are composed of over
  8 | #' 500 thousand bicycles. Today, there exists great interest in these systems
  9 | #' due to their important role in traffic, environmental and health issues.
 10 | #'
 11 | #' Apart from interesting real world applications of bike sharing systems, the
 12 | #' characteristics of data being generated by these systems make them attractive
 13 | #' for research. As opposed to other transport services such as bus or subway,
 14 | #' the duration of travel, departure and arrival position are explicitly recorded
 15 | #' in these systems. This feature turns a bike sharing system into a virtual
 16 | #' sensor network that can be used for sensing mobility in the city. Hence, it
 17 | #' is expected that most of the important events in the city could be detected via
 18 | #' monitoring these data.
 19 | #'
 20 | #' @format A data frame with 731 observations on the following 16 variables.
 21 | #' - `instant`: Record index
 22 | #' - `dteday`:     Date
 23 | #' - `season`:
 24 | #'     - 1: Spring
 25 | #'     - 2: Summer
 26 | #'     - 3: Fall
 27 | #'     - 4: Winter
 28 | #' - `yr`:
 29 | #'     - 0: 2011
 30 | #'     - 1: 2012
 31 | #' - `mnth`:
 32 | #'     -  1: Jan
 33 | #'     -  2: Feb
 34 | #'     -  3: Mar
 35 | #'     -  4: Apr
 36 | #'     -  5: May
 37 | #'     -  6: Jun
 38 | #'     -  7: Jul
 39 | #'     -  8: Aug
 40 | #'     -  9: Sep
 41 | #'     - 10: Oct
 42 | #'     - 11: Nov
 43 | #'     - 12: Dec
 44 | #' - `hr`:
 45 | #'     -  0: 12 AM
 46 | #'     -  1:  1 AM
 47 | #'     -  2:  2 AM
 48 | #'     -  3:  3 AM
 49 | #'     -  4:  4 AM
 50 | #'     -  5:  5 AM
 51 | #'     -  6:  6 AM
 52 | #'     -  7:  7 AM
 53 | #'     -  8:  8 AM
 54 | #'     -  9:  9 AM
 55 | #'     - 10: 10 AM
 56 | #'     - 11: 11 AM
 57 | #'     - 12: 12 PM
 58 | #'     - 13:  1 PM
 59 | #'     - 14:  2 PM
 60 | #'     - 15:  3 PM
 61 | #'     - 16:  4 PM
 62 | #'     - 17:  5 PM
 63 | #'     - 18:  6 PM
 64 | #'     - 19:  7 PM
 65 | #'     - 20:  8 PM
 66 | #'     - 21:  9 PM
 67 | #'     - 22: 10 PM
 68 | #'     - 23: 11 PM
 69 | #' - `holiday`:
 70 | #'     - Whether the day is a holiday or not according to the [Human Resources page of DC](http://dchr.dc.gov/page/holiday-schedule).
 71 | #'     - 0: No
 72 | #'     - 1: Yes
 73 | #' - `weekday`:
 74 | #'     - The day of a week
 75 | #'     - 0: Sunday
 76 | #'     - 1: Monday
 77 | #'     - 2: Tuesday
 78 | #'     - 3: Wednesday
 79 | #'     - 4: Thursday
 80 | #'     - 5: Friday
 81 | #'     - 6: Saturday
 82 | #' - `workingday`:
 83 | #'     - Whether the day is a workday (Monday - Friday)
 84 | #'     - 0: No
 85 | #'     - 1: Yes
 86 | #' - `weathersit`:
 87 | #'     - 1: Clear, Few clouds, Partly cloudy
 88 | #'     - 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
 89 | #'     - 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
 90 | #'     - 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
 91 | #' - `temp`:
 92 | #'     - Normalized temperature in Celsius.
 93 | #'     - The values are derived via \eqn{\frac{(t-t_{min})}{(t_{max}-t_{min})}}{(t-t[min])/(t[max]-t[min])}, t_min=-8, t_max=+39
 94 | #' - `atemp`:
 95 | #'     - Normalized feeling temperature in Celsius.
 96 | #'     - The values are derived via \eqn{\frac{(t-t_{min})}{(t_{max}-t_{min})}}{(t-t[min])/(t[max]-t[min])}, t_min=-16, t_max=+50
 97 | #' - `hum`:
 98 | #'     - Normalized humidity.
 99 | #'     - The values are divided by 100 (max)
100 | #' - `windspeed`:
101 | #'     - Normalized wind speed.
102 | #'     - The values are divided by 67 (max)
103 | #' - `casual`:
104 | #'     - Count of casual users
105 | #' - `registered`:
106 | #'     - Count of registered users
107 | #' - `cnt`:
108 | #'     - Count of total rental bikes including both casual and registered
109 | #' @source
110 | #' Hadi Fanaee-T
111 | #'
112 | #' Laboratory of Artificial Intelligence and Decision Support (LIAAD), University of Porto
113 | #'
114 | #' INESC Porto, Campus da FEUP
115 | #'
116 | #' Rua Dr. Roberto Frias, 378
117 | #'
118 | #' 4200 - 465 Porto, Portugal
119 | #' @references
120 | #' Original Source: <http://capitalbikeshare.com/system-data>
121 | #'
122 | #' Weather Information: <http://www.freemeteo.com>
123 | #'
124 | #' Holiday Schedule: <http://dchr.dc.gov/page/holiday-schedule>
125 | "bike_sharing_daily"
126 | 


--------------------------------------------------------------------------------
/man/bike_sharing_daily.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/bike_sharing_daily_docs.R
  3 | \docType{data}
  4 | \name{bike_sharing_daily}
  5 | \alias{bike_sharing_daily}
  6 | \title{Bike Sharing (Daily) Data Set}
  7 | \format{
  8 | A data frame with 731 observations on the following 16 variables.
  9 | \itemize{
 10 | \item \code{instant}: Record index
 11 | \item \code{dteday}:     Date
 12 | \item \code{season}:
 13 | \itemize{
 14 | \item 1: Spring
 15 | \item 2: Summer
 16 | \item 3: Fall
 17 | \item 4: Winter
 18 | }
 19 | \item \code{yr}:
 20 | \itemize{
 21 | \item 0: 2011
 22 | \item 1: 2012
 23 | }
 24 | \item \code{mnth}:
 25 | \itemize{
 26 | \item 1: Jan
 27 | \item 2: Feb
 28 | \item 3: Mar
 29 | \item 4: Apr
 30 | \item 5: May
 31 | \item 6: Jun
 32 | \item 7: Jul
 33 | \item 8: Aug
 34 | \item 9: Sep
 35 | \item 10: Oct
 36 | \item 11: Nov
 37 | \item 12: Dec
 38 | }
 39 | \item \code{hr}:
 40 | \itemize{
 41 | \item 0: 12 AM
 42 | \item 1:  1 AM
 43 | \item 2:  2 AM
 44 | \item 3:  3 AM
 45 | \item 4:  4 AM
 46 | \item 5:  5 AM
 47 | \item 6:  6 AM
 48 | \item 7:  7 AM
 49 | \item 8:  8 AM
 50 | \item 9:  9 AM
 51 | \item 10: 10 AM
 52 | \item 11: 11 AM
 53 | \item 12: 12 PM
 54 | \item 13:  1 PM
 55 | \item 14:  2 PM
 56 | \item 15:  3 PM
 57 | \item 16:  4 PM
 58 | \item 17:  5 PM
 59 | \item 18:  6 PM
 60 | \item 19:  7 PM
 61 | \item 20:  8 PM
 62 | \item 21:  9 PM
 63 | \item 22: 10 PM
 64 | \item 23: 11 PM
 65 | }
 66 | \item \code{holiday}:
 67 | \itemize{
 68 | \item Whether the day is a holiday or not according to the \href{http://dchr.dc.gov/page/holiday-schedule}{Human Resources page of DC}.
 69 | \item 0: No
 70 | \item 1: Yes
 71 | }
 72 | \item \code{weekday}:
 73 | \itemize{
 74 | \item The day of a week
 75 | \item 0: Sunday
 76 | \item 1: Monday
 77 | \item 2: Tuesday
 78 | \item 3: Wednesday
 79 | \item 4: Thursday
 80 | \item 5: Friday
 81 | \item 6: Saturday
 82 | }
 83 | \item \code{workingday}:
 84 | \itemize{
 85 | \item Whether the day is a workday (Monday - Friday)
 86 | \item 0: No
 87 | \item 1: Yes
 88 | }
 89 | \item \code{weathersit}:
 90 | \itemize{
 91 | \item 1: Clear, Few clouds, Partly cloudy
 92 | \item 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
 93 | \item 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
 94 | \item 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
 95 | }
 96 | \item \code{temp}:
 97 | \itemize{
 98 | \item Normalized temperature in Celsius.
 99 | \item The values are derived via \eqn{\frac{(t-t_{min})}{(t_{max}-t_{min})}}{(t-t[min])/(t[max]-t[min])}, t_min=-8, t_max=+39
100 | }
101 | \item \code{atemp}:
102 | \itemize{
103 | \item Normalized feeling temperature in Celsius.
104 | \item The values are derived via \eqn{\frac{(t-t_{min})}{(t_{max}-t_{min})}}{(t-t[min])/(t[max]-t[min])}, t_min=-16, t_max=+50
105 | }
106 | \item \code{hum}:
107 | \itemize{
108 | \item Normalized humidity.
109 | \item The values are divided by 100 (max)
110 | }
111 | \item \code{windspeed}:
112 | \itemize{
113 | \item Normalized wind speed.
114 | \item The values are divided by 67 (max)
115 | }
116 | \item \code{casual}:
117 | \itemize{
118 | \item Count of casual users
119 | }
120 | \item \code{registered}:
121 | \itemize{
122 | \item Count of registered users
123 | }
124 | \item \code{cnt}:
125 | \itemize{
126 | \item Count of total rental bikes including both casual and registered
127 | }
128 | }
129 | }
130 | \source{
131 | Hadi Fanaee-T
132 | 
133 | Laboratory of Artificial Intelligence and Decision Support (LIAAD), University of Porto
134 | 
135 | INESC Porto, Campus da FEUP
136 | 
137 | Rua Dr. Roberto Frias, 378
138 | 
139 | 4200 - 465 Porto, Portugal
140 | }
141 | \usage{
142 | bike_sharing_daily
143 | }
144 | \description{
145 | Bike sharing systems are a new generation of traditional bike rentals where
146 | the whole process from membership, rental and return has become automatic.
147 | Through these systems, a user is able to easily rent a bike from a particular
148 | position and return it at another position. Currently, there are over
149 | 500 bike-sharing programs around the world, which are composed of over
150 | 500 thousand bicycles. Today, there exists great interest in these systems
151 | due to their important role in traffic, environmental and health issues.
152 | }
153 | \details{
154 | Apart from interesting real world applications of bike sharing systems, the
155 | characteristics of data being generated by these systems make them attractive
156 | for research. As opposed to other transport services such as bus or subway,
157 | the duration of travel, departure and arrival position are explicitly recorded
158 | in these systems. This feature turns a bike sharing system into a virtual
159 | sensor network that can be used for sensing mobility in the city. Hence, it
160 | is expected that most of the important events in the city could be detected via
161 | monitoring these data.
162 | }
163 | \references{
164 | Original Source: \url{http://capitalbikeshare.com/system-data}
165 | 
166 | Weather Information: \url{http://www.freemeteo.com}
167 | 
168 | Holiday Schedule: \url{http://dchr.dc.gov/page/holiday-schedule}
169 | }
170 | \keyword{datasets}
171 | 


--------------------------------------------------------------------------------