├── .Rbuildignore ├── .github ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── workflows │ ├── R-CMD-check-hard.yaml │ ├── R-CMD-check.yaml │ ├── lock.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── .vscode ├── extensions.json └── settings.json ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── blueprint-formula-default.R ├── blueprint-formula.R ├── blueprint-recipe-default.R ├── blueprint-recipe.R ├── blueprint-xy-default.R ├── blueprint-xy.R ├── blueprint.R ├── case-weights.R ├── classes.R ├── compost.R ├── constructor.R ├── delete-response.R ├── encoding.R ├── extract.R ├── forge.R ├── hardhat-example-data.R ├── hardhat-package.R ├── import-standalone-obj-type.R ├── import-standalone-purrr.R ├── import-standalone-types-check.R ├── intercept.R ├── levels.R ├── model-frame.R ├── model-matrix.R ├── model-offset.R ├── mold.R ├── new.R ├── print.R ├── ptype.R ├── quantile-pred.R ├── recompose.R ├── scream.R ├── shrink.R ├── sparsevctrs.R ├── spruce.R ├── standardize.R ├── table.R ├── tune.R ├── use.R ├── util.R └── validation.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── air.toml ├── codecov.yml ├── cran-comments.md ├── data └── hardhat-example-data.RData ├── graphics ├── factor-handling │ ├── factor-handling.graffle │ └── factor-handling.png ├── modeling-package-design.graffle └── modeling-package-design │ ├── Fitting.png │ └── Prediction.png ├── hardhat.Rproj ├── inst └── templates │ └── R │ ├── constructor.R │ ├── fit.R │ └── predict.R ├── man-roxygen └── section-validation.R ├── man ├── add_intercept_column.Rd ├── check_quantile_levels.Rd ├── contr_one_hot.Rd ├── default_formula_blueprint.Rd ├── default_recipe_blueprint.Rd ├── default_xy_blueprint.Rd ├── delete_response.Rd ├── extract_ptype.Rd ├── fct_encode_one_hot.Rd ├── figures │ ├── Fitting.png │ ├── Prediction.png │ ├── factor-handling.png │ ├── lifecycle-archived.svg │ ├── lifecycle-defunct.svg │ ├── 
lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-questioning.svg │ ├── lifecycle-soft-deprecated.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ └── logo.png ├── forge.Rd ├── frequency_weights.Rd ├── get_data_classes.Rd ├── get_levels.Rd ├── hardhat-example-data.Rd ├── hardhat-extract.Rd ├── hardhat-package.Rd ├── importance_weights.Rd ├── is_blueprint.Rd ├── is_case_weights.Rd ├── is_frequency_weights.Rd ├── is_importance_weights.Rd ├── model_frame.Rd ├── model_matrix.Rd ├── model_offset.Rd ├── modeling-usethis.Rd ├── mold.Rd ├── new-blueprint.Rd ├── new-default-blueprint.Rd ├── new_case_weights.Rd ├── new_frequency_weights.Rd ├── new_importance_weights.Rd ├── new_model.Rd ├── quantile_pred.Rd ├── recompose.Rd ├── refresh_blueprint.Rd ├── rmd │ └── one-hot.Rmd ├── run-forge.Rd ├── run-mold.Rd ├── scream.Rd ├── shrink.Rd ├── spruce-multiple.Rd ├── spruce.Rd ├── standardize.Rd ├── tune.Rd ├── update_blueprint.Rd ├── validate_column_names.Rd ├── validate_no_formula_duplication.Rd ├── validate_outcomes_are_binary.Rd ├── validate_outcomes_are_factors.Rd ├── validate_outcomes_are_numeric.Rd ├── validate_outcomes_are_univariate.Rd ├── validate_prediction_size.Rd ├── validate_predictors_are_numeric.Rd └── weighted_table.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── email.yml ├── failures.md └── problems.md ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── blueprint-formula-default.md │ ├── blueprint-recipe.md │ ├── blueprint.md │ ├── case-weights.md │ ├── constructor.md │ ├── delete-response.md │ ├── encoding.md │ ├── forge-formula.md │ ├── forge-recipe.md │ ├── forge-xy.md │ ├── forge.md │ ├── 
intercept.md │ ├── levels.md │ ├── model-matrix.md │ ├── model-offset.md │ ├── mold-formula.md │ ├── mold-recipe.md │ ├── mold-xy.md │ ├── mold.md │ ├── print.md │ ├── quantile-pred.md │ ├── recompose.md │ ├── scream.md │ ├── shrink.md │ ├── spruce.md │ ├── standardize.md │ ├── table.md │ ├── tune.md │ ├── use.md │ └── validation.md │ ├── data-raw │ └── hardhat-0.2.0.R │ ├── data │ ├── hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds │ ├── hardhat-0.2.0-post-mold-recipe.rds │ └── hardhat-0.2.0-pre-mold-recipe.rds │ ├── helper-matrix.R │ ├── helper-sparsevctrs.R │ ├── test-blueprint-formula-default.R │ ├── test-blueprint-recipe.R │ ├── test-blueprint.R │ ├── test-case-weights.R │ ├── test-constructor.R │ ├── test-delete-response.R │ ├── test-encoding.R │ ├── test-forge-formula.R │ ├── test-forge-recipe.R │ ├── test-forge-xy.R │ ├── test-forge.R │ ├── test-intercept.R │ ├── test-levels.R │ ├── test-model-matrix.R │ ├── test-model-offset.R │ ├── test-mold-formula.R │ ├── test-mold-recipe.R │ ├── test-mold-xy.R │ ├── test-mold.R │ ├── test-print.R │ ├── test-quantile-pred.R │ ├── test-recompose.R │ ├── test-scream.R │ ├── test-shrink.R │ ├── test-spruce.R │ ├── test-standardize.R │ ├── test-table.R │ ├── test-tune.R │ ├── test-use.R │ └── test-validation.R └── vignettes ├── .gitignore ├── forge.Rmd ├── mold.Rmd └── package.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^_pkgdown\.yml$ 3 | ^docs$ 4 | ^codecov\.yml$ 5 | ^README\.Rmd$ 6 | ^\.travis\.yml$ 7 | ^hardhat\.Rproj$ 8 | ^\.Rproj\.user$ 9 | ^man-roxygen$ 10 | ^graphics$ 11 | ^\.github$ 12 | ^LICENSE\.md$ 13 | ^cran-comments\.md$ 14 | ^CRAN-RELEASE$ 15 | ^revdep$ 16 | ^CODE_OF_CONDUCT\.md$ 17 | ^CRAN-SUBMISSION$ 18 | ^[\.]?air\.toml$ 19 | ^\.vscode$ 20 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | 
*.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to tidymodels 2 | 3 | For more detailed information about contributing to tidymodels packages, see our [**development contributing guide**](https://www.tidymodels.org/contribute/). 4 | 5 | ## Documentation 6 | 7 | Typos or grammatical errors in documentation may be edited directly using the GitHub web interface, as long as the changes are made in the _source_ file. 8 | 9 | * YES ✅: you edit a roxygen comment in an `.R` file in the `R/` directory. 10 | * NO 🚫: you edit an `.Rd` file in the `man/` directory. 11 | 12 | We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 13 | 14 | ## Code 15 | 16 | Before you submit 🎯 a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal. 17 | 18 | The [tidymodels packages](https://www.tidymodels.org/packages/) work together. Each package contains its own unit tests, while integration tests and other tests using all the packages are contained in [extratests](https://github.com/tidymodels/extratests). 19 | 20 | * We recommend that you create a Git branch for each pull request (PR). 21 | * Look at the build status before and after making changes. The `README` contains badges for any continuous integration services used by the package. 22 | * New code should follow the tidyverse [style guide](http://style.tidyverse.org). You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 
23 | * For user-facing changes, add a bullet to the top of `NEWS.md` below the current development version header describing the changes made followed by your GitHub username, and links to relevant issue(s)/PR(s). 24 | * We use [testthat](https://cran.r-project.org/package=testthat). Contributions with test cases included are easier to accept. 25 | * If your contribution spans the use of more than one package, consider building [extratests](https://github.com/tidymodels/extratests) with your changes to check for breakages and/or adding new tests there. Let us know in your PR if you ran these extra tests. 26 | 27 | ### Code of Conduct 28 | 29 | This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 30 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-hard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, 5 | # Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never 6 | # installed, with the exception of testthat, knitr, and rmarkdown. The cache is 7 | # never used to avoid accidentally restoring a cache containing a suggested 8 | # dependency. 
9 | on: 10 | push: 11 | branches: [main, master] 12 | pull_request: 13 | branches: [main, master] 14 | 15 | name: R-CMD-check-no-suggests.yaml 16 | 17 | permissions: read-all 18 | 19 | jobs: 20 | check-no-suggests: 21 | runs-on: ${{ matrix.config.os }} 22 | 23 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 24 | 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | config: 29 | - {os: ubuntu-latest, r: 'release'} 30 | 31 | env: 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | R_KEEP_PKG_SOURCE: yes 34 | 35 | steps: 36 | - uses: actions/checkout@v4 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | http-user-agent: ${{ matrix.config.http-user-agent }} 44 | use-public-rspm: true 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | dependencies: '"hard"' 49 | cache: false 50 | extra-packages: | 51 | any::rcmdcheck 52 | any::testthat 53 | any::knitr 54 | any::rmarkdown 55 | needs: check 56 | 57 | - uses: r-lib/actions/check-r-package@v2 58 | with: 59 | upload-snapshots: true 60 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 61 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 
7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | 12 | name: R-CMD-check.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: macos-latest, r: 'release'} 27 | 28 | - {os: windows-latest, r: 'release'} 29 | # use 4.0 or 4.1 to check with rtools40's older compiler 30 | - {os: windows-latest, r: 'oldrel-4'} 31 | 32 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 33 | - {os: ubuntu-latest, r: 'release'} 34 | - {os: ubuntu-latest, r: 'oldrel-1'} 35 | - {os: ubuntu-latest, r: 'oldrel-2'} 36 | - {os: ubuntu-latest, r: 'oldrel-3'} 37 | - {os: ubuntu-latest, r: 'oldrel-4'} 38 | 39 | env: 40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 41 | R_KEEP_PKG_SOURCE: yes 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | 46 | - uses: r-lib/actions/setup-pandoc@v2 47 | 48 | - uses: r-lib/actions/setup-r@v2 49 | with: 50 | r-version: ${{ matrix.config.r }} 51 | http-user-agent: ${{ matrix.config.http-user-agent }} 52 | use-public-rspm: true 53 | 54 | - uses: r-lib/actions/setup-r-dependencies@v2 55 | with: 56 | extra-packages: any::rcmdcheck 57 | needs: check 58 | 59 | - uses: r-lib/actions/check-r-package@v2 60 | with: 61 | upload-snapshots: true 62 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 63 | -------------------------------------------------------------------------------- /.github/workflows/lock.yaml: -------------------------------------------------------------------------------- 1 | name: 'Lock Threads' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | lock: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dessant/lock-threads@v2 12 | with: 13 | github-token: ${{ github.token }} 14 | issue-lock-inactive-days: '14' 15 | # issue-exclude-labels: '' 16 | # issue-lock-labels: 'outdated' 17 | 
issue-lock-comment: > 18 | This issue has been automatically locked. If you believe you have 19 | found a related problem, please file a new issue (with a reprex: 20 | ) and link to this issue. 21 | issue-lock-reason: '' 22 | pr-lock-inactive-days: '14' 23 | # pr-exclude-labels: 'wip' 24 | pr-lock-labels: '' 25 | pr-lock-comment: > 26 | This pull request has been automatically locked. If you believe you 27 | have found a related problem, please file a new issue (with a reprex: 28 | ) and link to this issue. 29 | pr-lock-reason: '' 30 | # process-only: 'issues' 31 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown.yaml 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | runs-on: ubuntu-latest 18 | # Only restrict concurrency for non-PR jobs 19 | concurrency: 20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | permissions: 24 | contents: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 
37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.5.0 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ 
secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | print(cov) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v5 42 | with: 43 | # Fail if error if not on PR, or if on PR and token is given 44 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 45 | files: ./cobertura.xml 46 | plugins: noop 47 | disable_search: true 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | 50 | - name: Show testthat output 51 | if: always() 52 | run: | 53 | ## -------------------------------------------------------------------- 54 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 55 | shell: bash 56 | 57 | - name: Upload test results 58 | if: failure() 59 | uses: actions/upload-artifact@v4 60 | with: 61 | name: coverage-test-failures 62 | path: ${{ runner.temp }}/package 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | docs/ 3 | .Rhistory 4 | .RData 5 | .Rproj.user 6 | .DS_Store 7 | docs 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "Posit.air-vscode" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[r]": { 3 | "editor.formatOnSave": true, 4 | "editor.defaultFormatter": "Posit.air-vscode" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: hardhat 2 | Title: Construct Modeling Packages 3 | Version: 1.4.1.9000 4 | Authors@R: c( 5 | person("Hannah", "Frick", , "hannah@posit.co", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0002-6049-5258")), 7 | person("Davis", "Vaughan", , "davis@posit.co", role = "aut"), 8 | person("Max", "Kuhn", , "max@posit.co", role = "aut"), 9 | person("Posit Software, PBC", role = c("cph", "fnd"), 10 | comment = c(ROR = "03wc8by49")) 11 | ) 12 | Description: Building modeling packages is hard. A large amount of effort 13 | generally goes into providing an implementation for a new method that 14 | is efficient, fast, and correct, but often less emphasis is put on the 15 | user interface. A good interface requires specialized knowledge about 16 | S3 methods and formulas, which the average package developer might not 17 | have. The goal of 'hardhat' is to reduce the burden around building 18 | new modeling packages by providing functionality for preprocessing, 19 | predicting, and validating input. 
20 | License: MIT + file LICENSE 21 | URL: https://github.com/tidymodels/hardhat, https://hardhat.tidymodels.org 22 | BugReports: https://github.com/tidymodels/hardhat/issues 23 | Depends: 24 | R (>= 4.1) 25 | Imports: 26 | cli (>= 3.6.0), 27 | glue (>= 1.6.2), 28 | rlang (>= 1.1.0), 29 | sparsevctrs (>= 0.2.0), 30 | tibble (>= 3.2.1), 31 | vctrs (>= 0.6.0) 32 | Suggests: 33 | covr, 34 | crayon, 35 | devtools, 36 | knitr, 37 | Matrix, 38 | modeldata (>= 0.0.2), 39 | recipes (>= 1.0.5), 40 | rmarkdown (>= 2.3), 41 | roxygen2, 42 | testthat (>= 3.0.0), 43 | usethis (>= 2.1.5), 44 | withr (>= 3.0.0) 45 | VignetteBuilder: 46 | knitr 47 | Config/Needs/website: tidyverse/tidytemplate 48 | Config/testthat/edition: 3 49 | Config/usethis/last-upkeep: 2025-04-23 50 | Encoding: UTF-8 51 | LazyData: true 52 | Roxygen: list(markdown = TRUE) 53 | RoxygenNote: 7.3.2 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: hardhat authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 hardhat authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/blueprint-formula.R: -------------------------------------------------------------------------------- 1 | #' @param formula Either `NULL`, or a formula that specifies how the 2 | #' predictors and outcomes should be preprocessed. This argument is set 3 | #' automatically at [mold()] time. 4 | #' 5 | #' @param indicators A single character string. Control how factors are 6 | #' expanded into dummy variable indicator columns. One of: 7 | #' 8 | #' - `"traditional"` - The default. Create dummy variables using the 9 | #' traditional [model.matrix()] infrastructure. Generally this creates 10 | #' `K - 1` indicator columns for each factor, where `K` is the number of 11 | #' levels in that factor. 12 | #' 13 | #' - `"none"` - Leave factor variables alone. No expansion is done. 14 | #' 15 | #' - `"one_hot"` - Create dummy variables using a one-hot encoding approach 16 | #' that expands unordered factors into all `K` indicator columns, rather than 17 | #' `K - 1`. 
18 | #' 19 | #' @rdname new-blueprint 20 | #' @export 21 | new_formula_blueprint <- function( 22 | intercept = FALSE, 23 | allow_novel_levels = FALSE, 24 | ptypes = NULL, 25 | formula = NULL, 26 | indicators = "traditional", 27 | composition = "tibble", 28 | ..., 29 | subclass = character() 30 | ) { 31 | check_formula(formula, allow_null = TRUE) 32 | check_indicators(indicators) 33 | 34 | new_blueprint( 35 | intercept = intercept, 36 | allow_novel_levels = allow_novel_levels, 37 | ptypes = ptypes, 38 | formula = formula, 39 | indicators = indicators, 40 | composition = composition, 41 | ..., 42 | subclass = c(subclass, "formula_blueprint") 43 | ) 44 | } 45 | 46 | #' @export 47 | refresh_blueprint.formula_blueprint <- function(blueprint) { 48 | do.call(new_formula_blueprint, as.list(blueprint)) 49 | } 50 | 51 | check_formula_blueprint <- function( 52 | x, 53 | ..., 54 | arg = caller_arg(x), 55 | call = caller_env() 56 | ) { 57 | check_inherits(x, "formula_blueprint", arg = arg, call = call) 58 | } 59 | 60 | # ------------------------------------------------------------------------------ 61 | 62 | check_indicators <- function(indicators, error_call = caller_env()) { 63 | arg_match0( 64 | arg = indicators, 65 | values = c("traditional", "none", "one_hot"), 66 | error_call = error_call 67 | ) 68 | } 69 | -------------------------------------------------------------------------------- /R/blueprint-recipe.R: -------------------------------------------------------------------------------- 1 | #' @param recipe Either `NULL`, or an unprepped recipe. This argument is set 2 | #' automatically at [mold()] time. 3 | #' 4 | #' @param fresh Should already trained operations be re-trained when `prep()` is 5 | #' called? 6 | #' 7 | #' @param strings_as_factors Should character columns be converted to factors 8 | #' when `prep()` is called? 
9 | #' 10 | #' @rdname new-blueprint 11 | #' @export 12 | new_recipe_blueprint <- function( 13 | intercept = FALSE, 14 | allow_novel_levels = FALSE, 15 | fresh = TRUE, 16 | strings_as_factors = TRUE, 17 | composition = "tibble", 18 | ptypes = NULL, 19 | recipe = NULL, 20 | ..., 21 | subclass = character() 22 | ) { 23 | check_bool(fresh) 24 | check_bool(strings_as_factors) 25 | check_recipe(recipe, allow_null = TRUE) 26 | 27 | new_blueprint( 28 | intercept = intercept, 29 | allow_novel_levels = allow_novel_levels, 30 | fresh = fresh, 31 | strings_as_factors = strings_as_factors, 32 | composition = composition, 33 | ptypes = ptypes, 34 | recipe = recipe, 35 | ..., 36 | subclass = c(subclass, "recipe_blueprint") 37 | ) 38 | } 39 | 40 | #' @export 41 | refresh_blueprint.recipe_blueprint <- function(blueprint) { 42 | do.call(new_recipe_blueprint, as.list(blueprint)) 43 | } 44 | 45 | check_recipe_blueprint <- function( 46 | x, 47 | ..., 48 | arg = caller_arg(x), 49 | call = caller_env() 50 | ) { 51 | check_inherits(x, "recipe_blueprint", arg = arg, call = call) 52 | } 53 | 54 | # ------------------------------------------------------------------------------ 55 | 56 | blueprint_strings_as_factors <- function(x) { 57 | # See #228 58 | if (has_name(x, "strings_as_factors")) { 59 | # Blueprint is new enough to have this field 60 | x[["strings_as_factors"]] 61 | } else { 62 | # Backwards compatible support for the `recipes::prep()` default if the 63 | # blueprint is old 64 | TRUE 65 | } 66 | } 67 | 68 | # ------------------------------------------------------------------------------ 69 | 70 | is_recipe <- function(x) { 71 | inherits(x, "recipe") 72 | } 73 | 74 | check_recipe <- function( 75 | x, 76 | ..., 77 | allow_null = FALSE, 78 | arg = caller_arg(x), 79 | call = caller_env() 80 | ) { 81 | if (!missing(x)) { 82 | if (is_recipe(x)) { 83 | return(invisible(NULL)) 84 | } 85 | if (allow_null && is_null(x)) { 86 | return(invisible(NULL)) 87 | } 88 | } 89 | 90 | 
stop_input_type( 91 | x = x, 92 | what = "a recipe", 93 | allow_null = allow_null, 94 | arg = arg, 95 | call = call 96 | ) 97 | } 98 | -------------------------------------------------------------------------------- /R/blueprint-xy.R: -------------------------------------------------------------------------------- 1 | #' @rdname new-blueprint 2 | #' @export 3 | new_xy_blueprint <- function( 4 | intercept = FALSE, 5 | allow_novel_levels = FALSE, 6 | composition = "tibble", 7 | ptypes = NULL, 8 | ..., 9 | subclass = character() 10 | ) { 11 | new_blueprint( 12 | intercept = intercept, 13 | allow_novel_levels = allow_novel_levels, 14 | composition = composition, 15 | ptypes = ptypes, 16 | ..., 17 | subclass = c(subclass, "xy_blueprint") 18 | ) 19 | } 20 | 21 | #' @export 22 | refresh_blueprint.xy_blueprint <- function(blueprint) { 23 | do.call(new_xy_blueprint, as.list(blueprint)) 24 | } 25 | 26 | check_xy_blueprint <- function( 27 | x, 28 | ..., 29 | arg = caller_arg(x), 30 | call = caller_env() 31 | ) { 32 | check_inherits(x, "xy_blueprint", arg = arg, call = call) 33 | } 34 | -------------------------------------------------------------------------------- /R/classes.R: -------------------------------------------------------------------------------- 1 | #' Extract data classes from a data frame or matrix 2 | #' 3 | #' When predicting from a model, it is often important for the `new_data` to 4 | #' have the same classes as the original data used to fit the model. 5 | #' `get_data_classes()` extracts the classes from the original training data. 6 | #' 7 | #' @param data A data frame or matrix. 8 | #' 9 | #' @inheritParams validate_column_names 10 | #' 11 | #' @return 12 | #' 13 | #' A named list. The names are the column names of `data` and the values are 14 | #' character vectors containing the class of that column. 
15 | #' 16 | #' @examples 17 | #' get_data_classes(iris) 18 | #' 19 | #' get_data_classes(as.matrix(mtcars)) 20 | #' 21 | #' # Unlike .MFclass(), the full class 22 | #' # vector is returned 23 | #' data <- data.frame(col = ordered(c("a", "b"))) 24 | #' 25 | #' .MFclass(data$col) 26 | #' 27 | #' get_data_classes(data) 28 | #' @export 29 | get_data_classes <- function(data, ..., call = current_env()) { 30 | check_dots_empty0(...) 31 | data <- extract_ptype(data, call = call) 32 | check_unique_column_names(data, call = call) 33 | lapply(data, class) 34 | } 35 | -------------------------------------------------------------------------------- /R/compost.R: -------------------------------------------------------------------------------- 1 | # This will eventually live in recipes 2 | # https://github.com/tidymodels/recipes/issues/268 3 | 4 | compost <- function(object) { 5 | if (!recipes::fully_trained(object)) { 6 | return(object) 7 | } 8 | 9 | object$template <- NULL 10 | object$retained <- FALSE 11 | 12 | object 13 | } 14 | -------------------------------------------------------------------------------- /R/constructor.R: -------------------------------------------------------------------------------- 1 | #' Constructor for a base model 2 | #' 3 | #' A __model__ is a _scalar object_, as classified in 4 | #' [Advanced R](https://adv-r.hadley.nz/s3.html#object-styles). As such, it 5 | #' takes uniquely named elements in `...` and combines them into a list with 6 | #' a class of `class`. This entire object represent a single model. 7 | #' 8 | #' Because every model should have multiple interfaces, including formula 9 | #' and `recipes` interfaces, all models should have a `blueprint` that 10 | #' can process new data when `predict()` is called. The easiest way to generate 11 | #' an blueprint with all of the information required at prediction time is to 12 | #' use the one that is returned from a call to [mold()]. 13 | #' 14 | #' @param ... 
#'   Name-value pairs for elements specific to the model defined by
#'   `class`.
#'
#' @param blueprint A preprocessing `blueprint` returned from a call to [mold()].
#'
#' @param class A character vector representing the class of the model.
#'
#' @return
#'
#' A new scalar model object, represented as a classed list with named elements
#' specified in `...`.
#'
#' @examples
#' new_model(
#'   custom_element = "my-elem",
#'   blueprint = default_xy_blueprint(),
#'   class = "custom_model"
#' )
#' @export
new_model <- function(
  ...,
  blueprint = default_xy_blueprint(),
  class = character()
) {
  check_blueprint(blueprint)

  new_abstract_model(
    ...,
    blueprint = blueprint,
    class = c(class, "hardhat_model")
  )
}

# ------------------------------------------------------------------------------

#' @export
print.hardhat_model <- function(x, ...) {
  cat_line("<", class(x)[1], ">")

  # Display the model's elements without the (verbose) blueprint, and without
  # the class so the default list printing is used
  elems <- x
  elems$blueprint <- NULL
  print(unclass(elems))

  # Print methods should invisibly return their input unchanged
  invisible(x)
}

cat_line <- function(...) {
  cat(paste0(..., "\n", collapse = ""))
}

# ------------------------------------------------------------------------------

new_abstract_model <- function(..., class) {
  elems <- list2(...)
  check_unique_names(elems, arg = "...")

  new_scalar(elems, class = class)
}

new_scalar <- function(elems, ..., class = character()) {
  check_elems(elems)
  structure(elems, ..., class = c(class, "hardhat_scalar"))
}

# ------------------------------------------------------------------------------

# Validate the invariants of a scalar object: a non-empty list carrying
# unique names and no other attributes
check_elems <- function(elems, ..., call = caller_env()) {
  check_dots_empty0(...)

  if (!is.list(elems) || length(elems) == 0) {
    cli::cli_abort(
      "{.arg elems} must be a list of length 1 or greater.",
      call = call
    )
  }

  if (!has_unique_names(elems)) {
    cli::cli_abort(
      "{.arg elems} must have unique names.",
      call = call
    )
  }

  if (!identical(names(attributes(elems)), "names")) {
    cli::cli_abort(
      "{.arg elems} must have no attributes (apart from names).",
      call = call
    )
  }

  invisible(elems)
}
# ---- file: R/delete-response.R ----------------------------------------------
#' Delete the response from a terms object
#'
#' `delete_response()` is exactly the same as `delete.response()`, except
#' that it fixes a long standing bug by also removing the part of the
#' `"dataClasses"` attribute corresponding to the response, if it exists.
#'
#' @param terms A terms object.
#'
#' @return
#'
#' `terms` with the response sections removed.
#'
#' @details
#'
#' The bug is described here:
#'
#' \url{https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html}
#'
#' @examples
#'
#' framed <- model_frame(Species ~ Sepal.Width, iris)
#'
#' attr(delete.response(framed$terms), "dataClasses")
#'
#' attr(delete_response(framed$terms), "dataClasses")
#' @export
delete_response <- function(terms) {
  check_terms(terms)

  resp <- attr(terms, "response")
  data_class <- attr(terms, "dataClasses")

  response_exists <- !(is.null(resp) || (resp == 0L))
  data_class_exists <- !is.null(data_class)

  # Remove the dataClasses entry corresponding to the response, if present.
  # Use the scalar `&&` (not the vectorized `&`) for this length-1 condition.
  if (response_exists && data_class_exists) {
    attr(terms, "dataClasses") <- data_class[-resp]
  }

  delete.response(terms)
}
# ---- file: R/encoding.R -----------------------------------------------------
#' Encode a factor as a one-hot indicator matrix
#'
#' @description
#' `fct_encode_one_hot()` encodes a factor as a one-hot indicator matrix.
#'
#' This matrix consists of `length(x)` rows and `length(levels(x))` columns.
#' Every value in row `i` of the matrix is filled with `0L` except for the
#' column that has the same name as `x[[i]]`, which is instead filled with `1L`.
#'
#' @details
#' The columns are returned in the same order as `levels(x)`.
#'
#' If `x` has names, the names are propagated onto the result as the row names.
#'
#' @param x A factor.
#'
#' `x` can't contain missing values.
#'
#' `x` is allowed to be an ordered factor.
#'
#' @return An integer matrix with `length(x)` rows and `length(levels(x))`
#' columns.
#'
#' @export
#' @examples
#' fct_encode_one_hot(factor(letters))
#'
#' fct_encode_one_hot(factor(letters[1:2], levels = letters))
#'
#' set.seed(1234)
#' fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE)))
fct_encode_one_hot <- function(x) {
  if (!is.factor(x)) {
    cli::cli_abort("{.arg x} must be a factor, not {.obj_type_friendly {x}}.")
  }

  # Rows are named after `x` (when it has names), columns after the levels
  dim_names <- list(names(x), levels(x))

  n_obs <- length(x)
  n_levels <- length(levels(x))

  # Work with the underlying integer codes
  codes <- unclass(x)

  if (vec_any_missing(codes)) {
    cli::cli_abort("{.arg x} can't contain missing values.")
  }

  out <- matrix(0L, nrow = n_obs, ncol = n_levels, dimnames = dim_names)

  # Flip exactly one cell per row to 1 via matrix indexing:
  # row i, column = integer code of x[[i]]
  out[cbind(row = seq_len(n_obs), col = codes)] <- 1L

  out
}
# ---- file: R/hardhat-example-data.R -----------------------------------------
#' Example data for hardhat
#'
#' @details Data objects for a training and test set with the same variables:
#' three numeric and two factor columns.
#'
#' @name hardhat-example-data
#' @aliases example_train example_test
#' @docType data
#' @return \item{example_train,example_test}{tibbles}
#'
#' @keywords datasets
#' @examples
#' data("hardhat-example-data")
NULL
# ---- file: R/hardhat-package.R ----------------------------------------------
#' @keywords internal
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
#' @import rlang
#' @import vctrs
#' @importFrom glue glue
#' @importFrom tibble as_tibble
#' @importFrom tibble tibble
#' @importFrom stats model.frame
#' @importFrom stats model.matrix
#' @importFrom stats delete.response
#' @importFrom stats get_all_vars
#' @importFrom stats terms
#' @importFrom stats median
## usethis namespace: end
NULL
# ---- file: R/intercept.R ----------------------------------------------------
#' Add an intercept column to `data`
#'
#' This function adds an integer column of `1`'s to `data`.
#'
#' If a column named `name` already exists in `data`, then `data` is returned
#' unchanged and a warning is issued.
#'
#' @param data A data frame or matrix.
#'
#' @param name The name for the intercept column. Defaults to `"(Intercept)"`,
#' which is the same name that [stats::lm()] uses.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' `data` with an intercept column.
#'
#' @examples
#' add_intercept_column(mtcars)
#'
#' add_intercept_column(mtcars, "intercept")
#'
#' add_intercept_column(as.matrix(mtcars))
#' @export
add_intercept_column <- function(
  data,
  name = "(Intercept)",
  ...,
  call = current_env()
) {
  check_dots_empty0(...)
  check_data_frame_or_matrix(data, call = call)
  check_name(name, call = call)

  # If a column with this name already exists, warn and return the input as is
  if (name %in% colnames(data)) {
    cli::cli_warn(c(
      "{.arg data} already has a column named {.val {name}}.",
      "i" = "Returning {.arg data} unchanged."
    ))
    return(data)
  }

  if (is.matrix(data)) {
    # One-column integer matrix, named via dimnames, bound on the left
    intercept <- matrix(
      data = 1L,
      nrow = nrow(data),
      dimnames = list(NULL, name)
    )
    cbind(intercept, data)
  } else {
    # Data frame: splice the (possibly non-syntactic) name in with `:=`
    tibble::add_column(data, !!name := 1L, .before = 1L)
  }
}

maybe_add_intercept_column <- function(
  data,
  intercept = FALSE,
  ...,
  call = caller_env()
) {
  check_dots_empty0(...)

  if (intercept) {
    add_intercept_column(data, call = call)
  } else {
    data
  }
}
# ---- file: R/levels.R -------------------------------------------------------
#' Extract factor levels from a data frame
#'
#' `get_levels()` extracts the levels from any factor columns in `data`. It is
#' mainly useful for extracting the original factor levels from the predictors
#' in the training set. `get_outcome_levels()` is a small wrapper around
#' `get_levels()` for extracting levels from a factor outcome
#' that first calls [standardize()] on `y`.
#'
#' @inheritParams standardize
#'
#' @param data A data.frame to extract levels from.
#'
#' @return
#'
#' A named list with as many elements as there are factor columns in `data`
#' or `y`. The names are the names of the factor columns, and the values
#' are character vectors of the levels.
#'
#' If there are no factor columns, `NULL` is returned.
#'
#' @seealso [stats::.getXlevels()]
#'
#' @examples
#'
#' # Factor columns are returned with their levels
#' get_levels(iris)
#'
#' # No factor columns
#' get_levels(mtcars)
#'
#' # standardize() is first run on `y`
#' # which converts the input to a data frame
#' # with an automatically named column, `".outcome"`
#' get_outcome_levels(y = factor(letters[1:5]))
#' @export
get_levels <- function(data) {
  # Only data frames can carry named factor columns
  if (!is.data.frame(data)) {
    return(NULL)
  }

  all_levels <- lapply(data, levels)
  has_levels <- !vapply(all_levels, is.null, logical(1))

  # `NULL`, not an empty list, when no column is a factor
  if (!any(has_levels)) {
    return(NULL)
  }

  all_levels[has_levels]
}

#' @rdname get_levels
#' @export
get_outcome_levels <- function(y) {
  # standardize() wraps bare vectors into a data frame first
  y <- standardize(y)
  get_levels(y)
}
# ---- file: R/model-offset.R -------------------------------------------------
#' Extract a model offset
#'
#' `model_offset()` extracts a numeric offset from a model frame. It is
#' inspired by [stats::model.offset()], but has nicer error messages and
#' is slightly stricter.
#'
#' @param terms A `"terms"` object corresponding to `data`, returned from a
#' call to `model_frame()`.
#'
#' @param data A data frame returned from a call to `model_frame()`.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A numeric vector representing the offset.
#'
#' @details
#'
#' If a column that has been tagged as an offset is not numeric, a nice error
#' message is thrown telling you exactly which column was problematic.
#'
#' [stats::model.offset()] also allows for a column named `"(offset)"` to be
#' considered an offset along with any others that have been tagged by
#' [stats::offset()]. However, [stats::model.matrix()] does not recognize
#' these columns as offsets (so it doesn't remove them as it should). Because
#' of this inconsistency, columns named `"(offset)"` are _not_ treated specially
#' by `model_offset()`.
#'
#' @examples
#'
#' x <- model.frame(Species ~ offset(Sepal.Width), iris)
#'
#' model_offset(terms(x), x)
#'
#' xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris)
#'
#' model_offset(terms(xx), xx)
#'
#' # Problematic columns are caught with intuitive errors
#' tryCatch(
#'   expr = {
#'     x <- model.frame(~ offset(Species), iris)
#'     model_offset(terms(x), x)
#'   },
#'   error = function(e) {
#'     print(e$message)
#'   }
#' )
#' @export
model_offset <- function(terms, data, ..., call = caller_env()) {
  check_dots_empty0(...)

  # Column positions of offset terms, as recorded on the terms object
  .offset_pos <- attr(terms, "offset")

  if (is.null(.offset_pos)) {
    # No offsets were used in the formula
    return(NULL)
  }

  # Multiple offsets are summed together, starting from zero
  ans <- rep(0, times = nrow(data))

  for (.pos in .offset_pos) {
    .offset_val <- data[[.pos]]

    if (!is.numeric(.offset_val)) {
      bad_col <- colnames(data)[.pos]

      cli::cli_abort(
        "Column {.val {bad_col}} is tagged as an offset and thus must be
        numeric, not {.obj_type_friendly { .offset_val }}.",
        call = call
      )
    }

    ans <- ans + .offset_val
  }

  ans
}

extract_offset <- function(terms, data, ..., call = caller_env()) {
  check_dots_empty0(...)

  # Wrap the combined offset (if any) in a one-column tibble
  offset <- model_offset(terms, data, call = call)

  if (is.null(offset)) {
    NULL
  } else {
    tibble::tibble(.offset = offset)
  }
}
# ---- file: R/new.R ----------------------------------------------------------
# These are standardized constructors for internal objects returned from
# different blueprint handlers

# ------------------------------------------------------------------------------
# Mold

# Result of the "clean" stage for the formula/recipe interfaces
new_mold_clean <- function(blueprint, data) {
  list(
    blueprint = blueprint,
    data = data
  )
}

# Result of the "clean" stage for the xy interface
new_mold_clean_xy <- function(blueprint, x, y) {
  list(
    blueprint = blueprint,
    x = x,
    y = y
  )
}

# Final mold() result: processed predictors/outcomes plus the blueprint
new_mold_process <- function(predictors, outcomes, blueprint, extras) {
  list(
    predictors = predictors,
    outcomes = outcomes,
    blueprint = blueprint,
    extras = extras
  )
}

# Intermediate result carrying the data/ptype pair during mold()
new_mold_process_terms <- function(blueprint, data, ptype, extras = NULL) {
  list(
    blueprint = blueprint,
    data = data,
    ptype = ptype,
    extras = extras
  )
}

# ------------------------------------------------------------------------------
# Forge

new_forge_clean <- function(blueprint, predictors, outcomes, extras = NULL) {
  list(
    blueprint = blueprint,
    predictors = predictors,
    outcomes = outcomes,
    extras = extras
  )
}

new_forge_process <- function(predictors, outcomes, extras) {
  list(
    predictors = predictors,
    outcomes = outcomes,
    extras = extras
  )
}

new_forge_process_terms <- function(blueprint, data, extras = NULL) {
  list(
    blueprint = blueprint,
    data = data,
    extras = extras
  )
}

# ------------------------------------------------------------------------------
# ptypes
# Pair the 0-row predictor and outcome prototypes under standard names
new_ptypes <- function(predictors, outcomes) {
  list(
    predictors = predictors,
    outcomes = outcomes
  )
}

# ------------------------------------------------------------------------------
# Extras

# Just c() them together
# Extras aren't predictor or outcome specific
new_extras <- function(predictors_extras, outcomes_extras) {
  c(predictors_extras, outcomes_extras)
}
# ---- file: R/print.R --------------------------------------------------------
# One-word labels consumed by the shared blueprint print method below

#' @export
format.xy_blueprint <- function(x, ...) "XY"

#' @export
format.recipe_blueprint <- function(x, ...) "Recipe"

#' @export
format.formula_blueprint <- function(x, ...) "Formula"

#' @export
print.hardhat_blueprint <- function(x, ...) {
  label <- format(x)
  cli::cli_text("{label} blueprint:")

  cli::cli_par()
  cli::cli_text("# Predictors: {n_blueprint_predictors(x)}")
  cli::cli_text("# Outcomes: {n_blueprint_outcomes(x)}")
  cli::cli_text("Intercept: {x$intercept}")
  cli::cli_text("Novel Levels: {x$allow_novel_levels}")
  cli::cli_text("Composition: {x$composition}")
  if (inherits(x, "formula_blueprint")) {
    # Only the formula blueprint records an `indicators` setting
    cli::cli_text("Indicators: {x$indicators}")
  }
  cli::cli_end()
  invisible(x)
}

# Number of predictor columns recorded on the blueprint (0 before mold())
n_blueprint_predictors <- function(x) {
  n <- ncol(x$ptypes$predictors)
  if (is.null(n)) 0L else n
}

# Number of outcome columns recorded on the blueprint (0 before mold())
n_blueprint_outcomes <- function(x) {
  n <- ncol(x$ptypes$outcomes)
  if (is.null(n)) 0L else n
}
# ---- file: R/ptype.R --------------------------------------------------------
#' Extract a prototype
#'
#' @description
#'
#' `extract_ptype()` extracts a tibble with 0 rows from `data`.
#' This contains
#' all of the required information about column names, classes, and factor
#' levels that are required to check the structure of new data at prediction
#' time.
#'
#' @param data A data frame or matrix.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A 0 row slice of `data` after converting it to a tibble.
#'
#' @details
#'
#' `extract_ptype()` is useful when creating a new preprocessing `blueprint`. It
#' extracts the required information that will be used by the validation checks
#' at prediction time.
#'
#' @examples
#'
#' hardhat:::extract_ptype(iris)
#' @keywords internal
#'
extract_ptype <- function(data, ..., call = current_env()) {
  check_dots_empty0(...)

  # `NULL` (e.g. no outcomes) passes straight through
  if (is.null(data)) {
    return(NULL)
  }

  check_data_frame_or_matrix(data, call = call)

  # Coerce first so matrices also produce tibble prototypes,
  # then keep the structure but none of the rows
  data <- coerce_to_tibble(data)
  vec_slice(data, 0L)
}
# ---- file: R/recompose.R ----------------------------------------------------
#' Recompose a data frame into another form
#'
#' @description
#' `recompose()` takes a data frame and converts it into one of:
#' - A tibble
#' - A data frame
#' - A matrix
#' - A sparse matrix (using the Matrix package)
#'
#' This is an internal function used only by hardhat and recipes.
#'
#' @inheritParams rlang::args_dots_empty
#'
#' @param data A data frame.
#'
#' @param composition One of:
#' - `"tibble"` to convert to a tibble.
#' - `"data.frame"` to convert to a base data frame.
#' - `"matrix"` to convert to a matrix. All columns must be numeric.
#' - `"dgCMatrix"` to convert to a sparse matrix. All columns must be numeric,
#'   and the Matrix package must be installed.
#'
#' @inheritParams validate_column_names
#'
#' @returns
#' The output type is determined from the `composition`.
#'
#' @export
#' @keywords internal
#'
#' @examples
#' df <- vctrs::data_frame(x = 1)
#'
#' recompose(df)
#' recompose(df, composition = "matrix")
#'
#' # All columns must be numeric to convert to a matrix
#' df <- vctrs::data_frame(x = 1, y = "a")
#' try(recompose(df, composition = "matrix"))
recompose <- function(data, ..., composition = "tibble", call = caller_env()) {
  check_dots_empty0(...)
  check_data_frame(data, call = call)

  # Validate `composition` against the known output types
  composition <- arg_match0(
    arg = composition,
    values = c("tibble", "data.frame", "matrix", "dgCMatrix"),
    error_call = call
  )

  switch(
    composition,
    tibble = coerce_to_tibble(data),
    data.frame = new_data_frame(data, n = vec_size(data)),
    matrix = coerce_to_matrix(data, error_call = call),
    dgCMatrix = {
      if (is_sparse_tibble(data)) {
        # Columns are already sparse vectors: convert directly
        sparsevctrs::coerce_to_sparse_matrix(data, call = call)
      } else {
        # Dense path: densify to a matrix first, then sparsify
        data <- coerce_to_matrix(data, error_call = call)
        coerce_to_sparse(data, error_call = call)
      }
    }
  )
}

coerce_to_matrix <- function(data, error_call = caller_env()) {
  numeric <- map_lgl(data, is.numeric)

  if (!all(numeric)) {
    # Report exactly which columns block the matrix conversion
    loc <- which(!numeric)
    loc <- names(data)[loc]

    message <- c(
      "{.arg data} must only contain numeric columns.",
      i = "{cli::qty(length(loc))}{?This/These} column{?s} {?isn't/aren't}
      numeric: {.val {loc}}."
    )

    cli::cli_abort(message, call = error_call)
  }

  as.matrix(data)
}

coerce_to_sparse <- function(data, error_call = caller_env()) {
  # Matrix is only needed for the sparse composition, so check lazily
  check_installed("Matrix", call = error_call)
  Matrix::Matrix(data, sparse = TRUE)
}
# ---- file: R/shrink.R -------------------------------------------------------
#' Subset only required columns
#'
#' @description
#'
#' `shrink()` subsets `data` to only contain the required columns specified by
#' the prototype, `ptype`.
#'
#' @details
#'
#' `shrink()` is called by [forge()] before [scream()] and before the actual
#' processing is done.
#'
#' @param data A data frame containing the data to subset.
#'
#' @param ptype A data frame prototype containing the required columns.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A tibble containing the required columns.
#'
#' @examples
#' # ---------------------------------------------------------------------------
#' # Setup
#'
#' train <- iris[1:100, ]
#' test <- iris[101:150, ]
#'
#' # ---------------------------------------------------------------------------
#' # shrink()
#'
#' # mold() is run at model fit time
#' # and a formula preprocessing blueprint is recorded
#' x <- mold(log(Sepal.Width) ~ Species, train)
#'
#' # Inside the result of mold() are the prototype tibbles
#' # for the predictors and the outcomes
#' ptype_pred <- x$blueprint$ptypes$predictors
#' ptype_out <- x$blueprint$ptypes$outcomes
#'
#' # Pass the test data, along with a prototype, to
#' # shrink() to extract the prototype columns
#' shrink(test, ptype_pred)
#'
#' # To extract the outcomes, just use the
#' # outcome prototype
#' shrink(test, ptype_out)
#'
#' # shrink() makes sure that the columns
#' # required by `ptype` actually exist in the data
#' # and errors nicely when they don't
#' test2 <- subset(test, select = -Species)
#' try(shrink(test2, ptype_pred))
#' @export
shrink <- function(data, ptype, ..., call = current_env()) {
  check_dots_empty0(...)

  # `NULL` data (e.g. no outcomes at prediction time) passes through
  if (is.null(data)) {
    return(NULL)
  }

  check_data_frame_or_matrix(data, call = call)
  data <- coerce_to_tibble(data)

  # Every column required by the prototype must be present in `data`
  cols <- colnames(ptype)
  validate_column_names(data, cols, call = call)

  data[cols]
}
# ---- file: R/sparsevctrs.R --------------------------------------------------
# Does any column of `x` use a sparsevctrs sparse vector representation?
is_sparse_tibble <- function(x) {
  any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))
}
# ---- file: R/tune.R ---------------------------------------------------------
#' Mark arguments for tuning
#'
#' `tune()` is an argument placeholder to be used with the recipes, parsnip, and
#' tune packages. It marks recipes step and parsnip model arguments for tuning.
#'
#' @param id A single character value that can be used to differentiate
#' parameters that are used in multiple places but have the same name, or if
#' the user wants to add a note to the specified parameter.
#'
#' @return A call object that echoes the user's input.
#'
#' @seealso `tune::tune_grid()`, `tune::tune_bayes()`
#'
#' @export
#'
#' @examplesIf rlang::is_installed(c("recipes"))
#' tune()
#' tune("your name here")
#'
#' # In practice, `tune()` is used alongside recipes or parsnip to mark
#' # specific arguments for tuning
#' library(recipes)
#'
#' recipe(mpg ~ ., data = mtcars) |>
#'   step_normalize(all_numeric_predictors()) |>
#'   step_pca(all_numeric_predictors(), num_comp = tune())
tune <- function(id = "") {
  check_string(id)

  # An empty id yields the bare `tune()` call, otherwise the id is echoed
  if (id == "") {
    call("tune")
  } else {
    call("tune", id)
  }
}
# ---- file: _pkgdown.yml -----------------------------------------------------
url: https://hardhat.tidymodels.org

template:
  bootstrap: 5
  package: tidytemplate
  bslib:
    primary: '#CA225E'
  includes:
    in_header: |

development:
  mode: auto

news:
  releases:
  - text: Version 0.1.0
    href: https://www.tidyverse.org/blog/2019/12/hardhat-0-1-0/

reference:
- title: Preprocessing
  contents:
  - mold
  - forge

- title: Prediction
  contents:
  - contains("spruce")
  - quantile_pred

- title: Utility
  contents:
  - contains("model_")
  - delete_response
  - standardize
  - new_model
  - add_intercept_column
  - weighted_table
  - fct_encode_one_hot

- title: Validation
  contents:
  - scream
  - shrink
  - contains("validate")
  - contains("check")

- title: Blueprint
  contents:
  - contains("blueprint")
  - run_mold
  - run_forge

- title: Case Weights
  contents:
  - new_case_weights
  - is_case_weights

- subtitle: Importance Weights
  contents:
  - importance_weights
  - new_importance_weights
| - is_importance_weights 64 | 65 | - subtitle: Frequency Weights 66 | contents: 67 | - frequency_weights 68 | - new_frequency_weights 69 | - is_frequency_weights 70 | 71 | - title: Setup 72 | contents: 73 | - contains("use_") 74 | - contains("create_") 75 | 76 | - title: Information 77 | contents: contains("get_") 78 | 79 | - title: Development 80 | contents: 81 | - tune 82 | - hardhat-extract 83 | 84 | - title: Data 85 | contents: hardhat-example-data 86 | -------------------------------------------------------------------------------- /air.toml: -------------------------------------------------------------------------------- 1 | [format] 2 | exclude = ["inst/templates/"] 3 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/cran-comments.md -------------------------------------------------------------------------------- /data/hardhat-example-data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/data/hardhat-example-data.RData -------------------------------------------------------------------------------- /graphics/factor-handling/factor-handling.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/factor-handling/factor-handling.graffle -------------------------------------------------------------------------------- /graphics/factor-handling/factor-handling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/factor-handling/factor-handling.png -------------------------------------------------------------------------------- /graphics/modeling-package-design.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design.graffle -------------------------------------------------------------------------------- /graphics/modeling-package-design/Fitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design/Fitting.png -------------------------------------------------------------------------------- /graphics/modeling-package-design/Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design/Prediction.png -------------------------------------------------------------------------------- /hardhat.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | 
StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /inst/templates/R/constructor.R: -------------------------------------------------------------------------------- 1 | new_{{model}} <- function(coefs, blueprint) { 2 | hardhat::new_model(coefs = coefs, blueprint = blueprint, class = "{{model}}") 3 | } 4 | -------------------------------------------------------------------------------- /inst/templates/R/fit.R: -------------------------------------------------------------------------------- 1 | #' Fit a `{{model}}` 2 | #' 3 | #' `{{model}}()` fits a model. 4 | #' 5 | #' @param x Depending on the context: 6 | #' 7 | #' * A __data frame__ of predictors. 8 | #' * A __matrix__ of predictors. 9 | #' * A __recipe__ specifying a set of preprocessing steps 10 | #' created from [recipes::recipe()]. 11 | #' 12 | #' @param y When `x` is a __data frame__ or __matrix__, `y` is the outcome 13 | #' specified as: 14 | #' 15 | #' * A __data frame__ with 1 numeric column. 16 | #' * A __matrix__ with 1 numeric column. 17 | #' * A numeric __vector__. 18 | #' 19 | #' @param data When a __recipe__ or __formula__ is used, `data` is specified as: 20 | #' 21 | #' * A __data frame__ containing both the predictors and the outcome. 22 | #' 23 | #' @param formula A formula specifying the outcome terms on the left-hand side, 24 | #' and the predictor terms on the right-hand side. 25 | #' 26 | #' @param ... Not currently used, but required for extensibility. 27 | #' 28 | #' @return 29 | #' 30 | #' A `{{model}}` object. 
31 | #' 32 | #' @examples 33 | #' predictors <- mtcars[, -1] 34 | #' outcome <- mtcars[, 1] 35 | #' 36 | #' # XY interface 37 | #' mod <- {{model}}(predictors, outcome) 38 | #' 39 | #' # Formula interface 40 | #' mod2 <- {{model}}(mpg ~ ., mtcars) 41 | #' 42 | #' # Recipes interface 43 | #' library(recipes) 44 | #' rec <- recipe(mpg ~ ., mtcars) 45 | #' rec <- step_log(rec, disp) 46 | #' mod3 <- {{model}}(rec, mtcars) 47 | #' 48 | #' @export 49 | {{model}} <- function(x, ...) { 50 | UseMethod("{{model}}") 51 | } 52 | 53 | #' @export 54 | #' @rdname {{model}} 55 | {{model}}.default <- function(x, ...) { 56 | stop("`{{model}}()` is not defined for a '", class(x)[1], "'.", call. = FALSE) 57 | } 58 | 59 | # XY method - data frame 60 | 61 | #' @export 62 | #' @rdname {{model}} 63 | {{model}}.data.frame <- function(x, y, ...) { 64 | processed <- hardhat::mold(x, y) 65 | {{model}}_bridge(processed, ...) 66 | } 67 | 68 | # XY method - matrix 69 | 70 | #' @export 71 | #' @rdname {{model}} 72 | {{model}}.matrix <- function(x, y, ...) { 73 | processed <- hardhat::mold(x, y) 74 | {{model}}_bridge(processed, ...) 75 | } 76 | 77 | # Formula method 78 | 79 | #' @export 80 | #' @rdname {{model}} 81 | {{model}}.formula <- function(formula, data, ...) { 82 | processed <- hardhat::mold(formula, data) 83 | {{model}}_bridge(processed, ...) 84 | } 85 | 86 | # Recipe method 87 | 88 | #' @export 89 | #' @rdname {{model}} 90 | {{model}}.recipe <- function(x, data, ...) { 91 | processed <- hardhat::mold(x, data) 92 | {{model}}_bridge(processed, ...) 93 | } 94 | 95 | # ------------------------------------------------------------------------------ 96 | # Bridge 97 | 98 | {{model}}_bridge <- function(processed, ...) 
{ 99 | predictors <- processed$predictors 100 | outcome <- processed$outcomes[[1]] 101 | 102 | fit <- {{model}}_impl(predictors, outcome) 103 | 104 | new_{{model}}( 105 | coefs = fit$coefs, 106 | blueprint = processed$blueprint 107 | ) 108 | } 109 | 110 | 111 | # ------------------------------------------------------------------------------ 112 | # Implementation 113 | 114 | {{model}}_impl <- function(predictors, outcome) { 115 | list(coefs = 1) 116 | } 117 | -------------------------------------------------------------------------------- /inst/templates/R/predict.R: -------------------------------------------------------------------------------- 1 | #' Predict from a `{{model}}` 2 | #' 3 | #' @param object A `{{model}}` object. 4 | #' 5 | #' @param new_data A data frame or matrix of new predictors. 6 | #' 7 | #' @param type A single character. The type of predictions to generate. 8 | #' Valid options are: 9 | #' 10 | #' - `"numeric"` for numeric predictions. 11 | #' 12 | #' @param ... Not used, but required for extensibility. 13 | #' 14 | #' @return 15 | #' 16 | #' A tibble of predictions. The number of rows in the tibble is guaranteed 17 | #' to be the same as the number of rows in `new_data`. 18 | #' 19 | #' @examples 20 | #' train <- mtcars[1:20,] 21 | #' test <- mtcars[21:32, -1] 22 | #' 23 | #' # Fit 24 | #' mod <- {{model}}(mpg ~ cyl + log(drat), train) 25 | #' 26 | #' # Predict, with preprocessing 27 | #' predict(mod, test) 28 | #' 29 | #' @export 30 | predict.{{model}} <- function(object, new_data, type = "numeric", ...) 
{ 31 | forged <- hardhat::forge(new_data, object$blueprint) 32 | rlang::arg_match(type, valid_{{model}}_predict_types()) 33 | predict_{{model}}_bridge(type, object, forged$predictors) 34 | } 35 | 36 | valid_{{model}}_predict_types <- function() { 37 | c("numeric") 38 | } 39 | 40 | # ------------------------------------------------------------------------------ 41 | # Bridge 42 | 43 | predict_{{model}}_bridge <- function(type, model, predictors) { 44 | predictors <- as.matrix(predictors) 45 | 46 | predict_function <- get_{{model}}_predict_function(type) 47 | predictions <- predict_function(model, predictors) 48 | 49 | hardhat::validate_prediction_size(predictions, predictors) 50 | 51 | predictions 52 | } 53 | 54 | get_{{model}}_predict_function <- function(type) { 55 | switch( 56 | type, 57 | numeric = predict_{{model}}_numeric 58 | ) 59 | } 60 | 61 | # ------------------------------------------------------------------------------ 62 | # Implementation 63 | 64 | predict_{{model}}_numeric <- function(model, predictors) { 65 | predictions <- rep(1L, times = nrow(predictors)) 66 | hardhat::spruce_numeric(predictions) 67 | } 68 | -------------------------------------------------------------------------------- /man-roxygen/section-validation.R: -------------------------------------------------------------------------------- 1 | #' @section Validation: 2 | #' 3 | #' hardhat provides validation functions at two levels. 4 | #' 5 | #' - `check_*()`: _check a condition, and return a list_. The list 6 | #' always contains at least one element, `ok`, a logical that specifies if the 7 | #' check passed. Each check also has check specific elements in the returned 8 | #' list that can be used to construct meaningful error messages. 9 | #' 10 | #' - `validate_*()`: _check a condition, and error if it does not pass_. These 11 | #' functions call their corresponding check function, and 12 | #' then provide a default error message. 
If you, as a developer, want a 13 | #' different error message, then call the `check_*()` function yourself, 14 | #' and provide your own validation function. 15 | -------------------------------------------------------------------------------- /man/add_intercept_column.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/intercept.R 3 | \name{add_intercept_column} 4 | \alias{add_intercept_column} 5 | \title{Add an intercept column to \code{data}} 6 | \usage{ 7 | add_intercept_column(data, name = "(Intercept)", ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{name}{The name for the intercept column. Defaults to \code{"(Intercept)"}, 13 | which is the same name that \code{\link[stats:lm]{stats::lm()}} uses.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{data} with an intercept column. 21 | } 22 | \description{ 23 | This function adds an integer column of \code{1}'s to \code{data}. 24 | } 25 | \details{ 26 | If a column named \code{name} already exists in \code{data}, then \code{data} is returned 27 | unchanged and a warning is issued. 
28 | } 29 | \examples{ 30 | add_intercept_column(mtcars) 31 | 32 | add_intercept_column(mtcars, "intercept") 33 | 34 | add_intercept_column(as.matrix(mtcars)) 35 | } 36 | -------------------------------------------------------------------------------- /man/check_quantile_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quantile-pred.R 3 | \name{check_quantile_levels} 4 | \alias{check_quantile_levels} 5 | \title{Check levels of quantiles} 6 | \usage{ 7 | check_quantile_levels(levels, call = rlang::caller_env()) 8 | } 9 | \arguments{ 10 | \item{levels}{The quantile levels.} 11 | 12 | \item{call}{Call shown in the error messages.} 13 | } 14 | \value{ 15 | Invisible \code{TRUE} 16 | } 17 | \description{ 18 | Check levels of quantiles 19 | } 20 | \details{ 21 | Checks the levels for their data type, range, uniqueness, order and missingness. 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/delete_response.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/delete-response.R 3 | \name{delete_response} 4 | \alias{delete_response} 5 | \title{Delete the response from a terms object} 6 | \usage{ 7 | delete_response(terms) 8 | } 9 | \arguments{ 10 | \item{terms}{A terms object.} 11 | } 12 | \value{ 13 | \code{terms} with the response sections removed. 14 | } 15 | \description{ 16 | \code{delete_response()} is exactly the same as \code{delete.response()}, except 17 | that it fixes a long standing bug by also removing the part of the 18 | \code{"dataClasses"} attribute corresponding to the response, if it exists. 
19 | } 20 | \details{ 21 | The bug is described here: 22 | 23 | \url{https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html} 24 | } 25 | \examples{ 26 | 27 | framed <- model_frame(Species ~ Sepal.Width, iris) 28 | 29 | attr(delete.response(framed$terms), "dataClasses") 30 | 31 | attr(delete_response(framed$terms), "dataClasses") 32 | } 33 | -------------------------------------------------------------------------------- /man/extract_ptype.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ptype.R 3 | \name{extract_ptype} 4 | \alias{extract_ptype} 5 | \title{Extract a prototype} 6 | \usage{ 7 | extract_ptype(data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{call}{The call used for errors and warnings.} 15 | } 16 | \value{ 17 | A 0 row slice of \code{data} after converting it to a tibble. 18 | } 19 | \description{ 20 | \code{extract_ptype()} extracts a tibble with 0 rows from \code{data}. This contains 21 | all of the required information about column names, classes, and factor 22 | levels that are required to check the structure of new data at prediction 23 | time. 24 | } 25 | \details{ 26 | \code{extract_ptype()} is useful when creating a new preprocessing \code{blueprint}. It 27 | extracts the required information that will be used by the validation checks 28 | at prediction time. 
29 | } 30 | \examples{ 31 | 32 | hardhat:::extract_ptype(iris) 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/fct_encode_one_hot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/encoding.R 3 | \name{fct_encode_one_hot} 4 | \alias{fct_encode_one_hot} 5 | \title{Encode a factor as a one-hot indicator matrix} 6 | \usage{ 7 | fct_encode_one_hot(x) 8 | } 9 | \arguments{ 10 | \item{x}{A factor. 11 | 12 | \code{x} can't contain missing values. 13 | 14 | \code{x} is allowed to be an ordered factor.} 15 | } 16 | \value{ 17 | An integer matrix with \code{length(x)} rows and \code{length(levels(x))} 18 | columns. 19 | } 20 | \description{ 21 | \code{fct_encode_one_hot()} encodes a factor as a one-hot indicator matrix. 22 | 23 | This matrix consists of \code{length(x)} rows and \code{length(levels(x))} columns. 24 | Every value in row \code{i} of the matrix is filled with \code{0L} except for the 25 | column that has the same name as \code{x[[i]]}, which is instead filled with \code{1L}. 26 | } 27 | \details{ 28 | The columns are returned in the same order as \code{levels(x)}. 29 | 30 | If \code{x} has names, the names are propagated onto the result as the row names. 
31 | } 32 | \examples{ 33 | fct_encode_one_hot(factor(letters)) 34 | 35 | fct_encode_one_hot(factor(letters[1:2], levels = letters)) 36 | 37 | set.seed(1234) 38 | fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE))) 39 | } 40 | -------------------------------------------------------------------------------- /man/figures/Fitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/Fitting.png -------------------------------------------------------------------------------- /man/figures/Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/Prediction.png -------------------------------------------------------------------------------- /man/figures/factor-handling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/factor-handling.png -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: archived 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | archived 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: defunct 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | defunct 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: 
-------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: maturing 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | maturing 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: questioning 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | questioning 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-soft-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: soft-deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | soft-deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/logo.png -------------------------------------------------------------------------------- /man/forge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forge.R 3 | \name{forge} 4 | \alias{forge} 5 | \title{Forge prediction-ready data} 6 | \usage{ 7 | forge(new_data, blueprint, ..., outcomes = FALSE) 8 | } 9 | \arguments{ 10 | \item{new_data}{A data frame or matrix of predictors to process. If 11 | \code{outcomes = TRUE}, this should also contain the outcomes to process.} 12 | 13 | \item{blueprint}{A preprocessing \code{blueprint}.} 14 | 15 | \item{...}{Not used.} 16 | 17 | \item{outcomes}{A logical. Should the outcomes be processed and returned 18 | as well?} 19 | } 20 | \value{ 21 | A named list with 3 elements: 22 | \itemize{ 23 | \item \code{predictors}: A tibble containing the preprocessed 24 | \code{new_data} predictors. 25 | \item \code{outcomes}: If \code{outcomes = TRUE}, a tibble containing the preprocessed 26 | outcomes found in \code{new_data}. Otherwise, \code{NULL}. 27 | \item \code{extras}: Either \code{NULL} if the blueprint returns no extra information, 28 | or a named list containing the extra information. 
29 | } 30 | } 31 | \description{ 32 | \code{forge()} applies the transformations requested by the specific \code{blueprint} 33 | on a set of \code{new_data}. This \code{new_data} contains new predictors 34 | (and potentially outcomes) that will be used to generate predictions. 35 | 36 | All blueprints have consistent return values with the others, but each is 37 | unique enough to have its own help page. Click through below to learn 38 | how to use each one in conjunction with \code{forge()}. 39 | \itemize{ 40 | \item XY Method - \code{\link[=default_xy_blueprint]{default_xy_blueprint()}} 41 | \item Formula Method - \code{\link[=default_formula_blueprint]{default_formula_blueprint()}} 42 | \item Recipes Method - \code{\link[=default_recipe_blueprint]{default_recipe_blueprint()}} 43 | } 44 | } 45 | \details{ 46 | If the outcomes are present in \code{new_data}, they can optionally be processed 47 | and returned in the \code{outcomes} slot of the returned list by setting 48 | \code{outcomes = TRUE}. This is very useful when doing cross validation where 49 | you need to preprocess the outcomes of a test set before computing 50 | performance. 51 | } 52 | \examples{ 53 | # See the blueprint specific documentation linked above 54 | # for various ways to call forge with different 55 | # blueprints. 
56 | 57 | train <- iris[1:100, ] 58 | test <- iris[101:150, ] 59 | 60 | # Formula 61 | processed <- mold( 62 | log(Sepal.Width) ~ Species, 63 | train, 64 | blueprint = default_formula_blueprint(indicators = "none") 65 | ) 66 | 67 | forge(test, processed$blueprint, outcomes = TRUE) 68 | } 69 | -------------------------------------------------------------------------------- /man/frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{frequency_weights} 4 | \alias{frequency_weights} 5 | \title{Frequency weights} 6 | \usage{ 7 | frequency_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An integer vector.} 11 | } 12 | \value{ 13 | A new frequency weights vector. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{frequency_weights()} creates a vector of frequency weights which allow you 19 | to compactly repeat an observation a set number of times. Frequency weights 20 | are supplied as a non-negative integer vector, where only whole numbers are 21 | allowed. 22 | } 23 | \details{ 24 | Frequency weights are integers that denote how many times a particular row of 25 | the data has been observed. They help compress redundant rows into a single 26 | entry. 27 | 28 | In tidymodels, frequency weights are used for all parts of the preprocessing, 29 | model fitting, and performance estimation operations. 
30 | } 31 | \examples{ 32 | # Record that the first observation has 10 replicates, the second has 12 33 | # replicates, and so on 34 | frequency_weights(c(10, 12, 2, 1)) 35 | 36 | # Fractional values are not allowed 37 | try(frequency_weights(c(1.5, 2.3, 10))) 38 | } 39 | \seealso{ 40 | \code{\link[=importance_weights]{importance_weights()}} 41 | } 42 | -------------------------------------------------------------------------------- /man/get_data_classes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classes.R 3 | \name{get_data_classes} 4 | \alias{get_data_classes} 5 | \title{Extract data classes from a data frame or matrix} 6 | \usage{ 7 | get_data_classes(data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{call}{The call used for errors and warnings.} 15 | } 16 | \value{ 17 | A named list. The names are the column names of \code{data} and the values are 18 | character vectors containing the class of that column. 19 | } 20 | \description{ 21 | When predicting from a model, it is often important for the \code{new_data} to 22 | have the same classes as the original data used to fit the model. 23 | \code{get_data_classes()} extracts the classes from the original training data. 
24 | } 25 | \examples{ 26 | get_data_classes(iris) 27 | 28 | get_data_classes(as.matrix(mtcars)) 29 | 30 | # Unlike .MFclass(), the full class 31 | # vector is returned 32 | data <- data.frame(col = ordered(c("a", "b"))) 33 | 34 | .MFclass(data$col) 35 | 36 | get_data_classes(data) 37 | } 38 | -------------------------------------------------------------------------------- /man/get_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/levels.R 3 | \name{get_levels} 4 | \alias{get_levels} 5 | \alias{get_outcome_levels} 6 | \title{Extract factor levels from a data frame} 7 | \usage{ 8 | get_levels(data) 9 | 10 | get_outcome_levels(y) 11 | } 12 | \arguments{ 13 | \item{data}{A data.frame to extract levels from.} 14 | 15 | \item{y}{The outcome. This can be: 16 | \itemize{ 17 | \item A factor vector 18 | \item A numeric vector 19 | \item A 1D numeric array 20 | \item A numeric matrix with column names 21 | \item A 2D numeric array with column names 22 | \item A data frame with numeric or factor columns 23 | }} 24 | } 25 | \value{ 26 | A named list with as many elements as there are factor columns in \code{data} 27 | or \code{y}. The names are the names of the factor columns, and the values 28 | are character vectors of the levels. 29 | 30 | If there are no factor columns, \code{NULL} is returned. 31 | } 32 | \description{ 33 | \code{get_levels()} extracts the levels from any factor columns in \code{data}. It is 34 | mainly useful for extracting the original factor levels from the predictors 35 | in the training set. \code{get_outcome_levels()} is a small wrapper around 36 | \code{get_levels()} for extracting levels from a factor outcome 37 | that first calls \code{\link[=standardize]{standardize()}} on \code{y}. 
38 | } 39 | \examples{ 40 | 41 | # Factor columns are returned with their levels 42 | get_levels(iris) 43 | 44 | # No factor columns 45 | get_levels(mtcars) 46 | 47 | # standardize() is first run on `y` 48 | # which converts the input to a data frame 49 | # with an automatically named column, `".outcome"` 50 | get_outcome_levels(y = factor(letters[1:5])) 51 | } 52 | \seealso{ 53 | \code{\link[stats:checkMFClasses]{stats::.getXlevels()}} 54 | } 55 | -------------------------------------------------------------------------------- /man/hardhat-example-data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hardhat-example-data.R 3 | \docType{data} 4 | \name{hardhat-example-data} 5 | \alias{hardhat-example-data} 6 | \alias{example_train} 7 | \alias{example_test} 8 | \title{Example data for hardhat} 9 | \value{ 10 | \item{example_train,example_test}{tibbles} 11 | } 12 | \description{ 13 | Example data for hardhat 14 | } 15 | \details{ 16 | Data objects for a training and test set with the same variables: 17 | three numeric and two factor columns. 
18 | } 19 | \examples{ 20 | data("hardhat-example-data") 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/hardhat-extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract.R 3 | \name{hardhat-extract} 4 | \alias{hardhat-extract} 5 | \alias{extract_workflow} 6 | \alias{extract_recipe} 7 | \alias{extract_spec_parsnip} 8 | \alias{extract_fit_parsnip} 9 | \alias{extract_fit_engine} 10 | \alias{extract_mold} 11 | \alias{extract_preprocessor} 12 | \alias{extract_postprocessor} 13 | \alias{extract_parameter_dials} 14 | \alias{extract_parameter_set_dials} 15 | \alias{extract_fit_time} 16 | \title{Generics for object extraction} 17 | \usage{ 18 | extract_workflow(x, ...) 19 | 20 | extract_recipe(x, ...) 21 | 22 | extract_spec_parsnip(x, ...) 23 | 24 | extract_fit_parsnip(x, ...) 25 | 26 | extract_fit_engine(x, ...) 27 | 28 | extract_mold(x, ...) 29 | 30 | extract_preprocessor(x, ...) 31 | 32 | extract_postprocessor(x, ...) 33 | 34 | extract_parameter_dials(x, ...) 35 | 36 | extract_parameter_set_dials(x, ...) 37 | 38 | extract_fit_time(x, ...) 39 | } 40 | \arguments{ 41 | \item{x}{An object.} 42 | 43 | \item{...}{Extra arguments passed on to methods.} 44 | } 45 | \description{ 46 | These generics are used to extract elements from various model 47 | objects. Methods are defined in other packages, such as tune, 48 | workflows, and workflowsets, but the returned object is always the same. 49 | \itemize{ 50 | \item \code{extract_fit_engine()} returns the engine specific fit embedded within 51 | a parsnip model fit. For example, when using \code{parsnip::linear_reg()} 52 | with the \code{"lm"} engine, this returns the underlying \code{lm} object. 53 | \item \code{extract_fit_parsnip()} returns a parsnip model fit. 
54 | \item \code{extract_mold()} returns the preprocessed "mold" object returned 55 | from \code{\link[=mold]{mold()}}. It contains information about the preprocessing, 56 | including either the prepped recipe, the formula terms object, or 57 | variable selectors. 58 | \item \code{extract_spec_parsnip()} returns a parsnip model specification. 59 | \item \code{extract_preprocessor()} returns the formula, recipe, or variable 60 | expressions used for preprocessing. 61 | \item \code{extract_recipe()} returns a recipe, possibly estimated. 62 | \item \code{extract_workflow()} returns a workflow, possibly fit. 63 | \item \code{extract_parameter_dials()} returns a single dials parameter object. 64 | \item \code{extract_parameter_set_dials()} returns a set of dials parameter objects. 65 | \item \code{extract_fit_time()} returns a tibble with fit times. 66 | } 67 | } 68 | \examples{ 69 | # See packages where methods are defined for examples, such as `parsnip` or 70 | # `workflows`. 71 | } 72 | -------------------------------------------------------------------------------- /man/hardhat-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hardhat-package.R 3 | \docType{package} 4 | \name{hardhat-package} 5 | \alias{hardhat} 6 | \alias{hardhat-package} 7 | \title{hardhat: Construct Modeling Packages} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Building modeling packages is hard. A large amount of effort generally goes into providing an implementation for a new method that is efficient, fast, and correct, but often less emphasis is put on the user interface. A good interface requires specialized knowledge about S3 methods and formulas, which the average package developer might not have. 
The goal of 'hardhat' is to reduce the burden around building new modeling packages by providing functionality for preprocessing, predicting, and validating input. 12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/tidymodels/hardhat} 17 | \item \url{https://hardhat.tidymodels.org} 18 | \item Report bugs at \url{https://github.com/tidymodels/hardhat/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Hannah Frick \email{hannah@posit.co} (\href{https://orcid.org/0000-0002-6049-5258}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Davis Vaughan \email{davis@posit.co} 28 | \item Max Kuhn \email{max@posit.co} 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Posit Software, PBC [copyright holder, funder] 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{importance_weights} 4 | \alias{importance_weights} 5 | \title{Importance weights} 6 | \usage{ 7 | importance_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{A double vector.} 11 | } 12 | \value{ 13 | A new importance weights vector. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{importance_weights()} creates a vector of importance weights which allow you 19 | to apply a context dependent weight to your observations. Importance weights 20 | are supplied as a non-negative double vector, where fractional values are 21 | allowed. 22 | } 23 | \details{ 24 | Importance weights focus on how much each row of the data set should 25 | influence model estimation. 
These can be based on data or arbitrarily set to 26 | achieve some goal. 27 | 28 | In tidymodels, importance weights only affect the model estimation and 29 | \emph{supervised} recipes steps. They are not used with yardstick functions for 30 | calculating measures of model performance. 31 | } 32 | \examples{ 33 | importance_weights(c(1.5, 2.3, 10)) 34 | } 35 | \seealso{ 36 | \code{\link[=frequency_weights]{frequency_weights()}} 37 | } 38 | -------------------------------------------------------------------------------- /man/is_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{is_blueprint} 4 | \alias{is_blueprint} 5 | \title{Is \code{x} a preprocessing blueprint?} 6 | \usage{ 7 | is_blueprint(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \description{ 13 | \code{is_blueprint()} checks if \code{x} inherits from \code{"hardhat_blueprint"}. 14 | } 15 | \examples{ 16 | is_blueprint(default_xy_blueprint()) 17 | } 18 | -------------------------------------------------------------------------------- /man/is_case_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_case_weights} 4 | \alias{is_case_weights} 5 | \title{Is \code{x} a case weights vector?} 6 | \usage{ 7 | is_case_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_case_weights()} checks if \code{x} inherits from \code{"hardhat_case_weights"}. 
19 | } 20 | \examples{ 21 | is_case_weights(1) 22 | is_case_weights(frequency_weights(1)) 23 | } 24 | -------------------------------------------------------------------------------- /man/is_frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_frequency_weights} 4 | \alias{is_frequency_weights} 5 | \title{Is \code{x} a frequency weights vector?} 6 | \usage{ 7 | is_frequency_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_frequency_weights()} checks if \code{x} inherits from 19 | \code{"hardhat_frequency_weights"}. 20 | } 21 | \examples{ 22 | is_frequency_weights(1) 23 | is_frequency_weights(frequency_weights(1)) 24 | is_frequency_weights(importance_weights(1)) 25 | } 26 | -------------------------------------------------------------------------------- /man/is_importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_importance_weights} 4 | \alias{is_importance_weights} 5 | \title{Is \code{x} an importance weights vector?} 6 | \usage{ 7 | is_importance_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 
14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_importance_weights()} checks if \code{x} inherits from 19 | \code{"hardhat_importance_weights"}. 20 | } 21 | \examples{ 22 | is_importance_weights(1) 23 | is_importance_weights(frequency_weights(1)) 24 | is_importance_weights(importance_weights(1)) 25 | } 26 | -------------------------------------------------------------------------------- /man/model_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model-matrix.R 3 | \name{model_matrix} 4 | \alias{model_matrix} 5 | \title{Construct a design matrix} 6 | \usage{ 7 | model_matrix(terms, data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{terms}{A terms object to construct a model matrix with. This is 11 | typically the terms object returned from the corresponding call to 12 | \code{\link[=model_frame]{model_frame()}}.} 13 | 14 | \item{data}{A tibble to construct the design matrix with. This is 15 | typically the tibble returned from the corresponding call to 16 | \code{\link[=model_frame]{model_frame()}}.} 17 | 18 | \item{...}{These dots are for future extensions and must be empty.} 19 | 20 | \item{call}{The call used for errors and warnings.} 21 | } 22 | \value{ 23 | A tibble containing the design matrix. 24 | } 25 | \description{ 26 | \code{model_matrix()} is a stricter version of \code{\link[stats:model.matrix]{stats::model.matrix()}}. Notably, 27 | \code{model_matrix()} will \emph{never} drop rows, and the result will be a tibble. 
28 | } 29 | \details{ 30 | The following explains the rationale for some of the difference in arguments 31 | compared to \code{\link[stats:model.matrix]{stats::model.matrix()}}: 32 | \itemize{ 33 | \item \code{contrasts.arg}: Set the contrasts argument, \code{options("contrasts")} 34 | globally, or assign a contrast to the factor of interest directly using 35 | \code{\link[stats:contrasts]{stats::contrasts()}}. See the examples section. 36 | \item \code{xlev}: Not allowed because \code{model.frame()} is never called, so it is 37 | unnecessary. 38 | \item \code{...}: Not allowed because the default method of \code{model.matrix()} does 39 | not use it, and the \code{lm} method uses it to pass potential offsets and 40 | weights through, which are handled differently in hardhat. 41 | } 42 | } 43 | \examples{ 44 | # --------------------------------------------------------------------------- 45 | # Example usage 46 | 47 | framed <- model_frame(Sepal.Width ~ Species, iris) 48 | 49 | model_matrix(framed$terms, framed$data) 50 | 51 | # --------------------------------------------------------------------------- 52 | # Missing values never result in dropped rows 53 | 54 | iris2 <- iris 55 | iris2$Species[1] <- NA 56 | 57 | framed2 <- model_frame(Sepal.Width ~ Species, iris2) 58 | 59 | model_matrix(framed2$terms, framed2$data) 60 | 61 | # --------------------------------------------------------------------------- 62 | # Contrasts 63 | 64 | # Default contrasts 65 | y <- factor(c("a", "b")) 66 | x <- data.frame(y = y) 67 | framed <- model_frame(~y, x) 68 | 69 | # Setting contrasts directly 70 | y_with_contrast <- y 71 | contrasts(y_with_contrast) <- contr.sum(2) 72 | x2 <- data.frame(y = y_with_contrast) 73 | framed2 <- model_frame(~y, x2) 74 | 75 | # Compare! 
76 | model_matrix(framed$terms, framed$data) 77 | model_matrix(framed2$terms, framed2$data) 78 | 79 | # Also, can set the contrasts globally 80 | global_override <- c(unordered = "contr.sum", ordered = "contr.poly") 81 | 82 | rlang::with_options( 83 | .expr = { 84 | model_matrix(framed$terms, framed$data) 85 | }, 86 | contrasts = global_override 87 | ) 88 | } 89 | -------------------------------------------------------------------------------- /man/model_offset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model-offset.R 3 | \name{model_offset} 4 | \alias{model_offset} 5 | \title{Extract a model offset} 6 | \usage{ 7 | model_offset(terms, data, ..., call = caller_env()) 8 | } 9 | \arguments{ 10 | \item{terms}{A \code{"terms"} object corresponding to \code{data}, returned from a 11 | call to \code{model_frame()}.} 12 | 13 | \item{data}{A data frame returned from a call to \code{model_frame()}.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | A numeric vector representing the offset. 21 | } 22 | \description{ 23 | \code{model_offset()} extracts a numeric offset from a model frame. It is 24 | inspired by \code{\link[stats:model.extract]{stats::model.offset()}}, but has nicer error messages and 25 | is slightly stricter. 26 | } 27 | \details{ 28 | If a column that has been tagged as an offset is not numeric, a nice error 29 | message is thrown telling you exactly which column was problematic. 30 | 31 | \code{\link[stats:model.extract]{stats::model.offset()}} also allows for a column named \code{"(offset)"} to be 32 | considered an offset along with any others that have been tagged by 33 | \code{\link[stats:offset]{stats::offset()}}. 
However, \code{\link[stats:model.matrix]{stats::model.matrix()}} does not recognize 34 | these columns as offsets (so it doesn't remove them as it should). Because 35 | of this inconsistency, columns named \code{"(offset)"} are \emph{not} treated specially 36 | by \code{model_offset()}. 37 | } 38 | \examples{ 39 | 40 | x <- model.frame(Species ~ offset(Sepal.Width), iris) 41 | 42 | model_offset(terms(x), x) 43 | 44 | xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris) 45 | 46 | model_offset(terms(xx), xx) 47 | 48 | # Problematic columns are caught with intuitive errors 49 | tryCatch( 50 | expr = { 51 | x <- model.frame(~ offset(Species), iris) 52 | model_offset(terms(x), x) 53 | }, 54 | error = function(e) { 55 | print(e$message) 56 | } 57 | ) 58 | } 59 | -------------------------------------------------------------------------------- /man/modeling-usethis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use.R 3 | \name{modeling-usethis} 4 | \alias{modeling-usethis} 5 | \alias{create_modeling_package} 6 | \alias{use_modeling_deps} 7 | \alias{use_modeling_files} 8 | \title{Create a modeling package} 9 | \usage{ 10 | create_modeling_package(path, model, fields = NULL, open = interactive()) 11 | 12 | use_modeling_deps() 13 | 14 | use_modeling_files(model) 15 | } 16 | \arguments{ 17 | \item{path}{A path. If it exists, it is used. If it does not exist, 18 | it is created, provided that the parent path exists.} 19 | 20 | \item{model}{A string. The name of the high level modeling function that 21 | users will call. For example, \code{"linear_regression"}. This will be used to 22 | populate the skeleton. Spaces are not allowed.} 23 | 24 | \item{fields}{A named list of fields to add to DESCRIPTION, 25 | potentially overriding default values. 
See \code{usethis::use_description()} for 26 | how you can set personalized defaults using package options.} 27 | 28 | \item{open}{If TRUE, activates the new project: 29 | \itemize{ 30 | \item If RStudio desktop, the package is opened in a new session. 31 | \item If on RStudio server, the current RStudio project is activated. 32 | \item Otherwise, the working directory and active project is changed. 33 | }} 34 | } 35 | \value{ 36 | \code{create_modeling_package()} returns the project path invisibly. 37 | 38 | \code{use_modeling_deps()} returns invisibly. 39 | 40 | \code{use_modeling_files()} returns \code{model} invisibly. 41 | } 42 | \description{ 43 | \code{create_modeling_package()} will: 44 | \itemize{ 45 | \item Call \code{usethis::create_package()} to set up a new R package. 46 | \item Call \code{use_modeling_deps()}. 47 | \item Call \code{use_modeling_files()}. 48 | } 49 | 50 | \code{use_modeling_deps()} will: 51 | \itemize{ 52 | \item Add hardhat, rlang, and stats to Imports 53 | \item Add recipes to Suggests 54 | \item If roxygen2 is available, use roxygen markdown 55 | } 56 | 57 | \code{use_modeling_files()} will: 58 | \itemize{ 59 | \item Add a package documentation file 60 | \item Generate and populate 3 files in \verb{R/}: 61 | \itemize{ 62 | \item \code{{{model}}-constructor.R} 63 | \item \code{{{model}}-fit.R} 64 | \item \code{{{model}}-predict.R} 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /man/mold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mold.R 3 | \name{mold} 4 | \alias{mold} 5 | \title{Mold data for modeling} 6 | \usage{ 7 | mold(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object. 
See the method specific implementations linked in the 11 | Description for more information.} 12 | 13 | \item{...}{Not used.} 14 | } 15 | \value{ 16 | A named list containing 4 elements: 17 | \itemize{ 18 | \item \code{predictors}: A tibble containing the molded predictors to be used in the 19 | model. 20 | \item \code{outcomes}: A tibble containing the molded outcomes to be used in the 21 | model. 22 | \item \code{blueprint}: A method specific \code{"hardhat_blueprint"} object for use when 23 | making predictions. 24 | \item \code{extras}: Either \code{NULL} if the blueprint returns no extra information, 25 | or a named list containing the extra information. 26 | } 27 | } 28 | \description{ 29 | \code{mold()} applies the appropriate processing steps required to get training 30 | data ready to be fed into a model. It does this through the use of various 31 | \emph{blueprints} that understand how to preprocess data that come in various 32 | forms, such as a formula or a recipe. 33 | 34 | All blueprints have consistent return values with the others, but each is 35 | unique enough to have its own help page. Click through below to learn 36 | how to use each one in conjunction with \code{mold()}. 37 | \itemize{ 38 | \item XY Method - \code{\link[=default_xy_blueprint]{default_xy_blueprint()}} 39 | \item Formula Method - \code{\link[=default_formula_blueprint]{default_formula_blueprint()}} 40 | \item Recipes Method - \code{\link[=default_recipe_blueprint]{default_recipe_blueprint()}} 41 | } 42 | } 43 | \examples{ 44 | \dontshow{if (rlang::is_installed(c("recipes"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 45 | # See the method specific documentation linked in Description 46 | # for the details of each blueprint, and more examples. 
47 | 48 | # XY 49 | mold(iris["Sepal.Width"], iris$Species) 50 | 51 | # Formula 52 | mold(Species ~ Sepal.Width, iris) 53 | 54 | # Recipe 55 | library(recipes) 56 | mold(recipe(Species ~ Sepal.Width, iris), iris) 57 | \dontshow{\}) # examplesIf} 58 | } 59 | -------------------------------------------------------------------------------- /man/new_case_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_case_weights} 4 | \alias{new_case_weights} 5 | \title{Extend case weights} 6 | \usage{ 7 | new_case_weights(x, ..., class) 8 | } 9 | \arguments{ 10 | \item{x}{An integer or double vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new subclassed case weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_case_weights()} is a developer oriented function for constructing a new 23 | case weights type. The \verb{<case_weights>} type itself is an \emph{abstract} type 24 | with very little functionality. Because of this, \code{class} is a required 25 | argument. 
26 | } 27 | \examples{ 28 | new_case_weights(1:5, class = "my_weights") 29 | } 30 | -------------------------------------------------------------------------------- /man/new_frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_frequency_weights} 4 | \alias{new_frequency_weights} 5 | \title{Construct a frequency weights vector} 6 | \usage{ 7 | new_frequency_weights(x = integer(), ..., class = character()) 8 | } 9 | \arguments{ 10 | \item{x}{An integer vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new frequency weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_frequency_weights()} is a developer oriented function for constructing 23 | a new frequency weights vector. Generally, you should use 24 | \code{\link[=frequency_weights]{frequency_weights()}} instead. 
25 | } 26 | \examples{ 27 | new_frequency_weights() 28 | new_frequency_weights(1:5) 29 | } 30 | -------------------------------------------------------------------------------- /man/new_importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_importance_weights} 4 | \alias{new_importance_weights} 5 | \title{Construct an importance weights vector} 6 | \usage{ 7 | new_importance_weights(x = double(), ..., class = character()) 8 | } 9 | \arguments{ 10 | \item{x}{A double vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new importance weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_importance_weights()} is a developer oriented function for constructing 23 | a new importance weights vector. Generally, you should use 24 | \code{\link[=importance_weights]{importance_weights()}} instead. 
25 | } 26 | \examples{ 27 | new_importance_weights() 28 | new_importance_weights(c(1.5, 2.3, 10)) 29 | } 30 | -------------------------------------------------------------------------------- /man/new_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{new_model} 4 | \alias{new_model} 5 | \title{Constructor for a base model} 6 | \usage{ 7 | new_model(..., blueprint = default_xy_blueprint(), class = character()) 8 | } 9 | \arguments{ 10 | \item{...}{Name-value pairs for elements specific to the model defined by 11 | \code{class}.} 12 | 13 | \item{blueprint}{A preprocessing \code{blueprint} returned from a call to \code{\link[=mold]{mold()}}.} 14 | 15 | \item{class}{A character vector representing the class of the model.} 16 | } 17 | \value{ 18 | A new scalar model object, represented as a classed list with named elements 19 | specified in \code{...}. 20 | } 21 | \description{ 22 | A \strong{model} is a \emph{scalar object}, as classified in 23 | \href{https://adv-r.hadley.nz/s3.html#object-styles}{Advanced R}. As such, it 24 | takes uniquely named elements in \code{...} and combines them into a list with 25 | a class of \code{class}. This entire object represents a single model. 26 | } 27 | \details{ 28 | Because every model should have multiple interfaces, including formula 29 | and \code{recipes} interfaces, all models should have a \code{blueprint} that 30 | can process new data when \code{predict()} is called. The easiest way to generate 31 | a blueprint with all of the information required at prediction time is to 32 | use the one that is returned from a call to \code{\link[=mold]{mold()}}. 
33 | } 34 | \examples{ 35 | new_model( 36 | custom_element = "my-elem", 37 | blueprint = default_xy_blueprint(), 38 | class = "custom_model" 39 | ) 40 | } 41 | -------------------------------------------------------------------------------- /man/quantile_pred.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quantile-pred.R 3 | \name{quantile_pred} 4 | \alias{quantile_pred} 5 | \alias{extract_quantile_levels} 6 | \alias{as_tibble.quantile_pred} 7 | \alias{as.matrix.quantile_pred} 8 | \title{Create a vector containing sets of quantiles} 9 | \usage{ 10 | quantile_pred(values, quantile_levels = double()) 11 | 12 | extract_quantile_levels(x) 13 | 14 | \method{as_tibble}{quantile_pred}(x, ..., .rows = NULL, .name_repair = "minimal", rownames = NULL) 15 | 16 | \method{as.matrix}{quantile_pred}(x, ...) 17 | } 18 | \arguments{ 19 | \item{values}{A matrix of values. Each column should correspond to one of 20 | the quantile levels.} 21 | 22 | \item{quantile_levels}{A vector of probabilities corresponding to \code{values}.} 23 | 24 | \item{x}{An object produced by \code{quantile_pred()}.} 25 | 26 | \item{...}{Not currently used.} 27 | 28 | \item{.rows, .name_repair, rownames}{Arguments not used but required by the 29 | original S3 method.} 30 | } 31 | \value{ 32 | \itemize{ 33 | \item \code{quantile_pred()} returns a vector of values associated with the 34 | quantile levels. 35 | \item \code{extract_quantile_levels()} returns a numeric vector of levels. 36 | \item \code{as_tibble()} returns a tibble with rows \code{".pred_quantile"}, 37 | \code{".quantile_levels"}, and \code{".row"}. 38 | \item \code{as.matrix()} returns an unnamed matrix with rows as samples, columns as 39 | quantile levels, and entries are predictions. 
40 | } 41 | } 42 | \description{ 43 | \code{quantile_pred()} is a special vector class used to efficiently store 44 | predictions from a quantile regression model. It requires the same quantile 45 | levels for each row being predicted. 46 | } 47 | \examples{ 48 | .pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) 49 | 50 | unclass(.pred_quantile) 51 | 52 | # Access the underlying information 53 | extract_quantile_levels(.pred_quantile) 54 | 55 | # Matrix format 56 | as.matrix(.pred_quantile) 57 | 58 | # Tidy format 59 | library(tibble) 60 | as_tibble(.pred_quantile) 61 | } 62 | -------------------------------------------------------------------------------- /man/recompose.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recompose.R 3 | \name{recompose} 4 | \alias{recompose} 5 | \title{Recompose a data frame into another form} 6 | \usage{ 7 | recompose(data, ..., composition = "tibble", call = caller_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{composition}{One of: 15 | \itemize{ 16 | \item \code{"tibble"} to convert to a tibble. 17 | \item \code{"data.frame"} to convert to a base data frame. 18 | \item \code{"matrix"} to convert to a matrix. All columns must be numeric. 19 | \item \code{"dgCMatrix"} to convert to a sparse matrix. All columns must be numeric, 20 | and the Matrix package must be installed. 21 | }} 22 | 23 | \item{call}{The call used for errors and warnings.} 24 | } 25 | \value{ 26 | The output type is determined from the \code{composition}. 
27 | } 28 | \description{ 29 | \code{recompose()} takes a data frame and converts it into one of: 30 | \itemize{ 31 | \item A tibble 32 | \item A data frame 33 | \item A matrix 34 | \item A sparse matrix (using the Matrix package) 35 | } 36 | 37 | This is an internal function used only by hardhat and recipes. 38 | } 39 | \examples{ 40 | df <- vctrs::data_frame(x = 1) 41 | 42 | recompose(df) 43 | recompose(df, composition = "matrix") 44 | 45 | # All columns must be numeric to convert to a matrix 46 | df <- vctrs::data_frame(x = 1, y = "a") 47 | try(recompose(df, composition = "matrix")) 48 | } 49 | \keyword{internal} 50 | -------------------------------------------------------------------------------- /man/refresh_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{refresh_blueprint} 4 | \alias{refresh_blueprint} 5 | \title{Refresh a preprocessing blueprint} 6 | \usage{ 7 | refresh_blueprint(blueprint) 8 | } 9 | \arguments{ 10 | \item{blueprint}{A preprocessing blueprint.} 11 | } 12 | \value{ 13 | \code{blueprint} is returned after a call to the corresponding constructor. 14 | } 15 | \description{ 16 | \code{refresh_blueprint()} is a developer facing generic function that is called 17 | at the end of \code{\link[=update_blueprint]{update_blueprint()}}. It simply is a wrapper around the 18 | method specific \verb{new_*_blueprint()} function that runs the updated blueprint 19 | through the constructor again to ensure that all of the elements of the 20 | blueprint are still valid after the update. 21 | } 22 | \details{ 23 | If you implement your own custom \code{blueprint}, you should export a 24 | \code{refresh_blueprint()} method that just calls the constructor for your blueprint 25 | and passes through all of the elements of the blueprint to the constructor. 
26 | } 27 | \examples{ 28 | 29 | blueprint <- default_xy_blueprint() 30 | 31 | # This should never be done manually, but is essentially 32 | # what `update_blueprint(blueprint, intercept = TRUE)` does for you 33 | blueprint$intercept <- TRUE 34 | 35 | # Then update_blueprint() will call refresh_blueprint() 36 | # to ensure that the structure is correct 37 | refresh_blueprint(blueprint) 38 | 39 | # So you can't do something like... 40 | blueprint_bad <- blueprint 41 | blueprint_bad$intercept <- 1 42 | 43 | # ...because the constructor will catch it 44 | try(refresh_blueprint(blueprint_bad)) 45 | 46 | # And update_blueprint() catches this automatically 47 | try(update_blueprint(blueprint, intercept = 1)) 48 | } 49 | -------------------------------------------------------------------------------- /man/rmd/one-hot.Rmd: -------------------------------------------------------------------------------- 1 | ```{r load, include = FALSE} 2 | library(dplyr) 3 | ``` 4 | 5 | By default, `model.matrix()` generates binary indicator variables for factor predictors. When the formula does not remove an intercept, an incomplete set of indicators are created; no indicator is made for the first level of the factor. 6 | 7 | For example, `species` and `island` both have three levels but `model.matrix()` creates two indicator variables for each: 8 | 9 | ```{r ref-cell} 10 | library(dplyr) 11 | library(modeldata) 12 | data(penguins) 13 | 14 | levels(penguins$species) 15 | levels(penguins$island) 16 | 17 | model.matrix(~ species + island, data = penguins) |> 18 | colnames() 19 | ``` 20 | 21 | For a formula with no intercept, the first factor is expanded to indicators for _all_ factor levels but all other factors are expanded to all but one (as above): 22 | 23 | ```{r hybrid} 24 | model.matrix(~ 0 + species + island, data = penguins) |> 25 | colnames() 26 | ``` 27 | 28 | For inference, this hybrid encoding can be problematic. 
29 | 30 | To generate all indicators, use this contrast: 31 | 32 | ```{r one-hot} 33 | # Switch out the contrast method 34 | old_contr <- options("contrasts")$contrasts 35 | new_contr <- old_contr 36 | new_contr["unordered"] <- "contr_one_hot" 37 | options(contrasts = new_contr) 38 | 39 | model.matrix(~ species + island, data = penguins) |> 40 | colnames() 41 | 42 | options(contrasts = old_contr) 43 | ``` 44 | 45 | Removing the intercept here does not affect the factor encodings. 46 | 47 | 48 | -------------------------------------------------------------------------------- /man/run-forge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forge.R, R/blueprint-formula-default.R, 3 | % R/blueprint-recipe-default.R, R/blueprint-xy-default.R 4 | \name{run-forge} 5 | \alias{run-forge} 6 | \alias{run_forge} 7 | \alias{run_forge.default_formula_blueprint} 8 | \alias{run_forge.default_recipe_blueprint} 9 | \alias{run_forge.default_xy_blueprint} 10 | \title{\code{forge()} according to a blueprint} 11 | \usage{ 12 | run_forge(blueprint, new_data, ..., outcomes = FALSE) 13 | 14 | \method{run_forge}{default_formula_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 15 | 16 | \method{run_forge}{default_recipe_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 17 | 18 | \method{run_forge}{default_xy_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 19 | } 20 | \arguments{ 21 | \item{blueprint}{A preprocessing \code{blueprint}.} 22 | 23 | \item{new_data}{A data frame or matrix of predictors to process. If 24 | \code{outcomes = TRUE}, this should also contain the outcomes to process.} 25 | 26 | \item{...}{Not used.} 27 | 28 | \item{outcomes}{A logical. 
Should the outcomes be processed and returned 29 | as well?} 30 | 31 | \item{call}{The call used for errors and warnings.} 32 | } 33 | \value{ 34 | \code{run_forge()} methods return the object that is then immediately returned 35 | from \code{forge()}. See the return value section of \code{\link[=forge]{forge()}} to understand what 36 | the structure of the return value should look like. 37 | } 38 | \description{ 39 | This is a developer facing function that is \emph{only} used if you are creating 40 | your own blueprint subclass. It is called from \code{\link[=forge]{forge()}} and dispatches off 41 | the S3 class of the \code{blueprint}. This gives you an opportunity to forge the 42 | new data in a way that is specific to your blueprint. 43 | 44 | \code{run_forge()} is always called from \code{forge()} with the same arguments, unlike 45 | \code{\link[=run_mold]{run_mold()}}, because there aren't different interfaces for calling 46 | \code{forge()}. \code{run_forge()} is always called as: 47 | 48 | \code{run_forge(blueprint, new_data = new_data, outcomes = outcomes)} 49 | 50 | If you write a blueprint subclass for \code{\link[=new_xy_blueprint]{new_xy_blueprint()}}, 51 | \code{\link[=new_recipe_blueprint]{new_recipe_blueprint()}}, \code{\link[=new_formula_blueprint]{new_formula_blueprint()}}, or \code{\link[=new_blueprint]{new_blueprint()}}, 52 | then your \code{run_forge()} method signature must match this. 
53 | } 54 | \examples{ 55 | bp <- default_xy_blueprint() 56 | 57 | outcomes <- mtcars["mpg"] 58 | predictors <- mtcars 59 | predictors$mpg <- NULL 60 | 61 | mold <- run_mold(bp, x = predictors, y = outcomes) 62 | 63 | run_forge(mold$blueprint, new_data = predictors) 64 | } 65 | -------------------------------------------------------------------------------- /man/run-mold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mold.R, R/blueprint-formula-default.R, 3 | % R/blueprint-recipe-default.R, R/blueprint-xy-default.R 4 | \name{run-mold} 5 | \alias{run-mold} 6 | \alias{run_mold} 7 | \alias{run_mold.default_formula_blueprint} 8 | \alias{run_mold.default_recipe_blueprint} 9 | \alias{run_mold.default_xy_blueprint} 10 | \title{\code{mold()} according to a blueprint} 11 | \usage{ 12 | run_mold(blueprint, ...) 13 | 14 | \method{run_mold}{default_formula_blueprint}(blueprint, ..., data, call = caller_env()) 15 | 16 | \method{run_mold}{default_recipe_blueprint}(blueprint, ..., data, call = caller_env()) 17 | 18 | \method{run_mold}{default_xy_blueprint}(blueprint, ..., x, y, call = caller_env()) 19 | } 20 | \arguments{ 21 | \item{blueprint}{A preprocessing blueprint.} 22 | 23 | \item{...}{Not used. Required for extensibility.} 24 | 25 | \item{data}{A data frame or matrix containing the outcomes and predictors.} 26 | 27 | \item{call}{The call used for errors and warnings.} 28 | 29 | \item{x}{A data frame or matrix containing the predictors.} 30 | 31 | \item{y}{A data frame, matrix, or vector containing the outcomes.} 32 | } 33 | \value{ 34 | \code{run_mold()} methods return the object that is then immediately returned from 35 | \code{mold()}. See the return value section of \code{\link[=mold]{mold()}} to understand what the 36 | structure of the return value should look like. 
37 | } 38 | \description{ 39 | This is a developer facing function that is \emph{only} used if you are creating 40 | your own blueprint subclass. It is called from \code{\link[=mold]{mold()}} and dispatches off 41 | the S3 class of the \code{blueprint}. This gives you an opportunity to mold the 42 | data in a way that is specific to your blueprint. 43 | 44 | \code{run_mold()} will be called with different arguments depending on the 45 | interface to \code{mold()} that is used: 46 | \itemize{ 47 | \item XY interface: 48 | \itemize{ 49 | \item \code{run_mold(blueprint, x = x, y = y)} 50 | } 51 | \item Formula interface: 52 | \itemize{ 53 | \item \code{run_mold(blueprint, data = data)} 54 | \item Additionally, the \code{blueprint} will have been updated to contain the 55 | \code{formula}. 56 | } 57 | \item Recipe interface: 58 | \itemize{ 59 | \item \code{run_mold(blueprint, data = data)} 60 | \item Additionally, the \code{blueprint} will have been updated to contain the 61 | \code{recipe}. 62 | } 63 | } 64 | 65 | If you write a blueprint subclass for \code{\link[=new_xy_blueprint]{new_xy_blueprint()}}, 66 | \code{\link[=new_recipe_blueprint]{new_recipe_blueprint()}}, or \code{\link[=new_formula_blueprint]{new_formula_blueprint()}} then your \code{run_mold()} 67 | method signature must match whichever interface listed above will be used. 68 | 69 | If you write a completely new blueprint inheriting only from 70 | \code{\link[=new_blueprint]{new_blueprint()}} and write a new \code{\link[=mold]{mold()}} method (because you aren't using 71 | an xy, formula, or recipe interface), then you will have full control over 72 | how \code{run_mold()} will be called. 
73 | } 74 | \examples{ 75 | bp <- default_xy_blueprint() 76 | 77 | outcomes <- mtcars["mpg"] 78 | predictors <- mtcars 79 | predictors$mpg <- NULL 80 | 81 | run_mold(bp, x = predictors, y = outcomes) 82 | } 83 | -------------------------------------------------------------------------------- /man/shrink.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shrink.R 3 | \name{shrink} 4 | \alias{shrink} 5 | \title{Subset only required columns} 6 | \usage{ 7 | shrink(data, ptype, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the data to subset.} 11 | 12 | \item{ptype}{A data frame prototype containing the required columns.} 13 | 14 | \item{...}{These dots are for future extensions and must be empty.} 15 | 16 | \item{call}{The call used for errors and warnings.} 17 | } 18 | \value{ 19 | A tibble containing the required columns. 20 | } 21 | \description{ 22 | \code{shrink()} subsets \code{data} to only contain the required columns specified by 23 | the prototype, \code{ptype}. 24 | } 25 | \details{ 26 | \code{shrink()} is called by \code{\link[=forge]{forge()}} before \code{\link[=scream]{scream()}} and before the actual 27 | processing is done. 
28 | } 29 | \examples{ 30 | # --------------------------------------------------------------------------- 31 | # Setup 32 | 33 | train <- iris[1:100, ] 34 | test <- iris[101:150, ] 35 | 36 | # --------------------------------------------------------------------------- 37 | # shrink() 38 | 39 | # mold() is run at model fit time 40 | # and a formula preprocessing blueprint is recorded 41 | x <- mold(log(Sepal.Width) ~ Species, train) 42 | 43 | # Inside the result of mold() are the prototype tibbles 44 | # for the predictors and the outcomes 45 | ptype_pred <- x$blueprint$ptypes$predictors 46 | ptype_out <- x$blueprint$ptypes$outcomes 47 | 48 | # Pass the test data, along with a prototype, to 49 | # shrink() to extract the prototype columns 50 | shrink(test, ptype_pred) 51 | 52 | # To extract the outcomes, just use the 53 | # outcome prototype 54 | shrink(test, ptype_out) 55 | 56 | # shrink() makes sure that the columns 57 | # required by `ptype` actually exist in the data 58 | # and errors nicely when they don't 59 | test2 <- subset(test, select = -Species) 60 | try(shrink(test2, ptype_pred)) 61 | } 62 | -------------------------------------------------------------------------------- /man/spruce-multiple.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spruce.R 3 | \name{spruce-multiple} 4 | \alias{spruce-multiple} 5 | \alias{spruce_numeric_multiple} 6 | \alias{spruce_class_multiple} 7 | \alias{spruce_prob_multiple} 8 | \title{Spruce up multi-outcome predictions} 9 | \usage{ 10 | spruce_numeric_multiple(...) 11 | 12 | spruce_class_multiple(...) 13 | 14 | spruce_prob_multiple(...) 15 | } 16 | \arguments{ 17 | \item{...}{Multiple vectors of predictions: 18 | \itemize{ 19 | \item For \code{spruce_numeric_multiple()}, numeric vectors of equal size. 20 | \item For \code{spruce_class_multiple()}, factors of "hard" class predictions of 21 | equal size. 
22 | \item For \code{spruce_prob_multiple()}, tibbles of equal size, which are the result 23 | of calling \code{\link[=spruce_prob]{spruce_prob()}} on each matrix of prediction probabilities. 24 | } 25 | 26 | If the \code{...} are named, then this name will be used as the suffix on the 27 | resulting column name, otherwise a positional index will be used.} 28 | } 29 | \value{ 30 | \itemize{ 31 | \item For \code{spruce_numeric_multiple()}, a tibble of numeric columns named with the 32 | pattern \verb{.pred_*}. 33 | \item For \code{spruce_class_multiple()}, a tibble of factor columns named with the 34 | pattern \verb{.pred_class_*}. 35 | \item For \code{spruce_prob_multiple()}, a tibble of data frame columns named with the 36 | pattern \verb{.pred_*}. 37 | } 38 | } 39 | \description{ 40 | This family of \verb{spruce_*_multiple()} functions converts multi-outcome 41 | predictions into a standardized format. They are generally called from a 42 | prediction implementation function for the specific \code{type} of prediction to 43 | return. 
44 | } 45 | \examples{ 46 | spruce_numeric_multiple(1:3, foo = 2:4) 47 | 48 | spruce_class_multiple( 49 | one_step = factor(c("a", "b", "c")), 50 | two_step = factor(c("a", "c", "c")) 51 | ) 52 | 53 | one_step <- matrix(c(.3, .7, .0, .1, .3, .6), nrow = 2, byrow = TRUE) 54 | two_step <- matrix(c(.2, .7, .1, .2, .4, .4), nrow = 2, byrow = TRUE) 55 | binary <- matrix(c(.5, .5, .4, .6), nrow = 2, byrow = TRUE) 56 | 57 | spruce_prob_multiple( 58 | one_step = spruce_prob(c("a", "b", "c"), one_step), 59 | two_step = spruce_prob(c("a", "b", "c"), two_step), 60 | binary = spruce_prob(c("yes", "no"), binary) 61 | ) 62 | } 63 | -------------------------------------------------------------------------------- /man/spruce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spruce.R 3 | \name{spruce} 4 | \alias{spruce} 5 | \alias{spruce_numeric} 6 | \alias{spruce_class} 7 | \alias{spruce_prob} 8 | \title{Spruce up predictions} 9 | \usage{ 10 | spruce_numeric(pred) 11 | 12 | spruce_class(pred_class) 13 | 14 | spruce_prob(pred_levels, prob_matrix) 15 | } 16 | \arguments{ 17 | \item{pred}{(\code{type = "numeric"}) A numeric vector of predictions.} 18 | 19 | \item{pred_class}{(\code{type = "class"}) A factor of "hard" class predictions.} 20 | 21 | \item{pred_levels, prob_matrix}{(\code{type = "prob"}) 22 | \itemize{ 23 | \item \code{pred_levels} should be a character vector of the original levels of 24 | the outcome used in training. 25 | \item \code{prob_matrix} should be a numeric matrix of class probabilities with 26 | as many columns as levels in \code{pred_levels}, and in the same order. 27 | }} 28 | } 29 | \value{ 30 | A tibble, ideally with the same number of rows as the \code{new_data} passed 31 | to \code{predict()}. The column names and number of columns vary based on the 32 | function used, but are standardized. 
33 | } 34 | \description{ 35 | The family of \verb{spruce_*()} functions convert predictions into a 36 | standardized format. They are generally called from a prediction 37 | implementation function for the specific \code{type} of prediction to return. 38 | } 39 | \details{ 40 | After running a \verb{spruce_*()} function, you should \emph{always} use the validation 41 | function \code{validate_prediction_size()} to ensure that the number of rows 42 | being returned is the same as the number of rows in the input (\code{new_data}). 43 | } 44 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.R 3 | \name{standardize} 4 | \alias{standardize} 5 | \title{Standardize the outcome} 6 | \usage{ 7 | standardize(y) 8 | } 9 | \arguments{ 10 | \item{y}{The outcome. This can be: 11 | \itemize{ 12 | \item A factor vector 13 | \item A numeric vector 14 | \item A 1D numeric array 15 | \item A numeric matrix with column names 16 | \item A 2D numeric array with column names 17 | \item A data frame with numeric or factor columns 18 | }} 19 | } 20 | \value{ 21 | All possible values of \code{y} are transformed into a \code{tibble} for 22 | standardization. Vectors are transformed into a \code{tibble} with 23 | a single column named \code{".outcome"}. 24 | } 25 | \description{ 26 | Most of the time, the input to a model should be flexible enough to capture 27 | a number of different input types from the user. \code{standardize()} focuses 28 | on capturing the flexibility in the \emph{outcome}. 29 | } 30 | \details{ 31 | \code{standardize()} is called from \code{\link[=mold]{mold()}} when using an XY interface (i.e. 32 | a \code{y} argument was supplied). 
33 | } 34 | \examples{ 35 | standardize(1:5) 36 | 37 | standardize(factor(letters[1:5])) 38 | 39 | mat <- matrix(1:10, ncol = 2) 40 | colnames(mat) <- c("a", "b") 41 | standardize(mat) 42 | 43 | df <- data.frame(x = 1:5, y = 6:10) 44 | standardize(df) 45 | } 46 | -------------------------------------------------------------------------------- /man/tune.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tune.R 3 | \name{tune} 4 | \alias{tune} 5 | \title{Mark arguments for tuning} 6 | \usage{ 7 | tune(id = "") 8 | } 9 | \arguments{ 10 | \item{id}{A single character value that can be used to differentiate 11 | parameters that are used in multiple places but have the same name, or if 12 | the user wants to add a note to the specified parameter.} 13 | } 14 | \value{ 15 | A call object that echoes the user's input. 16 | } 17 | \description{ 18 | \code{tune()} is an argument placeholder to be used with the recipes, parsnip, and 19 | tune packages. It marks recipes step and parsnip model arguments for tuning. 
20 | } 21 | \examples{ 22 | \dontshow{if (rlang::is_installed(c("recipes"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 23 | tune() 24 | tune("your name here") 25 | 26 | # In practice, `tune()` is used alongside recipes or parsnip to mark 27 | # specific arguments for tuning 28 | library(recipes) 29 | 30 | recipe(mpg ~ ., data = mtcars) |> 31 | step_normalize(all_numeric_predictors()) |> 32 | step_pca(all_numeric_predictors(), num_comp = tune()) 33 | \dontshow{\}) # examplesIf} 34 | } 35 | \seealso{ 36 | \code{tune::tune_grid()}, \code{tune::tune_bayes()} 37 | } 38 | -------------------------------------------------------------------------------- /man/update_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{update_blueprint} 4 | \alias{update_blueprint} 5 | \title{Update a preprocessing blueprint} 6 | \usage{ 7 | update_blueprint(blueprint, ...) 8 | } 9 | \arguments{ 10 | \item{blueprint}{A preprocessing blueprint.} 11 | 12 | \item{...}{Name-value pairs of \emph{existing} elements in \code{blueprint} that should 13 | be updated.} 14 | } 15 | \description{ 16 | \code{update_blueprint()} is the correct way to alter elements of an existing 17 | \code{blueprint} object. It has two benefits over just doing 18 | \code{blueprint$elem <- new_elem}. 19 | \itemize{ 20 | \item The name you are updating \emph{must} already exist in the blueprint. This prevents 21 | you from accidentally updating non-existent elements. 22 | \item The constructor for the blueprint is automatically run after the update by 23 | \code{refresh_blueprint()} to ensure that the blueprint is still valid. 
24 | } 25 | } 26 | \examples{ 27 | 28 | blueprint <- default_xy_blueprint() 29 | 30 | # `intercept` defaults to FALSE 31 | blueprint 32 | 33 | update_blueprint(blueprint, intercept = TRUE) 34 | 35 | # Can't update non-existent elements 36 | try(update_blueprint(blueprint, intercpt = TRUE)) 37 | 38 | # Can't add non-valid elements 39 | try(update_blueprint(blueprint, intercept = 1)) 40 | } 41 | -------------------------------------------------------------------------------- /man/validate_no_formula_duplication.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_no_formula_duplication} 4 | \alias{validate_no_formula_duplication} 5 | \alias{check_no_formula_duplication} 6 | \title{Ensure no duplicate terms appear in \code{formula}} 7 | \usage{ 8 | validate_no_formula_duplication(formula, original = FALSE) 9 | 10 | check_no_formula_duplication(formula, original = FALSE) 11 | } 12 | \arguments{ 13 | \item{formula}{A formula to check.} 14 | 15 | \item{original}{A logical. Should the original names be checked, or should 16 | the names after processing be used? If \code{FALSE}, \code{y ~ log(y)} is allowed 17 | because the names are \code{"y"} and \code{"log(y)"}, if \code{TRUE}, \code{y ~ log(y)} is not 18 | allowed because the original names are both \code{"y"}.} 19 | } 20 | \value{ 21 | \code{validate_no_formula_duplication()} returns \code{formula} invisibly. 22 | 23 | \code{check_no_formula_duplication()} returns a named list of two components, 24 | \code{ok} and \code{duplicates}. 25 | } 26 | \description{ 27 | validate - asserts the following: 28 | \itemize{ 29 | \item \code{formula} must not have duplicate terms on the left and right hand 30 | side of the formula. 31 | } 32 | 33 | check - returns the following: 34 | \itemize{ 35 | \item \code{ok} A logical. Does the check pass? 
36 | \item \code{duplicates} A character vector. The duplicate terms. 37 | } 38 | } 39 | \section{Validation}{ 40 | 41 | 42 | hardhat provides validation functions at two levels. 43 | \itemize{ 44 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 45 | always contains at least one element, \code{ok}, a logical that specifies if the 46 | check passed. Each check also has check specific elements in the returned 47 | list that can be used to construct meaningful error messages. 48 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 49 | functions call their corresponding check function, and 50 | then provide a default error message. If you, as a developer, want a 51 | different error message, then call the \verb{check_*()} function yourself, 52 | and provide your own validation function. 53 | } 54 | } 55 | 56 | \examples{ 57 | # All good 58 | check_no_formula_duplication(y ~ x) 59 | 60 | # Not good! 61 | check_no_formula_duplication(y ~ y) 62 | 63 | # This is generally okay 64 | check_no_formula_duplication(y ~ log(y)) 65 | 66 | # But you can be more strict 67 | check_no_formula_duplication(y ~ log(y), original = TRUE) 68 | 69 | # This would throw an error 70 | try(validate_no_formula_duplication(log(y) ~ log(y))) 71 | } 72 | \seealso{ 73 | Other validation functions: 74 | \code{\link{validate_column_names}()}, 75 | \code{\link{validate_outcomes_are_binary}()}, 76 | \code{\link{validate_outcomes_are_factors}()}, 77 | \code{\link{validate_outcomes_are_numeric}()}, 78 | \code{\link{validate_outcomes_are_univariate}()}, 79 | \code{\link{validate_prediction_size}()}, 80 | \code{\link{validate_predictors_are_numeric}()} 81 | } 82 | \concept{validation functions} 83 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_binary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit 
by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_binary} 4 | \alias{validate_outcomes_are_binary} 5 | \alias{check_outcomes_are_binary} 6 | \title{Ensure that the outcome has binary factors} 7 | \usage{ 8 | validate_outcomes_are_binary(outcomes) 9 | 10 | check_outcomes_are_binary(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_binary()} returns \code{outcomes} invisibly. 21 | 22 | \code{check_outcomes_are_binary()} returns a named list of three components, 23 | \code{ok}, \code{bad_cols}, and \code{num_levels}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have binary factor columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_cols} A character vector. The names of the columns with problems. 35 | \item \code{num_levels} An integer vector. The actual number of levels of the columns 36 | with problems. 37 | } 38 | } 39 | \details{ 40 | The expected way to use this validation function is to supply it the 41 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 42 | } 43 | \section{Validation}{ 44 | 45 | 46 | hardhat provides validation functions at two levels. 47 | \itemize{ 48 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 49 | always contains at least one element, \code{ok}, a logical that specifies if the 50 | check passed. Each check also has check specific elements in the returned 51 | list that can be used to construct meaningful error messages. 52 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. 
These 53 | functions call their corresponding check function, and 54 | then provide a default error message. If you, as a developer, want a 55 | different error message, then call the \verb{check_*()} function yourself, 56 | and provide your own validation function. 57 | } 58 | } 59 | 60 | \examples{ 61 | # Not a binary factor. 0 levels 62 | check_outcomes_are_binary(data.frame(x = 1)) 63 | 64 | # Not a binary factor. 1 level 65 | check_outcomes_are_binary(data.frame(x = factor("A"))) 66 | 67 | # All good 68 | check_outcomes_are_binary(data.frame(x = factor(c("A", "B")))) 69 | } 70 | \seealso{ 71 | Other validation functions: 72 | \code{\link{validate_column_names}()}, 73 | \code{\link{validate_no_formula_duplication}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_numeric}()}, 76 | \code{\link{validate_outcomes_are_univariate}()}, 77 | \code{\link{validate_prediction_size}()}, 78 | \code{\link{validate_predictors_are_numeric}()} 79 | } 80 | \concept{validation functions} 81 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_factors} 4 | \alias{validate_outcomes_are_factors} 5 | \alias{check_outcomes_are_factors} 6 | \title{Ensure that the outcome has only factor columns} 7 | \usage{ 8 | validate_outcomes_are_factors(outcomes) 9 | 10 | check_outcomes_are_factors(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_factors()} returns \code{outcomes} invisibly. 
21 | 22 | \code{check_outcomes_are_factors()} returns a named list of two components, 23 | \code{ok} and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have factor columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 56 | } 57 | } 58 | 59 | \examples{ 60 | # Not a factor column. 
61 | check_outcomes_are_factors(data.frame(x = 1)) 62 | 63 | # All good 64 | check_outcomes_are_factors(data.frame(x = factor(c("A", "B")))) 65 | } 66 | \seealso{ 67 | Other validation functions: 68 | \code{\link{validate_column_names}()}, 69 | \code{\link{validate_no_formula_duplication}()}, 70 | \code{\link{validate_outcomes_are_binary}()}, 71 | \code{\link{validate_outcomes_are_numeric}()}, 72 | \code{\link{validate_outcomes_are_univariate}()}, 73 | \code{\link{validate_prediction_size}()}, 74 | \code{\link{validate_predictors_are_numeric}()} 75 | } 76 | \concept{validation functions} 77 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_numeric} 4 | \alias{validate_outcomes_are_numeric} 5 | \alias{check_outcomes_are_numeric} 6 | \title{Ensure outcomes are all numeric} 7 | \usage{ 8 | validate_outcomes_are_numeric(outcomes) 9 | 10 | check_outcomes_are_numeric(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_numeric()} returns \code{outcomes} invisibly. 21 | 22 | \code{check_outcomes_are_numeric()} returns a named list of two components, 23 | \code{ok} and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have numeric columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. 
The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 
56 | } 57 | } 58 | 59 | \examples{ 60 | # All good 61 | check_outcomes_are_numeric(mtcars) 62 | 63 | # Species is not numeric 64 | check_outcomes_are_numeric(iris) 65 | 66 | # This gives an intelligent error message 67 | try(validate_outcomes_are_numeric(iris)) 68 | } 69 | \seealso{ 70 | Other validation functions: 71 | \code{\link{validate_column_names}()}, 72 | \code{\link{validate_no_formula_duplication}()}, 73 | \code{\link{validate_outcomes_are_binary}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_univariate}()}, 76 | \code{\link{validate_prediction_size}()}, 77 | \code{\link{validate_predictors_are_numeric}()} 78 | } 79 | \concept{validation functions} 80 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_univariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_univariate} 4 | \alias{validate_outcomes_are_univariate} 5 | \alias{check_outcomes_are_univariate} 6 | \title{Ensure that the outcome is univariate} 7 | \usage{ 8 | validate_outcomes_are_univariate(outcomes) 9 | 10 | check_outcomes_are_univariate(outcomes) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | } 15 | \value{ 16 | \code{validate_outcomes_are_univariate()} returns \code{outcomes} invisibly. 17 | 18 | \code{check_outcomes_are_univariate()} returns a named list of two components, 19 | \code{ok} and \code{n_cols}. 20 | } 21 | \description{ 22 | validate - asserts the following: 23 | \itemize{ 24 | \item \code{outcomes} must have 1 column. Atomic vectors are treated as 25 | 1 column matrices. 26 | } 27 | 28 | check - returns the following: 29 | \itemize{ 30 | \item \code{ok} A logical. Does the check pass? 31 | \item \code{n_cols} A single numeric. The actual number of columns. 
32 | } 33 | } 34 | \details{ 35 | The expected way to use this validation function is to supply it the 36 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 37 | } 38 | \section{Validation}{ 39 | 40 | 41 | hardhat provides validation functions at two levels. 42 | \itemize{ 43 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 44 | always contains at least one element, \code{ok}, a logical that specifies if the 45 | check passed. Each check also has check specific elements in the returned 46 | list that can be used to construct meaningful error messages. 47 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 48 | functions call their corresponding check function, and 49 | then provide a default error message. If you, as a developer, want a 50 | different error message, then call the \verb{check_*()} function yourself, 51 | and provide your own validation function. 52 | } 53 | } 54 | 55 | \examples{ 56 | validate_outcomes_are_univariate(data.frame(x = 1)) 57 | 58 | try(validate_outcomes_are_univariate(mtcars)) 59 | } 60 | \seealso{ 61 | Other validation functions: 62 | \code{\link{validate_column_names}()}, 63 | \code{\link{validate_no_formula_duplication}()}, 64 | \code{\link{validate_outcomes_are_binary}()}, 65 | \code{\link{validate_outcomes_are_factors}()}, 66 | \code{\link{validate_outcomes_are_numeric}()}, 67 | \code{\link{validate_prediction_size}()}, 68 | \code{\link{validate_predictors_are_numeric}()} 69 | } 70 | \concept{validation functions} 71 | -------------------------------------------------------------------------------- /man/validate_predictors_are_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_predictors_are_numeric} 4 | \alias{validate_predictors_are_numeric} 5 | 
\alias{check_predictors_are_numeric} 6 | \title{Ensure predictors are all numeric} 7 | \usage{ 8 | validate_predictors_are_numeric(predictors) 9 | 10 | check_predictors_are_numeric(predictors, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{predictors}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_predictors_are_numeric()} returns \code{predictors} invisibly. 21 | 22 | \code{check_predictors_are_numeric()} returns a named list of two components, 23 | \code{ok}, and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{predictors} must have numeric columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$predictors} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. 
If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 56 | } 57 | } 58 | 59 | \examples{ 60 | # All good 61 | check_predictors_are_numeric(mtcars) 62 | 63 | # Species is not numeric 64 | check_predictors_are_numeric(iris) 65 | 66 | # This gives an intelligent error message 67 | try(validate_predictors_are_numeric(iris)) 68 | } 69 | \seealso{ 70 | Other validation functions: 71 | \code{\link{validate_column_names}()}, 72 | \code{\link{validate_no_formula_duplication}()}, 73 | \code{\link{validate_outcomes_are_binary}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_numeric}()}, 76 | \code{\link{validate_outcomes_are_univariate}()}, 77 | \code{\link{validate_prediction_size}()} 78 | } 79 | \concept{validation functions} 80 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-180x180.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon.ico 
-------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | data.sqlite 6 | *.html 7 | cloud.noindex 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Revdeps 2 | 3 | ## Failed to check (2) 4 | 5 | |package |version |error |warning |note | 6 | |:---------|:-------|:-----|:-------|:----| 7 | |censored |? | | | | 8 | |lnmixsurv |? | | | | 9 | 10 | ## New problems (1) 11 | 12 | |package |version |error |warning |note | 13 | |:-----------|:-------|:-----|:-------|:----| 14 | |[viralmodels](problems.md#viralmodels)|1.3.1 | |__+1__ | | 15 | 16 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 37 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 1 new problems 6 | * We failed to check 2 packages 7 | 8 | Issues with CRAN packages are summarised below. 9 | 10 | ### New problems 11 | (This reports the first line of each new failure) 12 | 13 | * viralmodels 14 | checking whether package ‘viralmodels’ can be installed ... WARNING 15 | 16 | ### Failed to check 17 | 18 | * censored (NA) 19 | * lnmixsurv (NA) 20 | -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | release_details: ??? 
6 | -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # viralmodels 2 | 3 |
4 | 5 | * Version: 1.3.1 6 | * GitHub: https://github.com/juanv66x/viralmodels 7 | * Source code: https://github.com/cran/viralmodels 8 | * Date/Publication: 2024-10-18 12:00:02 UTC 9 | * Number of recursive dependencies: 185 10 | 11 | Run `revdepcheck::cloud_details(, "viralmodels")` for more info 12 | 13 |
14 | 15 | ## Newly broken 16 | 17 | * checking whether package ‘viralmodels’ can be installed ... WARNING 18 | ``` 19 | Found the following significant warnings: 20 | Warning: replacing previous import ‘hardhat::contr_one_hot’ by ‘parsnip::contr_one_hot’ when loading ‘viralmodels’ 21 | See ‘/tmp/workdir/viralmodels/new/viralmodels.Rcheck/00install.out’ for details. 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(hardhat) 3 | 4 | test_check("hardhat") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint-formula-default.md: -------------------------------------------------------------------------------- 1 | # `levels` argument is validated 2 | 3 | Code 4 | new_default_formula_blueprint(levels = 1) 5 | Condition 6 | Error in `new_default_formula_blueprint()`: 7 | ! `levels` must be a list, not the number 1. 8 | 9 | --- 10 | 11 | Code 12 | new_default_formula_blueprint(levels = list(1)) 13 | Condition 14 | Error in `new_default_formula_blueprint()`: 15 | ! `levels` must be fully named. 16 | 17 | --- 18 | 19 | Code 20 | new_default_formula_blueprint(levels = list(a = 1)) 21 | Condition 22 | Error in `new_default_formula_blueprint()`: 23 | ! `levels` must only contain character vectors. 24 | 25 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint-recipe.md: -------------------------------------------------------------------------------- 1 | # `recipe` argument is validated 2 | 3 | Code 4 | new_recipe_blueprint(recipe = 1) 5 | Condition 6 | Error in `new_recipe_blueprint()`: 7 | ! `recipe` must be a recipe or `NULL`, not the number 1. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint.md: -------------------------------------------------------------------------------- 1 | # check on input to `new_blueprint()` 2 | 3 | Code 4 | new_blueprint(same_new_arg = 1, same_new_arg = 2) 5 | Condition 6 | Error in `new_blueprint()`: 7 | ! All elements of `...` must have unique names. 8 | 9 | # checks for updating a blueprint 10 | 11 | Code 12 | update_blueprint(blueprint, intercept = TRUE, intercept = FALSE) 13 | Condition 14 | Error in `update_blueprint()`: 15 | ! `...` must have unique names. 16 | 17 | --- 18 | 19 | Code 20 | update_blueprint(blueprint, intercpt = TRUE) 21 | Condition 22 | Error in `update_blueprint()`: 23 | ! All elements of `...` must already exist. 24 | i The following fields are new: "intercpt". 25 | 26 | # checks the ptype 27 | 28 | Code 29 | new_blueprint(ptypes = list(x = 1)) 30 | Condition 31 | Error in `new_blueprint()`: 32 | ! `ptypes` must have an element named "predictors". 33 | 34 | --- 35 | 36 | Code 37 | new_blueprint(ptypes = list(predictors = "not a tibble", outcomes = "not a tibble")) 38 | Condition 39 | Error in `new_blueprint()`: 40 | ! `ptypes$predictors` must be a tibble, not the string "not a tibble". 41 | 42 | --- 43 | 44 | Code 45 | new_blueprint(ptypes = list(predictors = tibble_too_long, outcomes = tibble_too_long)) 46 | Condition 47 | Error in `new_blueprint()`: 48 | ! `ptypes$predictors` must be size 0, not size 1. 49 | 50 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/case-weights.md: -------------------------------------------------------------------------------- 1 | # importance_weights() doesn't allow negative weights 2 | 3 | Code 4 | importance_weights(-1) 5 | Condition 6 | Error in `importance_weights()`: 7 | ! `x` can't contain negative weights. 
8 | 9 | # importance-weights constructor checks for double data 10 | 11 | Code 12 | new_importance_weights(1L) 13 | Condition 14 | Error in `new_importance_weights()`: 15 | ! `x` must be a double vector. 16 | 17 | # can't cast importance-weights -> integer (too lenient, likely fractional weights) 18 | 19 | Code 20 | vec_cast(x, integer()) 21 | Condition 22 | Error: 23 | ! Can't convert `x` to . 24 | 25 | # as.integer() fails (too lenient, likely fractional weights) 26 | 27 | Code 28 | as.integer(x) 29 | Condition 30 | Error in `as.integer()`: 31 | ! Can't convert `x` to . 32 | 33 | # frequency_weights() coerces to integer 34 | 35 | Code 36 | frequency_weights(1.5) 37 | Condition 38 | Error in `frequency_weights()`: 39 | ! Can't convert from `x` to due to loss of precision. 40 | * Locations: 1 41 | 42 | # frequency_weights() doesn't allow negative weights 43 | 44 | Code 45 | frequency_weights(-1L) 46 | Condition 47 | Error in `frequency_weights()`: 48 | ! `x` can't contain negative weights. 49 | 50 | # frequency-weights constructor checks for integer data 51 | 52 | Code 53 | new_frequency_weights(1) 54 | Condition 55 | Error in `new_frequency_weights()`: 56 | ! `x` must be an integer vector. 57 | 58 | # `x` must be integer or double 59 | 60 | Code 61 | new_case_weights("x", class = "subclass") 62 | Condition 63 | Error in `new_case_weights()`: 64 | ! `x` must be an integer or double vector. 
65 | 66 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/constructor.md: -------------------------------------------------------------------------------- 1 | # print - hardhat_model 2 | 3 | Code 4 | new_model() 5 | Output 6 | 7 | named list() 8 | Code 9 | new_model(class = "custom_class") 10 | Output 11 | 12 | named list() 13 | Code 14 | new_model(x = 4, y = "hi", class = "custom_class") 15 | Output 16 | 17 | $x 18 | [1] 4 19 | 20 | $y 21 | [1] "hi" 22 | 23 | 24 | # must use a valid blueprint 25 | 26 | Code 27 | new_model(blueprint = 1, class = "custom") 28 | Condition 29 | Error in `new_model()`: 30 | ! `blueprint` must be a , not the number 1. 31 | 32 | # `new_scalar()` must have elements 33 | 34 | Code 35 | new_scalar(list()) 36 | Condition 37 | Error in `new_scalar()`: 38 | ! `elems` must be a list of length 1 or greater. 39 | 40 | # `new_scalar()` must have unique names 41 | 42 | Code 43 | new_scalar(list(x = 1, x = 2)) 44 | Condition 45 | Error in `new_scalar()`: 46 | ! `elems` must have unique names. 47 | 48 | # `new_scalar()` must have no extra attributes 49 | 50 | Code 51 | new_scalar(x) 52 | Condition 53 | Error in `new_scalar()`: 54 | ! `elems` must have no attributes (apart from names). 55 | 56 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/delete-response.md: -------------------------------------------------------------------------------- 1 | # errors out if not passed a terms object 2 | 3 | Code 4 | delete_response(1) 5 | Condition 6 | Error in `delete_response()`: 7 | ! `terms` must be a , not the number 1. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/encoding.md: -------------------------------------------------------------------------------- 1 | # errors on missing values 2 | 3 | Code 4 | fct_encode_one_hot(x) 5 | Condition 6 | Error in `fct_encode_one_hot()`: 7 | ! 
`x` can't contain missing values. 8 | 9 | # errors on non-factors 10 | 11 | Code 12 | fct_encode_one_hot(1) 13 | Condition 14 | Error in `fct_encode_one_hot()`: 15 | ! `x` must be a factor, not a number. 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/forge.md: -------------------------------------------------------------------------------- 1 | # `run_forge()` throws an informative default error 2 | 3 | Code 4 | run_forge(1) 5 | Condition 6 | Error in `run_forge()`: 7 | ! No `run_forge()` method provided for a number. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/intercept.md: -------------------------------------------------------------------------------- 1 | # existing intercepts are skipped with a warning 2 | 3 | Code 4 | xx <- add_intercept_column(x) 5 | Condition 6 | Warning: 7 | `data` already has a column named "(Intercept)". 8 | i Returning `data` unchanged. 9 | 10 | # name can only be a single character 11 | 12 | Code 13 | add_intercept_column(mtcars, name = c("x", "y")) 14 | Condition 15 | Error in `add_intercept_column()`: 16 | ! `name` must be a valid name, not a character vector. 17 | 18 | --- 19 | 20 | Code 21 | add_intercept_column(mtcars, name = 1) 22 | Condition 23 | Error in `add_intercept_column()`: 24 | ! `name` must be a valid name, not the number 1. 25 | 26 | # data has to be a data frame or matrix 27 | 28 | Code 29 | add_intercept_column(1) 30 | Condition 31 | Error in `add_intercept_column()`: 32 | ! `data` must be a data frame or a matrix, not the number 1. 33 | 34 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/levels.md: -------------------------------------------------------------------------------- 1 | # Can extract levels from an outcome 2 | 3 | Code 4 | get_outcome_levels("a") 5 | Condition 6 | Error in `standardize()`: 7 | ! 
No `standardize()` method provided for a string. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/model-matrix.md: -------------------------------------------------------------------------------- 1 | # `contr_one_hot()` input checks 2 | 3 | Code 4 | contr_one_hot(n = 2, sparse = TRUE) 5 | Condition 6 | Warning: 7 | `sparse = TRUE` not implemented for `contr_one_hot()`. 8 | Output 9 | 1 2 10 | 1 1 0 11 | 2 0 1 12 | 13 | --- 14 | 15 | Code 16 | contr_one_hot(n = 2, contrasts = FALSE) 17 | Condition 18 | Warning: 19 | `contrasts = FALSE` not implemented for `contr_one_hot()`. 20 | Output 21 | 1 2 22 | 1 1 0 23 | 2 0 1 24 | 25 | --- 26 | 27 | Code 28 | contr_one_hot(n = 1:2) 29 | Condition 30 | Error in `contr_one_hot()`: 31 | ! `n` must be a whole number, not an integer vector. 32 | 33 | --- 34 | 35 | Code 36 | contr_one_hot(n = list(1:2)) 37 | Condition 38 | Error in `contr_one_hot()`: 39 | ! `n` must be a whole number, not a list. 40 | 41 | --- 42 | 43 | Code 44 | contr_one_hot(character(0)) 45 | Condition 46 | Error in `contr_one_hot()`: 47 | ! `n` cannot be empty. 48 | 49 | --- 50 | 51 | Code 52 | contr_one_hot(-1) 53 | Condition 54 | Error in `contr_one_hot()`: 55 | ! `n` must be a whole number larger than or equal to 1, not the number -1. 56 | 57 | --- 58 | 59 | Code 60 | contr_one_hot(list()) 61 | Condition 62 | Error in `contr_one_hot()`: 63 | ! `n` must be a whole number, not an empty list. 64 | 65 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/model-offset.md: -------------------------------------------------------------------------------- 1 | # Only numeric columns can be offsets 2 | 3 | Code 4 | mold(~ Sepal.Width + offset(Species), iris) 5 | Condition 6 | Error in `mold()`: 7 | ! Column "offset(Species)" is tagged as an offset and thus must be numeric, not a object. 
8 | 9 | # offset columns are stored as predictors 10 | 11 | Code 12 | forge(iris2, x$blueprint) 13 | Condition 14 | Error in `forge()`: 15 | ! The required column "Sepal.Length" is missing. 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold-recipe.md: -------------------------------------------------------------------------------- 1 | # `data` is validated 2 | 3 | Code 4 | mold(recipes::recipe(Species ~ Sepal.Length, data = iris), 1) 5 | Condition 6 | Error in `mold()`: 7 | ! `data` must be a data frame or a matrix, not the number 1. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold-xy.md: -------------------------------------------------------------------------------- 1 | # unknown mold() inputs throw an error 2 | 3 | Code 4 | mold("hi") 5 | Condition 6 | Error in `mold()`: 7 | ! `x` must be a data frame, matrix, recipe, or formula, not the string "hi". 8 | 9 | # cannot pass anything in the dots 10 | 11 | Code 12 | mold(iris[, "Sepal.Length", drop = FALSE], iris$Species, z = "in the dots") 13 | Condition 14 | Error in `mold()`: 15 | ! `...` must be empty. 16 | x Problematic argument: 17 | * z = "in the dots" 18 | 19 | --- 20 | 21 | Code 22 | mold(iris[, "Sepal.Length", drop = FALSE], iris$Species, blueprint = default_xy_blueprint( 23 | composition = "dgCMatrix"), z = "in the dots") 24 | Condition 25 | Error in `mold()`: 26 | ! `...` must be empty. 27 | x Problematic argument: 28 | * z = "in the dots" 29 | 30 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold.md: -------------------------------------------------------------------------------- 1 | # `run_mold()` throws an informative default error 2 | 3 | Code 4 | run_mold(1) 5 | Condition 6 | Error in `run_mold()`: 7 | ! No `run_mold()` method provided for a number. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/print.md: -------------------------------------------------------------------------------- 1 | # print - formula 2 | 3 | Code 4 | mold(Species ~ Sepal.Length, iris)$blueprint 5 | Message 6 | Formula blueprint: 7 | # Predictors: 1 8 | # Outcomes: 1 9 | Intercept: FALSE 10 | Novel Levels: FALSE 11 | Composition: tibble 12 | Indicators: traditional 13 | 14 | Code 15 | mold(~Sepal.Length, iris)$blueprint 16 | Message 17 | Formula blueprint: 18 | # Predictors: 1 19 | # Outcomes: 0 20 | Intercept: FALSE 21 | Novel Levels: FALSE 22 | Composition: tibble 23 | Indicators: traditional 24 | 25 | 26 | # print - default 27 | 28 | Code 29 | mold(iris[, c("Sepal.Length"), drop = FALSE], iris$Species)$blueprint 30 | Message 31 | XY blueprint: 32 | # Predictors: 1 33 | # Outcomes: 1 34 | Intercept: FALSE 35 | Novel Levels: FALSE 36 | Composition: tibble 37 | 38 | 39 | # print - recipe 40 | 41 | Code 42 | mold(recipes::recipe(Species ~ Sepal.Length, iris), iris)$blueprint 43 | Condition 44 | Warning: 45 | The `strings_as_factors` argument of `prep.recipe()` is deprecated as of recipes 1.3.0. 46 | i Please use the `strings_as_factors` argument of `recipe()` instead. 47 | Message 48 | Recipe blueprint: 49 | # Predictors: 1 50 | # Outcomes: 1 51 | Intercept: FALSE 52 | Novel Levels: FALSE 53 | Composition: tibble 54 | 55 | 56 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/recompose.md: -------------------------------------------------------------------------------- 1 | # columns must be numeric when coercing to matrix 2 | 3 | Code 4 | recompose(df, composition = "matrix") 5 | Condition 6 | Error: 7 | ! `data` must only contain numeric columns. 8 | i These columns aren't numeric: "y" and "z". 
9 | 10 | # columns must be numeric when coercing to sparse matrix 11 | 12 | Code 13 | recompose(df, composition = "dgCMatrix") 14 | Condition 15 | Error: 16 | ! `data` must only contain numeric columns. 17 | i These columns aren't numeric: "y" and "z". 18 | 19 | # checks for data frame input 20 | 21 | Code 22 | recompose(1) 23 | Condition 24 | Error: 25 | ! `data` must be a data frame, not the number 1. 26 | 27 | # dots must be empty 28 | 29 | Code 30 | recompose(data.frame(), 1) 31 | Condition 32 | Error in `recompose()`: 33 | ! `...` must be empty. 34 | x Problematic argument: 35 | * ..1 = 1 36 | i Did you forget to name an argument? 37 | 38 | # validates `composition` 39 | 40 | Code 41 | recompose(data.frame(), composition = "foo") 42 | Condition 43 | Error: 44 | ! `composition` must be one of "tibble", "data.frame", "matrix", or "dgCMatrix", not "foo". 45 | 46 | --- 47 | 48 | Code 49 | recompose(data.frame(), composition = 1) 50 | Condition 51 | Error: 52 | ! `composition` must be a string or character vector. 53 | 54 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/scream.md: -------------------------------------------------------------------------------- 1 | # novel levels can be ignored 2 | 3 | Code 4 | x <- scream(new, ptype, allow_novel_levels = TRUE) 5 | 6 | # novel levels in a new character vector can be ignored 7 | 8 | Code 9 | x <- scream(new, ptype, allow_novel_levels = TRUE) 10 | 11 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/shrink.md: -------------------------------------------------------------------------------- 1 | # `data` must be data-like 2 | 3 | Code 4 | shrink(1, ptype) 5 | Condition 6 | Error in `shrink()`: 7 | ! `data` must be a data frame or a matrix, not the number 1. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/spruce.md: -------------------------------------------------------------------------------- 1 | # spruce - numeric 2 | 3 | Code 4 | spruce_numeric("hi") 5 | Condition 6 | Error in `spruce_numeric()`: 7 | ! `pred` must be a numeric vector, not the string "hi". 8 | 9 | --- 10 | 11 | Code 12 | spruce_numeric(matrix(1)) 13 | Condition 14 | Error in `spruce_numeric()`: 15 | ! `pred` must be a numeric vector, not a double matrix. 16 | 17 | # spruce - class 18 | 19 | Code 20 | spruce_class(1) 21 | Condition 22 | Error in `spruce_class()`: 23 | ! `pred_class` must be a factor, not the number 1. 24 | 25 | --- 26 | 27 | Code 28 | spruce_class("hi") 29 | Condition 30 | Error in `spruce_class()`: 31 | ! `pred_class` must be a factor, not the string "hi". 32 | 33 | # spruce - prob 34 | 35 | Code 36 | spruce_prob(1, prob_matrix) 37 | Condition 38 | Error in `spruce_prob()`: 39 | ! `pred_levels` must be a character vector, not the number 1. 40 | 41 | --- 42 | 43 | Code 44 | spruce_prob(pred_levels, 1) 45 | Condition 46 | Error in `spruce_prob()`: 47 | ! `prob_matrix` must be a numeric matrix, not the number 1. 48 | 49 | --- 50 | 51 | Code 52 | spruce_prob("a", matrix("a")) 53 | Condition 54 | Error in `spruce_prob()`: 55 | ! `prob_matrix` must be a numeric matrix, not a character matrix. 56 | 57 | --- 58 | 59 | Code 60 | spruce_prob(c("a", "b"), matrix(1, ncol = 3)) 61 | Condition 62 | Error in `spruce_prob()`: 63 | ! The number of levels (2) must be equal to the number of class probability columns (3). 64 | 65 | --- 66 | 67 | Code 68 | spruce_prob(c("a"), matrix(1, ncol = 2)) 69 | Condition 70 | Error in `spruce_prob()`: 71 | ! The number of levels (1) must be equal to the number of class probability columns (2). 
72 | 73 | # spruce multiple helpers check input type 74 | 75 | Code 76 | spruce_numeric_multiple(1, "x") 77 | Condition 78 | Error in `spruce_numeric_multiple()`: 79 | ! Each element of `...` must be a numeric vector, not a string. 80 | 81 | --- 82 | 83 | Code 84 | spruce_class_multiple(1) 85 | Condition 86 | Error in `spruce_class_multiple()`: 87 | ! Each element of `...` must be a factor, not a number. 88 | 89 | --- 90 | 91 | Code 92 | spruce_prob_multiple(1) 93 | Condition 94 | Error in `spruce_prob_multiple()`: 95 | ! Each element of `...` must be a tibble, not a number. 96 | 97 | # spruce multiple helpers check input sizes (and disallow recycling) 98 | 99 | Code 100 | spruce_numeric_multiple(1, 1:2) 101 | Condition 102 | Error in `spruce_numeric_multiple()`: 103 | ! `..1` must have size 2, not size 1. 104 | 105 | --- 106 | 107 | Code 108 | spruce_class_multiple(factor("x"), factor(c("a", "b"))) 109 | Condition 110 | Error in `spruce_class_multiple()`: 111 | ! `..1` must have size 2, not size 1. 112 | 113 | --- 114 | 115 | Code 116 | spruce_prob_multiple(tibble(x = 1), tibble(x = 1:2)) 117 | Condition 118 | Error in `spruce_prob_multiple()`: 119 | ! `..1` must have size 2, not size 1. 120 | 121 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/standardize.md: -------------------------------------------------------------------------------- 1 | # standardize - matrix 2 | 3 | Code 4 | standardize(mat_bad) 5 | Condition 6 | Error in `standardize()`: 7 | ! All columns of `y` must have unique names. 8 | 9 | --- 10 | 11 | Code 12 | standardize(mat_bad2) 13 | Condition 14 | Error in `standardize()`: 15 | ! `y` must be a numeric matrix, not a character matrix. 16 | 17 | # standardize - array 18 | 19 | Code 20 | standardize(bad) 21 | Condition 22 | Error in `standardize()`: 23 | ! All columns of `y` must have unique names. 
24 | 25 | --- 26 | 27 | Code 28 | standardize(bad2) 29 | Condition 30 | Error in `standardize()`: 31 | ! `y` must be a numeric matrix, not a character matrix. 32 | 33 | # standardize - data.frame 34 | 35 | Code 36 | standardize(bad) 37 | Condition 38 | Error in `standardize()`: 39 | ! All columns of `y` must have unique names. 40 | 41 | --- 42 | 43 | Code 44 | standardize(bad2) 45 | Condition 46 | Error in `standardize()`: 47 | ! Not all columns of `y` are known outcome types. 48 | i This column has an unknown type: "x". 49 | 50 | --- 51 | 52 | Code 53 | standardize(bad3) 54 | Condition 55 | Error in `standardize()`: 56 | ! Not all columns of `y` are known outcome types. 57 | i These columns have unknown types: "x" and "y". 58 | 59 | # standardize - unknown 60 | 61 | Code 62 | standardize("hi") 63 | Condition 64 | Error in `standardize()`: 65 | ! No `standardize()` method provided for a string. 66 | 67 | --- 68 | 69 | Code 70 | standardize(Sys.time()) 71 | Condition 72 | Error in `standardize()`: 73 | ! No `standardize()` method provided for a object. 74 | 75 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/table.md: -------------------------------------------------------------------------------- 1 | # `na_remove` is validated 2 | 3 | Code 4 | weighted_table(x, y, weights = w, na_remove = c(TRUE, FALSE)) 5 | Condition 6 | Error in `weighted_table()`: 7 | ! `na_remove` must be `TRUE` or `FALSE`, not a logical vector. 8 | 9 | --- 10 | 11 | Code 12 | weighted_table(x, y, weights = w, na_remove = 1) 13 | Condition 14 | Error in `weighted_table()`: 15 | ! `na_remove` must be `TRUE` or `FALSE`, not the number 1. 16 | 17 | # requires at least one `...` 18 | 19 | Code 20 | weighted_table(weights = w) 21 | Condition 22 | Error in `weighted_table()`: 23 | ! At least one vector must be supplied to `...`. 
24 | 25 | # requires all `...` to be factors 26 | 27 | Code 28 | weighted_table(1, weights = w) 29 | Condition 30 | Error in `weighted_table()`: 31 | ! All elements of `...` must be factors. 32 | 33 | # requires all `...` to be the same size 34 | 35 | Code 36 | weighted_table(x, y, weights = w) 37 | Condition 38 | Error in `weighted_table()`: 39 | ! All elements of `...` must be the same size. 40 | 41 | # requires all `weights` to be the same size as `...` elements 42 | 43 | Code 44 | weighted_table(x, y, weights = w) 45 | Condition 46 | Error in `weighted_table()`: 47 | ! `weights` must have size 3, not size 4. 48 | 49 | # requires `weights` to be castable to double 50 | 51 | Code 52 | weighted_table(x, weights = "a") 53 | Condition 54 | Error in `weighted_table()`: 55 | ! Can't convert `weights` to . 56 | 57 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/tune.md: -------------------------------------------------------------------------------- 1 | # `id` is validated 2 | 3 | Code 4 | tune(1) 5 | Condition 6 | Error in `tune()`: 7 | ! `id` must be a single string, not the number 1. 8 | 9 | --- 10 | 11 | Code 12 | tune(c("x", "y")) 13 | Condition 14 | Error in `tune()`: 15 | ! `id` must be a single string, not a character vector. 16 | 17 | --- 18 | 19 | Code 20 | tune(NA_character_) 21 | Condition 22 | Error in `tune()`: 23 | ! `id` must be a single string, not a character `NA`. 24 | 25 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/use.md: -------------------------------------------------------------------------------- 1 | # no `model` aborts normally 2 | 3 | Code 4 | create_modeling_package(path = "my/path") 5 | Condition 6 | Error in `create_modeling_package()`: 7 | ! `model` is absent but must be supplied. 
8 | 9 | # no `path` aborts normally 10 | 11 | Code 12 | create_modeling_package(model = "my_model") 13 | Condition 14 | Error in `create_modeling_package()`: 15 | ! `path` is absent but must be supplied. 16 | 17 | # `model` can only be a single string 18 | 19 | Code 20 | create_modeling_package(path = "my/path", model = c("model1", "model2")) 21 | Condition 22 | Error in `create_modeling_package()`: 23 | ! `model` must be a single string, not a character vector. 24 | 25 | --- 26 | 27 | Code 28 | create_modeling_package(path = "my/path", model = 1) 29 | Condition 30 | Error in `create_modeling_package()`: 31 | ! `model` must be a single string, not the number 1. 32 | 33 | --- 34 | 35 | Code 36 | create_modeling_package(path = "my/path", model = "model with space") 37 | Condition 38 | Error in `create_modeling_package()`: 39 | ! `model` must not contain any spaces. 40 | 41 | -------------------------------------------------------------------------------- /tests/testthat/data-raw/hardhat-0.2.0.R: -------------------------------------------------------------------------------- 1 | # Objects used for backwards compatibility testing. 2 | # Objects created with hardhat 0.2.0. 
3 | # devtools::install_version("hardhat", "0.2.0") 4 | 5 | # ------------------------------------------------------------------------------ 6 | # Testing compatibility of `mold()` and a basic recipe 7 | 8 | dir <- here::here("tests", "testthat", "data") 9 | file <- fs::path(dir, "hardhat-0.2.0-pre-mold-recipe.rds") 10 | 11 | data <- tibble::tibble(y = 1:5, x = 6:10) 12 | 13 | blueprint <- hardhat::default_recipe_blueprint() 14 | 15 | object <- list(data = data, blueprint = blueprint) 16 | 17 | saveRDS( 18 | object, 19 | file = file, 20 | version = 2 21 | ) 22 | 23 | # ------------------------------------------------------------------------------ 24 | # Testing compatibility of `forge()` and a basic recipe 25 | 26 | dir <- here::here("tests", "testthat", "data") 27 | file <- fs::path(dir, "hardhat-0.2.0-post-mold-recipe.rds") 28 | 29 | data <- tibble::tibble(y = 1:5, x = 6:10) 30 | new_data <- tibble::tibble(y = 6:10, x = 11:15) 31 | 32 | rec <- recipes::recipe(y ~ ., data = data) 33 | rec <- recipes::step_mutate(rec, z = 1) 34 | 35 | blueprint <- hardhat::default_recipe_blueprint() 36 | mold <- hardhat::mold(rec, data = data, blueprint = blueprint) 37 | blueprint <- mold$blueprint 38 | 39 | object <- list(new_data = new_data, blueprint = blueprint) 40 | 41 | saveRDS( 42 | object, 43 | file = file, 44 | version = 2 45 | ) 46 | 47 | # ------------------------------------------------------------------------------ 48 | # Testing compatibility of `forge()` and a recipe with a nonstandard role 49 | 50 | dir <- here::here("tests", "testthat", "data") 51 | file <- fs::path(dir, "hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds") 52 | 53 | data <- tibble::tibble(y = 1:5, x = 6:10, id = 1:5) 54 | new_data <- tibble::tibble(y = 6:10, x = 11:15, id = 6:10) 55 | 56 | rec <- recipes::recipe(y ~ ., data = data) 57 | rec <- recipes::update_role(rec, id, new_role = "id") 58 | rec <- recipes::step_mutate(rec, z = 1) 59 | 60 | blueprint <- hardhat::default_recipe_blueprint() 61 | 
mold <- hardhat::mold(rec, data = data, blueprint = blueprint) 62 | blueprint <- mold$blueprint 63 | 64 | object <- list(new_data = new_data, blueprint = blueprint) 65 | 66 | saveRDS( 67 | object, 68 | file = file, 69 | version = 2 70 | ) 71 | 72 | # ------------------------------------------------------------------------------ 73 | -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-post-mold-recipe.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-post-mold-recipe.rds -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-pre-mold-recipe.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-pre-mold-recipe.rds -------------------------------------------------------------------------------- /tests/testthat/helper-matrix.R: -------------------------------------------------------------------------------- 1 | expect_matrix <- function(x) { 2 | expect_true(inherits(x, "matrix")) 3 | } 4 | -------------------------------------------------------------------------------- /tests/testthat/helper-sparsevctrs.R: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # For sparse tibble testing 3 | 4 | sparse_hotel_rates <- function() { 5 | # 99.2 sparsity 6 | hotel_rates <- modeldata::hotel_rates 7 | 8 | prefix_colnames <- function(x, prefix) { 9 | colnames(x) <- paste(colnames(x), prefix, sep = "_") 10 | x 11 | } 12 | 13 | dummies_country <- hardhat::fct_encode_one_hot(hotel_rates$country) 14 | dummies_company <- hardhat::fct_encode_one_hot(hotel_rates$company) 15 | dummies_agent <- hardhat::fct_encode_one_hot(hotel_rates$agent) 16 | 17 | res <- cbind( 18 | hotel_rates["avg_price_per_room"], 19 | prefix_colnames(dummies_country, "country"), 20 | prefix_colnames(dummies_company, "company"), 21 | prefix_colnames(dummies_agent, "agent") 22 | ) 23 | 24 | res <- as.matrix(res) 25 | Matrix::Matrix(res, sparse = TRUE) 26 | } 27 | -------------------------------------------------------------------------------- /tests/testthat/test-blueprint-formula-default.R: -------------------------------------------------------------------------------- 1 | test_that("`levels` argument is validated", { 2 | expect_snapshot(error = TRUE, { 3 | new_default_formula_blueprint(levels = 1) 4 | }) 5 | expect_snapshot(error = TRUE, { 6 | new_default_formula_blueprint(levels = list(1)) 7 | }) 8 | expect_snapshot(error = TRUE, { 9 | new_default_formula_blueprint(levels = list("a" = 1)) 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-blueprint-recipe.R: -------------------------------------------------------------------------------- 1 | test_that("`recipe` argument is validated", { 2 | expect_snapshot(error = TRUE, { 3 | new_recipe_blueprint(recipe = 1) 4 | }) 5 | }) 6 | 7 | test_that("`recipe` argument allows `NULL`", { 8 | x <- new_recipe_blueprint(recipe = NULL) 9 | expect_null(x$recipe) 10 | }) 11 | -------------------------------------------------------------------------------- 
/tests/testthat/test-blueprint.R: -------------------------------------------------------------------------------- 1 | test_that("check on input to `new_blueprint()`", { 2 | expect_snapshot(error = TRUE, { 3 | new_blueprint(same_new_arg = 1, same_new_arg = 2) 4 | }) 5 | }) 6 | 7 | test_that("checks for updating a blueprint", { 8 | blueprint <- default_xy_blueprint() 9 | 10 | expect_snapshot(error = TRUE, { 11 | update_blueprint(blueprint, intercept = TRUE, intercept = FALSE) 12 | }) 13 | expect_snapshot(error = TRUE, { 14 | update_blueprint(blueprint, intercpt = TRUE) 15 | }) 16 | }) 17 | 18 | test_that("checks the ptype", { 19 | expect_snapshot(error = TRUE, { 20 | new_blueprint(ptypes = list(x = 1)) 21 | }) 22 | expect_snapshot(error = TRUE, { 23 | new_blueprint( 24 | ptypes = list("predictors" = "not a tibble", outcomes = "not a tibble") 25 | ) 26 | }) 27 | 28 | tibble_too_long <- tibble::tibble(x = 1) 29 | expect_snapshot(error = TRUE, { 30 | new_blueprint( 31 | ptypes = list("predictors" = tibble_too_long, outcomes = tibble_too_long) 32 | ) 33 | }) 34 | }) 35 | -------------------------------------------------------------------------------- /tests/testthat/test-constructor.R: -------------------------------------------------------------------------------- 1 | test_that("print - hardhat_model", { 2 | expect_snapshot({ 3 | new_model() 4 | new_model(class = "custom_class") 5 | new_model(x = 4, y = "hi", class = "custom_class") 6 | }) 7 | }) 8 | 9 | test_that("can create new empty models", { 10 | x <- new_model() 11 | 12 | expect_s3_class(x$blueprint, "default_xy_blueprint") 13 | expect_s3_class(x, "hardhat_model") 14 | expect_s3_class(x, "hardhat_scalar") 15 | }) 16 | 17 | test_that("can create new models", { 18 | x <- new_model(class = "custom") 19 | 20 | expect_s3_class(x, "custom") 21 | expect_s3_class(x$blueprint, "default_xy_blueprint") 22 | }) 23 | 24 | test_that("can have custom elements", { 25 | x <- new_model( 26 | y = 1, 27 | blueprint = 
default_xy_blueprint(), 28 | class = "custom_class" 29 | ) 30 | 31 | expect_equal(x$y, 1) 32 | }) 33 | 34 | test_that("must use a valid blueprint", { 35 | expect_no_error( 36 | new_model(blueprint = default_xy_blueprint(), class = "custom") 37 | ) 38 | 39 | expect_snapshot(error = TRUE, { 40 | new_model(blueprint = 1, class = "custom") 41 | }) 42 | }) 43 | 44 | test_that("`new_scalar()` must have elements", { 45 | expect_snapshot(error = TRUE, new_scalar(list())) 46 | }) 47 | 48 | test_that("`new_scalar()` must have unique names", { 49 | expect_snapshot(error = TRUE, new_scalar(list(x = 1, x = 2))) 50 | }) 51 | 52 | test_that("`new_scalar()` must have no extra attributes", { 53 | x <- list(x = 1) 54 | attr(x, "extra") <- 1 55 | expect_snapshot(error = TRUE, new_scalar(x)) 56 | }) 57 | -------------------------------------------------------------------------------- /tests/testthat/test-delete-response.R: -------------------------------------------------------------------------------- 1 | test_that("identical to delete.response() if no dataClasses", { 2 | trms <- terms(y ~ x) 3 | 4 | expect_equal( 5 | delete_response(trms), 6 | delete.response(trms) 7 | ) 8 | }) 9 | 10 | test_that("doesn't return dataClasses for y", { 11 | framed <- model_frame(Sepal.Width ~ Species, iris) 12 | 13 | expect_equal( 14 | attr(delete_response(framed$terms), "dataClasses"), 15 | c(Species = "factor") 16 | ) 17 | 18 | # expected base R behavior 19 | expect_equal( 20 | attr(delete.response(framed$terms), "dataClasses"), 21 | c(Sepal.Width = "numeric", Species = "factor") 22 | ) 23 | }) 24 | 25 | test_that("equal results if no response, but dataClasses exist", { 26 | framed <- model_frame(~Species, iris) 27 | 28 | expect_equal( 29 | delete_response(framed$terms), 30 | delete.response(framed$terms) 31 | ) 32 | }) 33 | 34 | test_that("errors out if not passed a terms object", { 35 | expect_snapshot(error = TRUE, { 36 | delete_response(1) 37 | }) 38 | }) 39 | 
-------------------------------------------------------------------------------- /tests/testthat/test-encoding.R: -------------------------------------------------------------------------------- 1 | test_that("generates one-hot indicator matrix", { 2 | x <- factor(c("a", "b", "a", "a", "c")) 3 | 4 | expect <- matrix( 5 | 0L, 6 | nrow = 5, 7 | ncol = 3, 8 | dimnames = list(NULL, c("a", "b", "c")) 9 | ) 10 | expect[c(1, 3, 4, 7, 15)] <- 1L 11 | 12 | expect_identical(fct_encode_one_hot(x), expect) 13 | }) 14 | 15 | test_that("works with factors with just 1 level", { 16 | x <- factor(rep("a", 3)) 17 | 18 | expect_identical( 19 | fct_encode_one_hot(x), 20 | matrix(1L, nrow = 3, ncol = 1, dimnames = list(NULL, "a")) 21 | ) 22 | }) 23 | 24 | test_that("works with levels that aren't in the data", { 25 | x <- factor(c("a", "c", "a"), levels = c("a", "b", "c", "d")) 26 | 27 | expect <- matrix( 28 | 0L, 29 | nrow = 3, 30 | ncol = 4, 31 | dimnames = list(NULL, c("a", "b", "c", "d")) 32 | ) 33 | expect[c(1, 3, 8)] <- 1L 34 | 35 | expect_identical(fct_encode_one_hot(x), expect) 36 | }) 37 | 38 | test_that("works with factors with explicit `NA` level but no `NA` data", { 39 | expect_identical( 40 | fct_encode_one_hot(factor("a", levels = c("a", NA), exclude = NULL)), 41 | matrix( 42 | data = c(1L, 0L), 43 | nrow = 1, 44 | ncol = 2, 45 | dimnames = list(NULL, c("a", NA)) 46 | ) 47 | ) 48 | }) 49 | 50 | test_that("works with empty factors", { 51 | expect_identical( 52 | fct_encode_one_hot(factor()), 53 | matrix(data = integer(), nrow = 0, ncol = 0, dimnames = list(NULL, NULL)) 54 | ) 55 | }) 56 | 57 | test_that("works with empty factors with levels", { 58 | expect_identical( 59 | fct_encode_one_hot(factor(levels = c("a", "b"))), 60 | matrix( 61 | data = integer(), 62 | nrow = 0, 63 | ncol = 2, 64 | dimnames = list(NULL, c("a", "b")) 65 | ) 66 | ) 67 | }) 68 | 69 | test_that("propagates names onto the row names", { 70 | x <- set_names(factor(c("a", "b", "a")), c("x", "y", "z")) 71 | 
expect_identical(rownames(fct_encode_one_hot(x)), c("x", "y", "z")) 72 | }) 73 | 74 | test_that("works with ordered factors", { 75 | x <- factor( 76 | c("a", "b", "a", "a", "c"), 77 | levels = c("c", "b", "a"), 78 | ordered = TRUE 79 | ) 80 | 81 | expect <- matrix( 82 | 0L, 83 | nrow = 5, 84 | ncol = 3, 85 | dimnames = list(NULL, c("c", "b", "a")) 86 | ) 87 | expect[c(5, 7, 11, 13, 14)] <- 1L 88 | 89 | expect_identical(fct_encode_one_hot(x), expect) 90 | }) 91 | 92 | test_that("errors on missing values", { 93 | x <- factor(c("a", NA)) 94 | 95 | expect_snapshot(error = TRUE, { 96 | fct_encode_one_hot(x) 97 | }) 98 | }) 99 | 100 | test_that("errors on non-factors", { 101 | expect_snapshot(error = TRUE, { 102 | fct_encode_one_hot(1) 103 | }) 104 | }) 105 | -------------------------------------------------------------------------------- /tests/testthat/test-forge.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # run_forge() 3 | 4 | test_that("`run_forge()` throws an informative default error", { 5 | expect_snapshot(error = TRUE, run_forge(1)) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test-intercept.R: -------------------------------------------------------------------------------- 1 | test_that("can add an intercept column", { 2 | x <- add_intercept_column(mtcars) 3 | 4 | expect_equal(colnames(x)[1], "(Intercept)") 5 | expect_type(x[, 1], "integer") 6 | 7 | xx <- add_intercept_column(as.matrix(mtcars)) 8 | 9 | expect_matrix(xx) 10 | expect_equal(colnames(xx)[1], "(Intercept)") 11 | }) 12 | 13 | test_that("existing intercepts are skipped with a warning", { 14 | x <- add_intercept_column(mtcars) 15 | 16 | expect_snapshot({ 17 | xx <- add_intercept_column(x) 18 | }) 19 | 20 | expect_equal( 21 | xx, 22 | x 23 | ) 24 | }) 25 | 26 | test_that("can change the intercept column name", { 27 
| x <- add_intercept_column(mtcars, name = "intercept") 28 | 29 | expect_equal(colnames(x)[1], "intercept") 30 | }) 31 | 32 | test_that("name can only be a single character", { 33 | expect_snapshot(error = TRUE, { 34 | add_intercept_column(mtcars, name = c("x", "y")) 35 | }) 36 | expect_snapshot(error = TRUE, { 37 | add_intercept_column(mtcars, name = 1) 38 | }) 39 | }) 40 | 41 | test_that("data has to be a data frame or matrix", { 42 | expect_snapshot(error = TRUE, { 43 | add_intercept_column(1) 44 | }) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-levels.R: -------------------------------------------------------------------------------- 1 | test_that("can extract levels", { 2 | x <- data.frame( 3 | x = factor(letters[1:5]), 4 | y = factor(letters[6:10]) 5 | ) 6 | 7 | expect_equal( 8 | get_levels(x), 9 | list( 10 | x = letters[1:5], 11 | y = letters[6:10] 12 | ) 13 | ) 14 | }) 15 | 16 | test_that("non-factors are ignored", { 17 | x <- data.frame( 18 | x = factor(letters[1:5]), 19 | y = 6:10 20 | ) 21 | 22 | expect_equal( 23 | get_levels(x), 24 | list( 25 | x = letters[1:5] 26 | ) 27 | ) 28 | }) 29 | 30 | test_that("NULL returned when no factors", { 31 | x <- data.frame( 32 | x = 1:5, 33 | y = 6:10 34 | ) 35 | 36 | expect_equal( 37 | get_levels(x), 38 | NULL 39 | ) 40 | }) 41 | 42 | test_that("Only data frames are allowed, others return NULL", { 43 | x <- matrix( 44 | 1:5 45 | ) 46 | 47 | expect_equal( 48 | get_levels(x), 49 | NULL 50 | ) 51 | }) 52 | 53 | test_that("Multivariate columns are skipped over", { 54 | x <- data.frame(x = factor(letters[1:5])) 55 | x$y <- matrix(1:10, ncol = 2, dimnames = list(NULL, c("c1", "c2"))) 56 | 57 | expect_equal( 58 | get_levels(x), 59 | list(x = letters[1:5]) 60 | ) 61 | }) 62 | 63 | test_that("Can extract levels from an outcome", { 64 | expect_equal( 65 | get_outcome_levels(1:5), 66 | NULL 67 | ) 68 | 69 | expect_snapshot( 70 | error = TRUE, 71 | 
get_outcome_levels("a") 72 | ) 73 | 74 | expect_equal( 75 | get_outcome_levels(factor("a")), 76 | list(.outcome = "a") 77 | ) 78 | 79 | expect_equal( 80 | get_outcome_levels(matrix(1:5, dimnames = list(NULL, "c1"))), 81 | NULL 82 | ) 83 | 84 | expect_equal( 85 | get_outcome_levels(data.frame(x = factor(letters[1:5]))), 86 | list(x = letters[1:5]) 87 | ) 88 | }) 89 | -------------------------------------------------------------------------------- /tests/testthat/test-model-matrix.R: -------------------------------------------------------------------------------- 1 | test_that("`model_matrix()` strips all attributes from the `model.matrix()` results", { 2 | framed <- model_frame(Sepal.Width ~ Species + 0, iris) 3 | matrix <- model_matrix(framed$terms, framed$data) 4 | 5 | # Mock what `model_matrix()` does by stripping all attributes 6 | f <- Sepal.Width ~ Species + 0 7 | expect <- model.matrix(f, model.frame(f, iris)) 8 | expect <- expect[, 1, drop = TRUE] 9 | attributes(expect) <- NULL 10 | 11 | # `tibble:::matrixToDataFrame()` would propagate any attributes besides 12 | # column names to each individual column. 
`model.matrix()` would have 13 | # attached "assign" and "contrasts" attributes here 14 | expect_identical(matrix$Speciessetosa, expect) 15 | }) 16 | 17 | test_that("`contr_one_hot()` input checks", { 18 | expect_snapshot(contr_one_hot(n = 2, sparse = TRUE)) 19 | expect_snapshot(contr_one_hot(n = 2, contrasts = FALSE)) 20 | 21 | expect_snapshot(error = TRUE, { 22 | contr_one_hot(n = 1:2) 23 | }) 24 | expect_snapshot(error = TRUE, { 25 | contr_one_hot(n = list(1:2)) 26 | }) 27 | expect_snapshot(error = TRUE, { 28 | contr_one_hot(character(0)) 29 | }) 30 | expect_snapshot(error = TRUE, { 31 | contr_one_hot(-1) 32 | }) 33 | expect_snapshot(error = TRUE, { 34 | contr_one_hot(list()) 35 | }) 36 | }) 37 | 38 | test_that("one-hot encoding contrasts", { 39 | contr_mat <- contr_one_hot(12) 40 | expect_equal(colnames(contr_mat), paste(1:12)) 41 | expect_equal(rownames(contr_mat), paste(1:12)) 42 | expect_true(all(apply(contr_mat, 1, sum) == 1)) 43 | expect_true(all(apply(contr_mat, 2, sum) == 1)) 44 | 45 | chr_contr_mat <- contr_one_hot(letters[1:12]) 46 | expect_equal(colnames(chr_contr_mat), letters[1:12]) 47 | expect_equal(rownames(chr_contr_mat), letters[1:12]) 48 | expect_true(all(apply(chr_contr_mat, 1, sum) == 1)) 49 | expect_true(all(apply(chr_contr_mat, 2, sum) == 1)) 50 | }) 51 | -------------------------------------------------------------------------------- /tests/testthat/test-mold-xy.R: -------------------------------------------------------------------------------- 1 | test_that("unknown mold() inputs throw an error", { 2 | expect_snapshot(error = TRUE, { 3 | mold("hi") 4 | }) 5 | }) 6 | 7 | test_that("can use x-y mold interface", { 8 | sparse_bp <- default_xy_blueprint(composition = "dgCMatrix") 9 | matrix_bp <- default_xy_blueprint(composition = "matrix") 10 | 11 | x1 <- mold(iris[, "Sepal.Length", drop = FALSE], iris$Species) 12 | x2 <- mold( 13 | iris[, "Sepal.Length", drop = FALSE], 14 | iris$Species, 15 | blueprint = sparse_bp 16 | ) 17 | x3 <- mold( 18 
| iris[, "Sepal.Length", drop = FALSE], 19 | iris$Species, 20 | blueprint = matrix_bp 21 | ) 22 | 23 | expect_s3_class(x1$predictors, "tbl_df") 24 | expect_s4_class(x2$predictors, "dgCMatrix") 25 | expect_matrix(x3$predictors) 26 | 27 | expect_equal(colnames(x1$predictors), "Sepal.Length") 28 | expect_equal(colnames(x2$predictors), "Sepal.Length") 29 | expect_equal(colnames(x3$predictors), "Sepal.Length") 30 | 31 | expect_s3_class(x1$outcomes, "tbl_df") 32 | expect_s3_class(x2$outcomes, "tbl_df") 33 | expect_s3_class(x3$outcomes, "tbl_df") 34 | expect_equal(colnames(x1$outcomes), ".outcome") 35 | expect_equal(colnames(x2$outcomes), ".outcome") 36 | expect_equal(colnames(x3$outcomes), ".outcome") 37 | expect_s3_class(x1$blueprint, "default_xy_blueprint") 38 | }) 39 | 40 | test_that("xy intercepts can be added", { 41 | x1 <- mold( 42 | iris[, "Sepal.Length", drop = FALSE], 43 | iris$Species, 44 | blueprint = default_xy_blueprint(intercept = TRUE) 45 | ) 46 | x2 <- mold( 47 | iris[, "Sepal.Length", drop = FALSE], 48 | iris$Species, 49 | blueprint = default_xy_blueprint(intercept = TRUE, composition = "matrix") 50 | ) 51 | 52 | expect_true("(Intercept)" %in% colnames(x1$predictors)) 53 | expect_true("(Intercept)" %in% colnames(x2$predictors)) 54 | }) 55 | 56 | test_that("cannot pass anything in the dots", { 57 | expect_snapshot(error = TRUE, { 58 | mold( 59 | iris[, "Sepal.Length", drop = FALSE], 60 | iris$Species, 61 | z = "in the dots" 62 | ) 63 | }) 64 | expect_snapshot(error = TRUE, { 65 | mold( 66 | iris[, "Sepal.Length", drop = FALSE], 67 | iris$Species, 68 | blueprint = default_xy_blueprint(composition = "dgCMatrix"), 69 | z = "in the dots" 70 | ) 71 | }) 72 | }) 73 | 74 | test_that("`NULL` y value returns a 0 column tibble for `outcomes`", { 75 | x <- mold(iris, y = NULL) 76 | 77 | expect_equal(nrow(x$outcomes), 150) 78 | expect_equal(ncol(x$outcomes), 0) 79 | }) 80 | 81 | test_that("Missing y value returns a 0 column / 0 row tibble for `ptype`", { 82 | x <- 
mold(iris, y = NULL) 83 | expect_equal(x$blueprint$ptypes$outcomes, tibble()) 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-mold.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # run_mold() 3 | 4 | test_that("`run_mold()` throws an informative default error", { 5 | expect_snapshot(error = TRUE, run_mold(1)) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test-print.R: -------------------------------------------------------------------------------- 1 | test_that("print - formula", { 2 | expect_snapshot({ 3 | mold(Species ~ Sepal.Length, iris)$blueprint 4 | mold(~Sepal.Length, iris)$blueprint 5 | }) 6 | }) 7 | 8 | test_that("print - default", { 9 | expect_snapshot({ 10 | mold(iris[, c("Sepal.Length"), drop = FALSE], iris$Species)$blueprint 11 | }) 12 | }) 13 | 14 | test_that("print - recipe", { 15 | skip_if_not_installed("recipes") 16 | 17 | expect_snapshot({ 18 | mold(recipes::recipe(Species ~ Sepal.Length, iris), iris)$blueprint 19 | }) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test-quantile-pred.R: -------------------------------------------------------------------------------- 1 | test_that("quantile_pred error types", { 2 | expect_snapshot( 3 | error = TRUE, 4 | quantile_pred(1:10, 1:4 / 5) 5 | ) 6 | expect_snapshot( 7 | error = TRUE, 8 | quantile_pred(matrix(1:20, 5), -1:4 / 5) 9 | ) 10 | expect_snapshot( 11 | error = TRUE, 12 | quantile_pred(matrix(1:20, 5), 1:5 / 6) 13 | ) 14 | expect_snapshot( 15 | error = TRUE, 16 | quantile_pred(matrix(1:20, 5), 4:1 / 5) 17 | ) 18 | }) 19 | 20 | test_that("quantile levels are checked", { 21 | expect_snapshot(error = TRUE, { 22 | quantile_pred(matrix(1:20, 5), quantile_levels = NULL) 23 | }) 24 | 
expect_snapshot(error = TRUE, { 25 | quantile_pred(matrix(1:20, 5), quantile_levels = c(0.7, 0.7, 0.7)) 26 | }) 27 | expect_snapshot(error = TRUE, { 28 | quantile_pred( 29 | matrix(1:20, 5), 30 | quantile_levels = c(rep(0.7, 2), rep(0.8, 3)) 31 | ) 32 | }) 33 | expect_snapshot(error = TRUE, { 34 | quantile_pred(matrix(1:20, 5), quantile_levels = c(0.8, 0.7)) 35 | }) 36 | }) 37 | 38 | test_that("quantile_pred outputs", { 39 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 40 | expect_s3_class(v, "quantile_pred") 41 | expect_identical(attr(v, "quantile_levels"), 1:4 / 5) 42 | expect_identical( 43 | vctrs::vec_data(v), 44 | lapply(vctrs::vec_chop(matrix(1:20, 5)), drop) 45 | ) 46 | }) 47 | 48 | test_that("extract_quantile_levels", { 49 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 50 | expect_identical(extract_quantile_levels(v), 1:4 / 5) 51 | 52 | expect_snapshot( 53 | error = TRUE, 54 | extract_quantile_levels(1:10) 55 | ) 56 | }) 57 | 58 | test_that("quantile_pred formatting", { 59 | # multiple quantiles 60 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 61 | expect_snapshot(v) 62 | expect_snapshot(quantile_pred(matrix(1:18, 9), c(1 / 3, 2 / 3))) 63 | expect_snapshot( 64 | quantile_pred(matrix(seq(0.01, 1 - 0.01, length.out = 6), 3), c(.2, .8)) 65 | ) 66 | expect_snapshot(tibble(qntls = v)) 67 | m <- matrix(1:20, 5) 68 | m[2, 3] <- NA 69 | m[4, 2] <- NA 70 | expect_snapshot(quantile_pred(m, 1:4 / 5)) 71 | 72 | # single quantile 73 | m <- matrix(1:5) 74 | one_quantile <- quantile_pred(m, 5 / 9) 75 | expect_snapshot(one_quantile) 76 | expect_snapshot(tibble(qntls = one_quantile)) 77 | m[2] <- NA 78 | expect_snapshot(quantile_pred(m, 5 / 9)) 79 | 80 | set.seed(393) 81 | v <- quantile_pred(matrix(exp(rnorm(20)), ncol = 4), 1:4 / 5) 82 | expect_snapshot(format(v)) 83 | expect_snapshot(format(v, digits = 5)) 84 | }) 85 | 86 | test_that("as_tibble() for quantile_pred", { 87 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 88 | tbl <- as_tibble(v) 89 | expect_s3_class(tbl, 
c("tbl_df", "tbl", "data.frame")) 90 | expect_named(tbl, c(".pred_quantile", ".quantile_levels", ".row")) 91 | expect_true(nrow(tbl) == 20) 92 | }) 93 | 94 | test_that("as.matrix() for quantile_pred", { 95 | x <- matrix(1:20, 5) 96 | v <- quantile_pred(x, 1:4 / 5) 97 | m <- as.matrix(v) 98 | expect_true(is.matrix(m)) 99 | expect_identical(m, x) 100 | }) 101 | -------------------------------------------------------------------------------- /tests/testthat/test-scream.R: -------------------------------------------------------------------------------- 1 | test_that("novel levels can be ignored", { 2 | dat <- data.frame( 3 | y = 1:4, 4 | f = factor(letters[1:4]) 5 | ) 6 | 7 | new <- data.frame( 8 | y = 1:5, 9 | f = factor(letters[1:5]) 10 | ) 11 | 12 | ptype <- vec_ptype(dat) 13 | 14 | # Silent 15 | expect_snapshot({ 16 | x <- scream(new, ptype, allow_novel_levels = TRUE) 17 | }) 18 | 19 | expect_equal(levels(x$f), letters[1:5]) 20 | }) 21 | 22 | test_that("novel levels in a new character vector can be ignored", { 23 | dat <- data.frame( 24 | y = 1:4, 25 | f = factor(letters[1:4]) 26 | ) 27 | 28 | new <- data.frame( 29 | y = 1:5, 30 | f = letters[1:5], 31 | stringsAsFactors = FALSE 32 | ) 33 | 34 | ptype <- vec_ptype(dat) 35 | 36 | # Silent 37 | expect_snapshot({ 38 | x <- scream(new, ptype, allow_novel_levels = TRUE) 39 | }) 40 | 41 | expect_equal(levels(x$f), new$f) 42 | }) 43 | 44 | test_that("ignoring novel levels still passes through incompatible classes", { 45 | dat <- data.frame(f = factor(letters[1:4])) 46 | new <- data.frame(f = 1:5) 47 | 48 | ptype <- vec_ptype(dat) 49 | 50 | expect_error( 51 | scream(new, ptype, allow_novel_levels = TRUE), 52 | class = "vctrs_error_incompatible_type" 53 | ) 54 | }) 55 | -------------------------------------------------------------------------------- /tests/testthat/test-shrink.R: -------------------------------------------------------------------------------- 1 | test_that("an outcome can also be a predictor and is only 
returned once", { 2 | x <- mold(Sepal.Length ~ Sepal.Length, iris) 3 | 4 | expect_equal( 5 | colnames(shrink(iris, x$blueprint$ptypes$predictors)), 6 | "Sepal.Length" 7 | ) 8 | }) 9 | 10 | test_that("`data` must be data-like", { 11 | ptype <- data.frame(x = integer()) 12 | 13 | expect_snapshot(error = TRUE, { 14 | shrink(1, ptype) 15 | }) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-standardize.R: -------------------------------------------------------------------------------- 1 | test_that("standardize - factor", { 2 | std <- standardize(factor(letters[1:5])) 3 | expect_s3_class(std, "tbl_df") 4 | expect_equal(colnames(std), ".outcome") 5 | }) 6 | 7 | test_that("standardize - numeric", { 8 | std <- standardize(1:5) 9 | expect_s3_class(std, "tbl_df") 10 | expect_equal(colnames(std), ".outcome") 11 | 12 | std2 <- standardize(as.double(1:5)) 13 | expect_s3_class(std2, "tbl_df") 14 | expect_equal(colnames(std2), ".outcome") 15 | }) 16 | 17 | test_that("standardize - matrix", { 18 | mat_bad <- matrix(1:10, ncol = 2) 19 | mat_bad2 <- matrix("a", dimnames = list(NULL, "c1")) 20 | 21 | mat_good <- mat_bad 22 | colnames(mat_good) <- c("a", "b") 23 | 24 | expect_snapshot(error = TRUE, { 25 | standardize(mat_bad) 26 | }) 27 | 28 | expect_snapshot(error = TRUE, { 29 | standardize(mat_bad2) 30 | }) 31 | 32 | std <- standardize(mat_good) 33 | expect_s3_class(std, "tbl_df") 34 | expect_equal(colnames(std), c("a", "b")) 35 | }) 36 | 37 | test_that("standardize - array", { 38 | bad <- array(1:10, c(5, 2)) 39 | 40 | expect_snapshot(error = TRUE, { 41 | standardize(bad) 42 | }) 43 | 44 | bad2 <- array("a", c(1, 1), dimnames = list(NULL, "c1")) 45 | 46 | expect_snapshot(error = TRUE, { 47 | standardize(bad2) 48 | }) 49 | 50 | good <- bad 51 | colnames(good) <- c("a", "b") 52 | 53 | std <- standardize(good) 54 | expect_s3_class(std, "tbl_df") 55 | expect_equal(colnames(std), c("a", "b")) 56 | 57 | good2 <- 
array(1:5) 58 | 59 | std2 <- standardize(good2) 60 | expect_s3_class(std2, "tbl_df") 61 | expect_equal(colnames(std2), ".outcome") 62 | }) 63 | 64 | test_that("standardize - data.frame", { 65 | bad <- data.frame(1:5, 6:10) 66 | colnames(bad) <- NULL 67 | 68 | expect_snapshot(error = TRUE, { 69 | standardize(bad) 70 | }) 71 | 72 | bad2 <- data.frame(x = "a", stringsAsFactors = FALSE) 73 | 74 | expect_snapshot(error = TRUE, standardize(bad2)) 75 | 76 | bad3 <- data.frame(x = "a", y = "b", stringsAsFactors = FALSE) 77 | 78 | expect_snapshot(error = TRUE, standardize(bad3)) 79 | 80 | good <- bad 81 | colnames(good) <- c("a", "b") 82 | 83 | std <- standardize(good) 84 | expect_s3_class(std, "tbl_df") 85 | expect_equal(colnames(std), c("a", "b")) 86 | 87 | good2 <- data.frame(x = factor(letters[1:5]), y = factor(letters[6:10])) 88 | 89 | std2 <- standardize(good2) 90 | expect_s3_class(std2, "tbl_df") 91 | expect_equal(colnames(std2), c("x", "y")) 92 | }) 93 | 94 | test_that("standardize - unknown", { 95 | expect_snapshot(error = TRUE, standardize("hi")) 96 | expect_snapshot(error = TRUE, standardize(Sys.time())) 97 | }) 98 | -------------------------------------------------------------------------------- /tests/testthat/test-tune.R: -------------------------------------------------------------------------------- 1 | test_that("tune creates a call", { 2 | expect_true(is.call(tune())) 3 | expect_true(is.call(tune("foo"))) 4 | }) 5 | 6 | test_that("tune `id` value", { 7 | expect_identical(tune(), call("tune")) 8 | expect_identical(tune(""), call("tune")) 9 | expect_identical(tune("foo"), call("tune", "foo")) 10 | }) 11 | 12 | test_that("`id` is validated", { 13 | expect_snapshot(error = TRUE, tune(1)) 14 | expect_snapshot(error = TRUE, tune(c("x", "y"))) 15 | expect_snapshot(error = TRUE, tune(NA_character_)) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-use.R: 
-------------------------------------------------------------------------------- 1 | test_that("can create a modeling package", { 2 | skip_on_cran() 3 | skip_if_not_installed("recipes") 4 | 5 | local_options(usethis.quiet = TRUE) 6 | 7 | dir <- withr::local_tempdir("model") 8 | 9 | model <- "linear_regression" 10 | 11 | # `usethis.quiet = TRUE` silences most of the messages, but there is an 12 | # unavoidable `i Loading model` that we get from devtools if we don't do this 13 | suppressMessages({ 14 | create_modeling_package(dir, model, open = FALSE) 15 | }) 16 | 17 | top_level_files <- list.files(dir) 18 | script_files <- list.files(file.path(dir, "R")) 19 | 20 | expect_true("DESCRIPTION" %in% top_level_files) 21 | expect_true("R" %in% top_level_files) 22 | expect_true("man" %in% top_level_files) 23 | expect_true("NAMESPACE" %in% top_level_files) 24 | 25 | expect_true(glue::glue("{model}-constructor.R") %in% script_files) 26 | expect_true(glue::glue("{model}-fit.R") %in% script_files) 27 | expect_true(glue::glue("{model}-predict.R") %in% script_files) 28 | }) 29 | 30 | test_that("can add a second model to a modeling package", { 31 | skip_on_cran() 32 | skip_if_not_installed("recipes") 33 | 34 | local_options(usethis.quiet = TRUE) 35 | 36 | dir <- withr::local_tempdir("model") 37 | 38 | model1 <- "linear_regression" 39 | model2 <- "random_forest" 40 | 41 | # `usethis.quiet = TRUE` silences most of the messages, but there is an 42 | # unavoidable `i Loading model` that we get from devtools if we don't do this 43 | suppressMessages({ 44 | create_modeling_package(dir, model1, open = FALSE) 45 | }) 46 | 47 | usethis::with_project(dir, use_modeling_files(model2)) 48 | 49 | script_files <- list.files(file.path(dir, "R")) 50 | 51 | expect_true(glue::glue("{model1}-constructor.R") %in% script_files) 52 | expect_true(glue::glue("{model1}-fit.R") %in% script_files) 53 | expect_true(glue::glue("{model1}-predict.R") %in% script_files) 54 | 55 | 
expect_true(glue::glue("{model2}-constructor.R") %in% script_files) 56 | expect_true(glue::glue("{model2}-fit.R") %in% script_files) 57 | expect_true(glue::glue("{model2}-predict.R") %in% script_files) 58 | }) 59 | 60 | test_that("no `model` aborts normally", { 61 | expect_snapshot(error = TRUE, create_modeling_package(path = "my/path")) 62 | }) 63 | 64 | test_that("no `path` aborts normally", { 65 | expect_snapshot(error = TRUE, create_modeling_package(model = "my_model")) 66 | }) 67 | 68 | test_that("`model` can only be a single string", { 69 | skip_if_not_installed("recipes") 70 | 71 | expect_snapshot( 72 | error = TRUE, 73 | create_modeling_package(path = "my/path", model = c("model1", "model2")) 74 | ) 75 | expect_snapshot( 76 | error = TRUE, 77 | create_modeling_package(path = "my/path", model = 1) 78 | ) 79 | expect_snapshot( 80 | error = TRUE, 81 | create_modeling_package(path = "my/path", model = "model with space") 82 | ) 83 | }) 84 | -------------------------------------------------------------------------------- /tests/testthat/test-validation.R: -------------------------------------------------------------------------------- 1 | test_that("validate_outcomes_are_univariate()", { 2 | expect_silent(validate_outcomes_are_univariate(data.frame(x = 1))) 3 | 4 | expect_silent(validate_outcomes_are_univariate(matrix())) 5 | 6 | expect_silent(validate_outcomes_are_univariate(1)) 7 | 8 | expect_snapshot( 9 | error = TRUE, 10 | validate_outcomes_are_univariate(iris) 11 | ) 12 | }) 13 | 14 | test_that("validate_outcomes_are_numeric()", { 15 | expect_silent( 16 | validate_outcomes_are_numeric(mtcars) 17 | ) 18 | 19 | expect_snapshot( 20 | error = TRUE, 21 | validate_outcomes_are_numeric(iris) 22 | ) 23 | 24 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 25 | x <- data.frame(x = date, y = factor("hi")) 26 | 27 | expect_snapshot( 28 | error = TRUE, 29 | validate_outcomes_are_numeric(x) 30 | ) 31 | }) 32 | 33 | test_that("validate_no_formula_duplication()", { 
34 | expect_silent(validate_no_formula_duplication(y ~ x)) 35 | 36 | expect_snapshot( 37 | error = TRUE, 38 | validate_no_formula_duplication(y ~ y) 39 | ) 40 | 41 | expect_silent(validate_no_formula_duplication(y ~ log(y))) 42 | 43 | expect_snapshot( 44 | error = TRUE, 45 | validate_no_formula_duplication(y ~ log(y), original = TRUE) 46 | ) 47 | 48 | expect_snapshot( 49 | error = TRUE, 50 | validate_no_formula_duplication(y + x ~ y + x) 51 | ) 52 | 53 | expect_silent(validate_no_formula_duplication(y ~ .)) 54 | 55 | expect_snapshot( 56 | error = TRUE, 57 | validate_no_formula_duplication(y ~ . + y) 58 | ) 59 | 60 | # offset() is a weird special case but this is ok 61 | expect_silent(validate_no_formula_duplication(offset(y) ~ offset(y))) 62 | 63 | expect_snapshot( 64 | error = TRUE, 65 | validate_no_formula_duplication(y ~ offset(y), original = TRUE) 66 | ) 67 | }) 68 | 69 | test_that("validate_outcomes_are_factors()", { 70 | expect_silent( 71 | validate_outcomes_are_factors(data.frame(x = factor(c("A", "B")))) 72 | ) 73 | 74 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 75 | x <- data.frame(x = date, y = "hi", stringsAsFactors = FALSE) 76 | 77 | expect_snapshot( 78 | error = TRUE, 79 | validate_outcomes_are_factors(x) 80 | ) 81 | }) 82 | 83 | test_that("validate_outcomes_are_binary()", { 84 | expect_silent( 85 | validate_outcomes_are_binary(data.frame(x = factor(c("A", "B")))) 86 | ) 87 | 88 | expect_snapshot( 89 | error = TRUE, 90 | validate_outcomes_are_binary(iris) 91 | ) 92 | }) 93 | 94 | test_that("validate_predictors_are_numeric()", { 95 | expect_silent( 96 | validate_predictors_are_numeric(mtcars) 97 | ) 98 | 99 | expect_snapshot( 100 | error = TRUE, 101 | validate_predictors_are_numeric(iris) 102 | ) 103 | 104 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 105 | x <- data.frame(x = date, y = factor("hi")) 106 | 107 | expect_snapshot( 108 | error = TRUE, 109 | validate_predictors_are_numeric(x) 110 | ) 111 | }) 112 | 113 | 
test_that("validate_prediction_size()", { 114 | expect_silent( 115 | validate_prediction_size(mtcars, mtcars) 116 | ) 117 | 118 | expect_snapshot( 119 | error = TRUE, 120 | validate_prediction_size(mtcars[1:5, ], mtcars) 121 | ) 122 | }) 123 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 4 | /.quarto/ 5 | --------------------------------------------------------------------------------