├── .Rbuildignore ├── .github ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── workflows │ ├── R-CMD-check-hard.yaml │ ├── R-CMD-check.yaml │ ├── lock.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── .vscode ├── extensions.json └── settings.json ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── blueprint-formula-default.R ├── blueprint-formula.R ├── blueprint-recipe-default.R ├── blueprint-recipe.R ├── blueprint-xy-default.R ├── blueprint-xy.R ├── blueprint.R ├── case-weights.R ├── classes.R ├── compost.R ├── constructor.R ├── delete-response.R ├── encoding.R ├── extract.R ├── forge.R ├── hardhat-example-data.R ├── hardhat-package.R ├── import-standalone-obj-type.R ├── import-standalone-purrr.R ├── import-standalone-types-check.R ├── intercept.R ├── levels.R ├── model-frame.R ├── model-matrix.R ├── model-offset.R ├── mold.R ├── new.R ├── print.R ├── ptype.R ├── quantile-pred.R ├── recompose.R ├── scream.R ├── shrink.R ├── sparsevctrs.R ├── spruce.R ├── standardize.R ├── table.R ├── tune.R ├── use.R ├── util.R └── validation.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── air.toml ├── codecov.yml ├── cran-comments.md ├── data └── hardhat-example-data.RData ├── graphics ├── factor-handling │ ├── factor-handling.graffle │ └── factor-handling.png ├── modeling-package-design.graffle └── modeling-package-design │ ├── Fitting.png │ └── Prediction.png ├── hardhat.Rproj ├── inst └── templates │ └── R │ ├── constructor.R │ ├── fit.R │ └── predict.R ├── man-roxygen └── section-validation.R ├── man ├── add_intercept_column.Rd ├── check_quantile_levels.Rd ├── contr_one_hot.Rd ├── default_formula_blueprint.Rd ├── default_recipe_blueprint.Rd ├── default_xy_blueprint.Rd ├── delete_response.Rd ├── extract_ptype.Rd ├── fct_encode_one_hot.Rd ├── figures │ ├── Fitting.png │ ├── Prediction.png │ ├── factor-handling.png │ ├── lifecycle-archived.svg │ ├── lifecycle-defunct.svg │ ├── 
lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-questioning.svg │ ├── lifecycle-soft-deprecated.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ └── logo.png ├── forge.Rd ├── frequency_weights.Rd ├── get_data_classes.Rd ├── get_levels.Rd ├── hardhat-example-data.Rd ├── hardhat-extract.Rd ├── hardhat-package.Rd ├── importance_weights.Rd ├── is_blueprint.Rd ├── is_case_weights.Rd ├── is_frequency_weights.Rd ├── is_importance_weights.Rd ├── model_frame.Rd ├── model_matrix.Rd ├── model_offset.Rd ├── modeling-usethis.Rd ├── mold.Rd ├── new-blueprint.Rd ├── new-default-blueprint.Rd ├── new_case_weights.Rd ├── new_frequency_weights.Rd ├── new_importance_weights.Rd ├── new_model.Rd ├── quantile_pred.Rd ├── recompose.Rd ├── refresh_blueprint.Rd ├── rmd │ └── one-hot.Rmd ├── run-forge.Rd ├── run-mold.Rd ├── scream.Rd ├── shrink.Rd ├── spruce-multiple.Rd ├── spruce.Rd ├── standardize.Rd ├── tune.Rd ├── update_blueprint.Rd ├── validate_column_names.Rd ├── validate_no_formula_duplication.Rd ├── validate_outcomes_are_binary.Rd ├── validate_outcomes_are_factors.Rd ├── validate_outcomes_are_numeric.Rd ├── validate_outcomes_are_univariate.Rd ├── validate_prediction_size.Rd ├── validate_predictors_are_numeric.Rd └── weighted_table.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── email.yml ├── failures.md └── problems.md ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── blueprint-formula-default.md │ ├── blueprint-recipe.md │ ├── blueprint.md │ ├── case-weights.md │ ├── constructor.md │ ├── delete-response.md │ ├── encoding.md │ ├── forge-formula.md │ ├── forge-recipe.md │ ├── forge-xy.md │ ├── forge.md │ ├── 
intercept.md │ ├── levels.md │ ├── model-matrix.md │ ├── model-offset.md │ ├── mold-formula.md │ ├── mold-recipe.md │ ├── mold-xy.md │ ├── mold.md │ ├── print.md │ ├── quantile-pred.md │ ├── recompose.md │ ├── scream.md │ ├── shrink.md │ ├── spruce.md │ ├── standardize.md │ ├── table.md │ ├── tune.md │ ├── use.md │ └── validation.md │ ├── data-raw │ └── hardhat-0.2.0.R │ ├── data │ ├── hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds │ ├── hardhat-0.2.0-post-mold-recipe.rds │ └── hardhat-0.2.0-pre-mold-recipe.rds │ ├── helper-matrix.R │ ├── helper-sparsevctrs.R │ ├── test-blueprint-formula-default.R │ ├── test-blueprint-recipe.R │ ├── test-blueprint.R │ ├── test-case-weights.R │ ├── test-constructor.R │ ├── test-delete-response.R │ ├── test-encoding.R │ ├── test-forge-formula.R │ ├── test-forge-recipe.R │ ├── test-forge-xy.R │ ├── test-forge.R │ ├── test-intercept.R │ ├── test-levels.R │ ├── test-model-matrix.R │ ├── test-model-offset.R │ ├── test-mold-formula.R │ ├── test-mold-recipe.R │ ├── test-mold-xy.R │ ├── test-mold.R │ ├── test-print.R │ ├── test-quantile-pred.R │ ├── test-recompose.R │ ├── test-scream.R │ ├── test-shrink.R │ ├── test-spruce.R │ ├── test-standardize.R │ ├── test-table.R │ ├── test-tune.R │ ├── test-use.R │ └── test-validation.R └── vignettes ├── .gitignore ├── forge.Rmd ├── mold.Rmd └── package.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^_pkgdown\.yml$ 3 | ^docs$ 4 | ^codecov\.yml$ 5 | ^README\.Rmd$ 6 | ^\.travis\.yml$ 7 | ^hardhat\.Rproj$ 8 | ^\.Rproj\.user$ 9 | ^man-roxygen$ 10 | ^graphics$ 11 | ^\.github$ 12 | ^LICENSE\.md$ 13 | ^cran-comments\.md$ 14 | ^CRAN-RELEASE$ 15 | ^revdep$ 16 | ^CODE_OF_CONDUCT\.md$ 17 | ^CRAN-SUBMISSION$ 18 | ^[\.]?air\.toml$ 19 | ^\.vscode$ 20 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | 
*.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to tidymodels 2 | 3 | For more detailed information about contributing to tidymodels packages, see our [**development contributing guide**](https://www.tidymodels.org/contribute/). 4 | 5 | ## Documentation 6 | 7 | Typos or grammatical errors in documentation may be edited directly using the GitHub web interface, as long as the changes are made in the _source_ file. 8 | 9 | * YES ✅: you edit a roxygen comment in an `.R` file in the `R/` directory. 10 | * NO 🚫: you edit an `.Rd` file in the `man/` directory. 11 | 12 | We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 13 | 14 | ## Code 15 | 16 | Before you submit 🎯 a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal. 17 | 18 | The [tidymodels packages](https://www.tidymodels.org/packages/) work together. Each package contains its own unit tests, while integration tests and other tests using all the packages are contained in [extratests](https://github.com/tidymodels/extratests). 19 | 20 | * We recommend that you create a Git branch for each pull request (PR). 21 | * Look at the build status before and after making changes. The `README` contains badges for any continuous integration services used by the package. 22 | * New code should follow the tidyverse [style guide](http://style.tidyverse.org). You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 
23 | * For user-facing changes, add a bullet to the top of `NEWS.md` below the current development version header describing the changes made followed by your GitHub username, and links to relevant issue(s)/PR(s). 24 | * We use [testthat](https://cran.r-project.org/package=testthat). Contributions with test cases included are easier to accept. 25 | * If your contribution spans the use of more than one package, consider building [extratests](https://github.com/tidymodels/extratests) with your changes to check for breakages and/or adding new tests there. Let us know in your PR if you ran these extra tests. 26 | 27 | ### Code of Conduct 28 | 29 | This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 30 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-hard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, 5 | # Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never 6 | # installed, with the exception of testthat, knitr, and rmarkdown. The cache is 7 | # never used to avoid accidentally restoring a cache containing a suggested 8 | # dependency. 
9 | on: 10 | push: 11 | branches: [main, master] 12 | pull_request: 13 | branches: [main, master] 14 | 15 | name: R-CMD-check-no-suggests.yaml 16 | 17 | permissions: read-all 18 | 19 | jobs: 20 | check-no-suggests: 21 | runs-on: ${{ matrix.config.os }} 22 | 23 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 24 | 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | config: 29 | - {os: ubuntu-latest, r: 'release'} 30 | 31 | env: 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | R_KEEP_PKG_SOURCE: yes 34 | 35 | steps: 36 | - uses: actions/checkout@v4 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | http-user-agent: ${{ matrix.config.http-user-agent }} 44 | use-public-rspm: true 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | dependencies: '"hard"' 49 | cache: false 50 | extra-packages: | 51 | any::rcmdcheck 52 | any::testthat 53 | any::knitr 54 | any::rmarkdown 55 | needs: check 56 | 57 | - uses: r-lib/actions/check-r-package@v2 58 | with: 59 | upload-snapshots: true 60 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 61 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 
7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | 12 | name: R-CMD-check.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: macos-latest, r: 'release'} 27 | 28 | - {os: windows-latest, r: 'release'} 29 | # use 4.0 or 4.1 to check with rtools40's older compiler 30 | - {os: windows-latest, r: 'oldrel-4'} 31 | 32 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 33 | - {os: ubuntu-latest, r: 'release'} 34 | - {os: ubuntu-latest, r: 'oldrel-1'} 35 | - {os: ubuntu-latest, r: 'oldrel-2'} 36 | - {os: ubuntu-latest, r: 'oldrel-3'} 37 | - {os: ubuntu-latest, r: 'oldrel-4'} 38 | 39 | env: 40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 41 | R_KEEP_PKG_SOURCE: yes 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | 46 | - uses: r-lib/actions/setup-pandoc@v2 47 | 48 | - uses: r-lib/actions/setup-r@v2 49 | with: 50 | r-version: ${{ matrix.config.r }} 51 | http-user-agent: ${{ matrix.config.http-user-agent }} 52 | use-public-rspm: true 53 | 54 | - uses: r-lib/actions/setup-r-dependencies@v2 55 | with: 56 | extra-packages: any::rcmdcheck 57 | needs: check 58 | 59 | - uses: r-lib/actions/check-r-package@v2 60 | with: 61 | upload-snapshots: true 62 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 63 | -------------------------------------------------------------------------------- /.github/workflows/lock.yaml: -------------------------------------------------------------------------------- 1 | name: 'Lock Threads' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | lock: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dessant/lock-threads@v2 12 | with: 13 | github-token: ${{ github.token }} 14 | issue-lock-inactive-days: '14' 15 | # issue-exclude-labels: '' 16 | # issue-lock-labels: 'outdated' 17 | 
issue-lock-comment: > 18 | This issue has been automatically locked. If you believe you have 19 | found a related problem, please file a new issue (with a reprex: 20 | ) and link to this issue. 21 | issue-lock-reason: '' 22 | pr-lock-inactive-days: '14' 23 | # pr-exclude-labels: 'wip' 24 | pr-lock-labels: '' 25 | pr-lock-comment: > 26 | This pull request has been automatically locked. If you believe you 27 | have found a related problem, please file a new issue (with a reprex: 28 | ) and link to this issue. 29 | pr-lock-reason: '' 30 | # process-only: 'issues' 31 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown.yaml 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | runs-on: ubuntu-latest 18 | # Only restrict concurrency for non-PR jobs 19 | concurrency: 20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | permissions: 24 | contents: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 
37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.5.0 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ 
secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | print(cov) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v5 42 | with: 43 | # Fail if error if not on PR, or if on PR and token is given 44 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 45 | files: ./cobertura.xml 46 | plugins: noop 47 | disable_search: true 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | 50 | - name: Show testthat output 51 | if: always() 52 | run: | 53 | ## -------------------------------------------------------------------- 54 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 55 | shell: bash 56 | 57 | - name: Upload test results 58 | if: failure() 59 | uses: actions/upload-artifact@v4 60 | with: 61 | name: coverage-test-failures 62 | path: ${{ runner.temp }}/package 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | docs/ 3 | .Rhistory 4 | .RData 5 | .Rproj.user 6 | .DS_Store 7 | docs 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "Posit.air-vscode" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[r]": { 3 | "editor.formatOnSave": true, 4 | "editor.defaultFormatter": "Posit.air-vscode" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: hardhat 2 | Title: Construct Modeling Packages 3 | Version: 1.4.1.9000 4 | Authors@R: c( 5 | person("Hannah", "Frick", , "hannah@posit.co", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0002-6049-5258")), 7 | person("Davis", "Vaughan", , "davis@posit.co", role = "aut"), 8 | person("Max", "Kuhn", , "max@posit.co", role = "aut"), 9 | person("Posit Software, PBC", role = c("cph", "fnd"), 10 | comment = c(ROR = "03wc8by49")) 11 | ) 12 | Description: Building modeling packages is hard. A large amount of effort 13 | generally goes into providing an implementation for a new method that 14 | is efficient, fast, and correct, but often less emphasis is put on the 15 | user interface. A good interface requires specialized knowledge about 16 | S3 methods and formulas, which the average package developer might not 17 | have. The goal of 'hardhat' is to reduce the burden around building 18 | new modeling packages by providing functionality for preprocessing, 19 | predicting, and validating input. 
20 | License: MIT + file LICENSE 21 | URL: https://github.com/tidymodels/hardhat, https://hardhat.tidymodels.org 22 | BugReports: https://github.com/tidymodels/hardhat/issues 23 | Depends: 24 | R (>= 4.1) 25 | Imports: 26 | cli (>= 3.6.0), 27 | glue (>= 1.6.2), 28 | rlang (>= 1.1.0), 29 | sparsevctrs (>= 0.2.0), 30 | tibble (>= 3.2.1), 31 | vctrs (>= 0.6.0) 32 | Suggests: 33 | covr, 34 | crayon, 35 | devtools, 36 | knitr, 37 | Matrix, 38 | modeldata (>= 0.0.2), 39 | recipes (>= 1.0.5), 40 | rmarkdown (>= 2.3), 41 | roxygen2, 42 | testthat (>= 3.0.0), 43 | usethis (>= 2.1.5), 44 | withr (>= 3.0.0) 45 | VignetteBuilder: 46 | knitr 47 | Config/Needs/website: tidyverse/tidytemplate 48 | Config/testthat/edition: 3 49 | Config/usethis/last-upkeep: 2025-04-23 50 | Encoding: UTF-8 51 | LazyData: true 52 | Roxygen: list(markdown = TRUE) 53 | RoxygenNote: 7.3.2 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: hardhat authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 hardhat authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/blueprint-formula.R: -------------------------------------------------------------------------------- 1 | #' @param formula Either `NULL`, or a formula that specifies how the 2 | #' predictors and outcomes should be preprocessed. This argument is set 3 | #' automatically at [mold()] time. 4 | #' 5 | #' @param indicators A single character string. Control how factors are 6 | #' expanded into dummy variable indicator columns. One of: 7 | #' 8 | #' - `"traditional"` - The default. Create dummy variables using the 9 | #' traditional [model.matrix()] infrastructure. Generally this creates 10 | #' `K - 1` indicator columns for each factor, where `K` is the number of 11 | #' levels in that factor. 12 | #' 13 | #' - `"none"` - Leave factor variables alone. No expansion is done. 14 | #' 15 | #' - `"one_hot"` - Create dummy variables using a one-hot encoding approach 16 | #' that expands unordered factors into all `K` indicator columns, rather than 17 | #' `K - 1`. 
18 | #' 19 | #' @rdname new-blueprint 20 | #' @export 21 | new_formula_blueprint <- function( 22 | intercept = FALSE, 23 | allow_novel_levels = FALSE, 24 | ptypes = NULL, 25 | formula = NULL, 26 | indicators = "traditional", 27 | composition = "tibble", 28 | ..., 29 | subclass = character() 30 | ) { 31 | check_formula(formula, allow_null = TRUE) 32 | check_indicators(indicators) 33 | 34 | new_blueprint( 35 | intercept = intercept, 36 | allow_novel_levels = allow_novel_levels, 37 | ptypes = ptypes, 38 | formula = formula, 39 | indicators = indicators, 40 | composition = composition, 41 | ..., 42 | subclass = c(subclass, "formula_blueprint") 43 | ) 44 | } 45 | 46 | #' @export 47 | refresh_blueprint.formula_blueprint <- function(blueprint) { 48 | do.call(new_formula_blueprint, as.list(blueprint)) 49 | } 50 | 51 | check_formula_blueprint <- function( 52 | x, 53 | ..., 54 | arg = caller_arg(x), 55 | call = caller_env() 56 | ) { 57 | check_inherits(x, "formula_blueprint", arg = arg, call = call) 58 | } 59 | 60 | # ------------------------------------------------------------------------------ 61 | 62 | check_indicators <- function(indicators, error_call = caller_env()) { 63 | arg_match0( 64 | arg = indicators, 65 | values = c("traditional", "none", "one_hot"), 66 | error_call = error_call 67 | ) 68 | } 69 | -------------------------------------------------------------------------------- /R/blueprint-recipe.R: -------------------------------------------------------------------------------- 1 | #' @param recipe Either `NULL`, or an unprepped recipe. This argument is set 2 | #' automatically at [mold()] time. 3 | #' 4 | #' @param fresh Should already trained operations be re-trained when `prep()` is 5 | #' called? 6 | #' 7 | #' @param strings_as_factors Should character columns be converted to factors 8 | #' when `prep()` is called? 
9 | #' 10 | #' @rdname new-blueprint 11 | #' @export 12 | new_recipe_blueprint <- function( 13 | intercept = FALSE, 14 | allow_novel_levels = FALSE, 15 | fresh = TRUE, 16 | strings_as_factors = TRUE, 17 | composition = "tibble", 18 | ptypes = NULL, 19 | recipe = NULL, 20 | ..., 21 | subclass = character() 22 | ) { 23 | check_bool(fresh) 24 | check_bool(strings_as_factors) 25 | check_recipe(recipe, allow_null = TRUE) 26 | 27 | new_blueprint( 28 | intercept = intercept, 29 | allow_novel_levels = allow_novel_levels, 30 | fresh = fresh, 31 | strings_as_factors = strings_as_factors, 32 | composition = composition, 33 | ptypes = ptypes, 34 | recipe = recipe, 35 | ..., 36 | subclass = c(subclass, "recipe_blueprint") 37 | ) 38 | } 39 | 40 | #' @export 41 | refresh_blueprint.recipe_blueprint <- function(blueprint) { 42 | do.call(new_recipe_blueprint, as.list(blueprint)) 43 | } 44 | 45 | check_recipe_blueprint <- function( 46 | x, 47 | ..., 48 | arg = caller_arg(x), 49 | call = caller_env() 50 | ) { 51 | check_inherits(x, "recipe_blueprint", arg = arg, call = call) 52 | } 53 | 54 | # ------------------------------------------------------------------------------ 55 | 56 | blueprint_strings_as_factors <- function(x) { 57 | # See #228 58 | if (has_name(x, "strings_as_factors")) { 59 | # Blueprint is new enough to have this field 60 | x[["strings_as_factors"]] 61 | } else { 62 | # Backwards compatible support for the `recipes::prep()` default if the 63 | # blueprint is old 64 | TRUE 65 | } 66 | } 67 | 68 | # ------------------------------------------------------------------------------ 69 | 70 | is_recipe <- function(x) { 71 | inherits(x, "recipe") 72 | } 73 | 74 | check_recipe <- function( 75 | x, 76 | ..., 77 | allow_null = FALSE, 78 | arg = caller_arg(x), 79 | call = caller_env() 80 | ) { 81 | if (!missing(x)) { 82 | if (is_recipe(x)) { 83 | return(invisible(NULL)) 84 | } 85 | if (allow_null && is_null(x)) { 86 | return(invisible(NULL)) 87 | } 88 | } 89 | 90 | 
stop_input_type( 91 | x = x, 92 | what = "a recipe", 93 | allow_null = allow_null, 94 | arg = arg, 95 | call = call 96 | ) 97 | } 98 | -------------------------------------------------------------------------------- /R/blueprint-xy.R: -------------------------------------------------------------------------------- 1 | #' @rdname new-blueprint 2 | #' @export 3 | new_xy_blueprint <- function( 4 | intercept = FALSE, 5 | allow_novel_levels = FALSE, 6 | composition = "tibble", 7 | ptypes = NULL, 8 | ..., 9 | subclass = character() 10 | ) { 11 | new_blueprint( 12 | intercept = intercept, 13 | allow_novel_levels = allow_novel_levels, 14 | composition = composition, 15 | ptypes = ptypes, 16 | ..., 17 | subclass = c(subclass, "xy_blueprint") 18 | ) 19 | } 20 | 21 | #' @export 22 | refresh_blueprint.xy_blueprint <- function(blueprint) { 23 | do.call(new_xy_blueprint, as.list(blueprint)) 24 | } 25 | 26 | check_xy_blueprint <- function( 27 | x, 28 | ..., 29 | arg = caller_arg(x), 30 | call = caller_env() 31 | ) { 32 | check_inherits(x, "xy_blueprint", arg = arg, call = call) 33 | } 34 | -------------------------------------------------------------------------------- /R/classes.R: -------------------------------------------------------------------------------- 1 | #' Extract data classes from a data frame or matrix 2 | #' 3 | #' When predicting from a model, it is often important for the `new_data` to 4 | #' have the same classes as the original data used to fit the model. 5 | #' `get_data_classes()` extracts the classes from the original training data. 6 | #' 7 | #' @param data A data frame or matrix. 8 | #' 9 | #' @inheritParams validate_column_names 10 | #' 11 | #' @return 12 | #' 13 | #' A named list. The names are the column names of `data` and the values are 14 | #' character vectors containing the class of that column. 
15 | #' 16 | #' @examples 17 | #' get_data_classes(iris) 18 | #' 19 | #' get_data_classes(as.matrix(mtcars)) 20 | #' 21 | #' # Unlike .MFclass(), the full class 22 | #' # vector is returned 23 | #' data <- data.frame(col = ordered(c("a", "b"))) 24 | #' 25 | #' .MFclass(data$col) 26 | #' 27 | #' get_data_classes(data) 28 | #' @export 29 | get_data_classes <- function(data, ..., call = current_env()) { 30 | check_dots_empty0(...) 31 | data <- extract_ptype(data, call = call) 32 | check_unique_column_names(data, call = call) 33 | lapply(data, class) 34 | } 35 | -------------------------------------------------------------------------------- /R/compost.R: -------------------------------------------------------------------------------- 1 | # This will eventually live in recipes 2 | # https://github.com/tidymodels/recipes/issues/268 3 | 4 | compost <- function(object) { 5 | if (!recipes::fully_trained(object)) { 6 | return(object) 7 | } 8 | 9 | object$template <- NULL 10 | object$retained <- FALSE 11 | 12 | object 13 | } 14 | -------------------------------------------------------------------------------- /R/constructor.R: -------------------------------------------------------------------------------- 1 | #' Constructor for a base model 2 | #' 3 | #' A __model__ is a _scalar object_, as classified in 4 | #' [Advanced R](https://adv-r.hadley.nz/s3.html#object-styles). As such, it 5 | #' takes uniquely named elements in `...` and combines them into a list with 6 | #' a class of `class`. This entire object represent a single model. 7 | #' 8 | #' Because every model should have multiple interfaces, including formula 9 | #' and `recipes` interfaces, all models should have a `blueprint` that 10 | #' can process new data when `predict()` is called. The easiest way to generate 11 | #' an blueprint with all of the information required at prediction time is to 12 | #' use the one that is returned from a call to [mold()]. 13 | #' 14 | #' @param ... 
#'   Name-value pairs for elements specific to the model defined by
#'   `class`.
#'
#' @param blueprint A preprocessing `blueprint` returned from a call to [mold()].
#'
#' @param class A character vector representing the class of the model.
#'
#' @return
#'
#' A new scalar model object, represented as a classed list with named elements
#' specified in `...`.
#'
#' @examples
#' new_model(
#'   custom_element = "my-elem",
#'   blueprint = default_xy_blueprint(),
#'   class = "custom_model"
#' )
#' @export
new_model <- function(
  ...,
  blueprint = default_xy_blueprint(),
  class = character()
) {
  check_blueprint(blueprint)

  new_abstract_model(
    ...,
    blueprint = blueprint,
    class = c(class, "hardhat_model")
  )
}

# ------------------------------------------------------------------------------

#' @export
print.hardhat_model <- function(x, ...) {
  cat_line("<", class(x)[1], ">")

  # Display the model's elements without the (verbose) blueprint, and without
  # the class so the default list printing is used
  elems <- x
  elems$blueprint <- NULL
  print(unclass(elems))

  # Print methods should invisibly return their input unchanged
  invisible(x)
}

cat_line <- function(...) {
  cat(paste0(..., "\n", collapse = ""))
}

# ------------------------------------------------------------------------------

new_abstract_model <- function(..., class) {
  elems <- list2(...)
  check_unique_names(elems, arg = "...")

  new_scalar(elems, class = class)
}

new_scalar <- function(elems, ..., class = character()) {
  check_elems(elems)
  structure(elems, ..., class = c(class, "hardhat_scalar"))
}

# ------------------------------------------------------------------------------

# Validate the invariants of a scalar object: a non-empty list carrying
# unique names and no other attributes
check_elems <- function(elems, ..., call = caller_env()) {
  check_dots_empty0(...)

  if (!is.list(elems) || length(elems) == 0) {
    cli::cli_abort(
      "{.arg elems} must be a list of length 1 or greater.",
      call = call
    )
  }

  if (!has_unique_names(elems)) {
    cli::cli_abort(
      "{.arg elems} must have unique names.",
      call = call
    )
  }

  if (!identical(names(attributes(elems)), "names")) {
    cli::cli_abort(
      "{.arg elems} must have no attributes (apart from names).",
      call = call
    )
  }

  invisible(elems)
}
# ---- file: R/delete-response.R ----------------------------------------------
#' Delete the response from a terms object
#'
#' `delete_response()` is exactly the same as `delete.response()`, except
#' that it fixes a long standing bug by also removing the part of the
#' `"dataClasses"` attribute corresponding to the response, if it exists.
#'
#' @param terms A terms object.
#'
#' @return
#'
#' `terms` with the response sections removed.
#'
#' @details
#'
#' The bug is described here:
#'
#' \url{https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html}
#'
#' @examples
#'
#' framed <- model_frame(Species ~ Sepal.Width, iris)
#'
#' attr(delete.response(framed$terms), "dataClasses")
#'
#' attr(delete_response(framed$terms), "dataClasses")
#' @export
delete_response <- function(terms) {
  check_terms(terms)

  resp <- attr(terms, "response")
  data_class <- attr(terms, "dataClasses")

  response_exists <- !(is.null(resp) || (resp == 0L))
  data_class_exists <- !is.null(data_class)

  # Remove the dataClasses entry corresponding to the response, if present.
  # Use the scalar `&&` (not the vectorized `&`) for this length-1 condition.
  if (response_exists && data_class_exists) {
    attr(terms, "dataClasses") <- data_class[-resp]
  }

  delete.response(terms)
}
# ---- file: R/encoding.R -----------------------------------------------------
#' Encode a factor as a one-hot indicator matrix
#'
#' @description
#' `fct_encode_one_hot()` encodes a factor as a one-hot indicator matrix.
#'
#' This matrix consists of `length(x)` rows and `length(levels(x))` columns.
#' Every value in row `i` of the matrix is filled with `0L` except for the
#' column that has the same name as `x[[i]]`, which is instead filled with `1L`.
#'
#' @details
#' The columns are returned in the same order as `levels(x)`.
#'
#' If `x` has names, the names are propagated onto the result as the row names.
#'
#' @param x A factor.
#'
#' `x` can't contain missing values.
#'
#' `x` is allowed to be an ordered factor.
#'
#' @return An integer matrix with `length(x)` rows and `length(levels(x))`
#' columns.
#'
#' @export
#' @examples
#' fct_encode_one_hot(factor(letters))
#'
#' fct_encode_one_hot(factor(letters[1:2], levels = letters))
#'
#' set.seed(1234)
#' fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE)))
fct_encode_one_hot <- function(x) {
  if (!is.factor(x)) {
    cli::cli_abort("{.arg x} must be a factor, not {.obj_type_friendly {x}}.")
  }

  # Rows are named after `x` (when it has names), columns after the levels
  dim_names <- list(names(x), levels(x))

  n_obs <- length(x)
  n_levels <- length(levels(x))

  # Work with the underlying integer codes
  codes <- unclass(x)

  if (vec_any_missing(codes)) {
    cli::cli_abort("{.arg x} can't contain missing values.")
  }

  out <- matrix(0L, nrow = n_obs, ncol = n_levels, dimnames = dim_names)

  # Flip exactly one cell per row to 1 via matrix indexing:
  # row i, column = integer code of x[[i]]
  out[cbind(row = seq_len(n_obs), col = codes)] <- 1L

  out
}
# ---- file: R/hardhat-example-data.R -----------------------------------------
#' Example data for hardhat
#'
#' @details Data objects for a training and test set with the same variables:
#' three numeric and two factor columns.
#'
#' @name hardhat-example-data
#' @aliases example_train example_test
#' @docType data
#' @return \item{example_train,example_test}{tibbles}
#'
#' @keywords datasets
#' @examples
#' data("hardhat-example-data")
NULL
# ---- file: R/hardhat-package.R ----------------------------------------------
#' @keywords internal
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
#' @import rlang
#' @import vctrs
#' @importFrom glue glue
#' @importFrom tibble as_tibble
#' @importFrom tibble tibble
#' @importFrom stats model.frame
#' @importFrom stats model.matrix
#' @importFrom stats delete.response
#' @importFrom stats get_all_vars
#' @importFrom stats terms
#' @importFrom stats median
## usethis namespace: end
NULL
# ---- file: R/intercept.R ----------------------------------------------------
#' Add an intercept column to `data`
#'
#' This function adds an integer column of `1`'s to `data`.
#'
#' If a column named `name` already exists in `data`, then `data` is returned
#' unchanged and a warning is issued.
#'
#' @param data A data frame or matrix.
#'
#' @param name The name for the intercept column. Defaults to `"(Intercept)"`,
#' which is the same name that [stats::lm()] uses.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' `data` with an intercept column.
#'
#' @examples
#' add_intercept_column(mtcars)
#'
#' add_intercept_column(mtcars, "intercept")
#'
#' add_intercept_column(as.matrix(mtcars))
#' @export
add_intercept_column <- function(
  data,
  name = "(Intercept)",
  ...,
  call = current_env()
) {
  check_dots_empty0(...)
  check_data_frame_or_matrix(data, call = call)
  check_name(name, call = call)

  # If a column with this name already exists, warn and return the input as is
  if (name %in% colnames(data)) {
    cli::cli_warn(c(
      "{.arg data} already has a column named {.val {name}}.",
      "i" = "Returning {.arg data} unchanged."
    ))
    return(data)
  }

  if (is.matrix(data)) {
    # One-column integer matrix, named via dimnames, bound on the left
    intercept <- matrix(
      data = 1L,
      nrow = nrow(data),
      dimnames = list(NULL, name)
    )
    cbind(intercept, data)
  } else {
    # Data frame: splice the (possibly non-syntactic) name in with `:=`
    tibble::add_column(data, !!name := 1L, .before = 1L)
  }
}

maybe_add_intercept_column <- function(
  data,
  intercept = FALSE,
  ...,
  call = caller_env()
) {
  check_dots_empty0(...)

  if (intercept) {
    add_intercept_column(data, call = call)
  } else {
    data
  }
}
# ---- file: R/levels.R -------------------------------------------------------
#' Extract factor levels from a data frame
#'
#' `get_levels()` extracts the levels from any factor columns in `data`. It is
#' mainly useful for extracting the original factor levels from the predictors
#' in the training set. `get_outcome_levels()` is a small wrapper around
#' `get_levels()` for extracting levels from a factor outcome
#' that first calls [standardize()] on `y`.
#'
#' @inheritParams standardize
#'
#' @param data A data.frame to extract levels from.
#'
#' @return
#'
#' A named list with as many elements as there are factor columns in `data`
#' or `y`. The names are the names of the factor columns, and the values
#' are character vectors of the levels.
#'
#' If there are no factor columns, `NULL` is returned.
#'
#' @seealso [stats::.getXlevels()]
#'
#' @examples
#'
#' # Factor columns are returned with their levels
#' get_levels(iris)
#'
#' # No factor columns
#' get_levels(mtcars)
#'
#' # standardize() is first run on `y`
#' # which converts the input to a data frame
#' # with an automatically named column, `".outcome"`
#' get_outcome_levels(y = factor(letters[1:5]))
#' @export
get_levels <- function(data) {
  # Only data frames can carry named factor columns
  if (!is.data.frame(data)) {
    return(NULL)
  }

  all_levels <- lapply(data, levels)
  has_levels <- !vapply(all_levels, is.null, logical(1))

  # `NULL`, not an empty list, when no column is a factor
  if (!any(has_levels)) {
    return(NULL)
  }

  all_levels[has_levels]
}

#' @rdname get_levels
#' @export
get_outcome_levels <- function(y) {
  # standardize() wraps bare vectors into a data frame first
  y <- standardize(y)
  get_levels(y)
}
# ---- file: R/model-offset.R -------------------------------------------------
#' Extract a model offset
#'
#' `model_offset()` extracts a numeric offset from a model frame. It is
#' inspired by [stats::model.offset()], but has nicer error messages and
#' is slightly stricter.
#'
#' @param terms A `"terms"` object corresponding to `data`, returned from a
#' call to `model_frame()`.
#'
#' @param data A data frame returned from a call to `model_frame()`.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A numeric vector representing the offset.
#'
#' @details
#'
#' If a column that has been tagged as an offset is not numeric, a nice error
#' message is thrown telling you exactly which column was problematic.
#'
#' [stats::model.offset()] also allows for a column named `"(offset)"` to be
#' considered an offset along with any others that have been tagged by
#' [stats::offset()]. However, [stats::model.matrix()] does not recognize
#' these columns as offsets (so it doesn't remove them as it should). Because
#' of this inconsistency, columns named `"(offset)"` are _not_ treated specially
#' by `model_offset()`.
#'
#' @examples
#'
#' x <- model.frame(Species ~ offset(Sepal.Width), iris)
#'
#' model_offset(terms(x), x)
#'
#' xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris)
#'
#' model_offset(terms(xx), xx)
#'
#' # Problematic columns are caught with intuitive errors
#' tryCatch(
#'   expr = {
#'     x <- model.frame(~ offset(Species), iris)
#'     model_offset(terms(x), x)
#'   },
#'   error = function(e) {
#'     print(e$message)
#'   }
#' )
#' @export
model_offset <- function(terms, data, ..., call = caller_env()) {
  check_dots_empty0(...)

  # Column positions of offset terms, as recorded on the terms object
  .offset_pos <- attr(terms, "offset")

  if (is.null(.offset_pos)) {
    # No offsets were used in the formula
    return(NULL)
  }

  # Multiple offsets are summed together, starting from zero
  ans <- rep(0, times = nrow(data))

  for (.pos in .offset_pos) {
    .offset_val <- data[[.pos]]

    if (!is.numeric(.offset_val)) {
      bad_col <- colnames(data)[.pos]

      cli::cli_abort(
        "Column {.val {bad_col}} is tagged as an offset and thus must be
        numeric, not {.obj_type_friendly { .offset_val }}.",
        call = call
      )
    }

    ans <- ans + .offset_val
  }

  ans
}

extract_offset <- function(terms, data, ..., call = caller_env()) {
  check_dots_empty0(...)

  # Wrap the combined offset (if any) in a one-column tibble
  offset <- model_offset(terms, data, call = call)

  if (is.null(offset)) {
    NULL
  } else {
    tibble::tibble(.offset = offset)
  }
}
# ---- file: R/new.R ----------------------------------------------------------
# These are standardized constructors for internal objects returned from
# different blueprint handlers

# ------------------------------------------------------------------------------
# Mold

# Result of the "clean" stage for the formula/recipe interfaces
new_mold_clean <- function(blueprint, data) {
  list(
    blueprint = blueprint,
    data = data
  )
}

# Result of the "clean" stage for the xy interface
new_mold_clean_xy <- function(blueprint, x, y) {
  list(
    blueprint = blueprint,
    x = x,
    y = y
  )
}

# Final mold() result: processed predictors/outcomes plus the blueprint
new_mold_process <- function(predictors, outcomes, blueprint, extras) {
  list(
    predictors = predictors,
    outcomes = outcomes,
    blueprint = blueprint,
    extras = extras
  )
}

# Intermediate result carrying the data/ptype pair during mold()
new_mold_process_terms <- function(blueprint, data, ptype, extras = NULL) {
  list(
    blueprint = blueprint,
    data = data,
    ptype = ptype,
    extras = extras
  )
}

# ------------------------------------------------------------------------------
# Forge

new_forge_clean <- function(blueprint, predictors, outcomes, extras = NULL) {
  list(
    blueprint = blueprint,
    predictors = predictors,
    outcomes = outcomes,
    extras = extras
  )
}

new_forge_process <- function(predictors, outcomes, extras) {
  list(
    predictors = predictors,
    outcomes = outcomes,
    extras = extras
  )
}

new_forge_process_terms <- function(blueprint, data, extras = NULL) {
  list(
    blueprint = blueprint,
    data = data,
    extras = extras
  )
}

# ------------------------------------------------------------------------------
# ptypes
# Pair the 0-row predictor and outcome prototypes under standard names
new_ptypes <- function(predictors, outcomes) {
  list(
    predictors = predictors,
    outcomes = outcomes
  )
}

# ------------------------------------------------------------------------------
# Extras

# Just c() them together
# Extras aren't predictor or outcome specific
new_extras <- function(predictors_extras, outcomes_extras) {
  c(predictors_extras, outcomes_extras)
}
# ---- file: R/print.R --------------------------------------------------------
# One-word labels consumed by the shared blueprint print method below

#' @export
format.xy_blueprint <- function(x, ...) "XY"

#' @export
format.recipe_blueprint <- function(x, ...) "Recipe"

#' @export
format.formula_blueprint <- function(x, ...) "Formula"

#' @export
print.hardhat_blueprint <- function(x, ...) {
  label <- format(x)
  cli::cli_text("{label} blueprint:")

  cli::cli_par()
  cli::cli_text("# Predictors: {n_blueprint_predictors(x)}")
  cli::cli_text("# Outcomes: {n_blueprint_outcomes(x)}")
  cli::cli_text("Intercept: {x$intercept}")
  cli::cli_text("Novel Levels: {x$allow_novel_levels}")
  cli::cli_text("Composition: {x$composition}")
  if (inherits(x, "formula_blueprint")) {
    # Only the formula blueprint records an `indicators` setting
    cli::cli_text("Indicators: {x$indicators}")
  }
  cli::cli_end()
  invisible(x)
}

# Number of predictor columns recorded on the blueprint (0 before mold())
n_blueprint_predictors <- function(x) {
  n <- ncol(x$ptypes$predictors)
  if (is.null(n)) 0L else n
}

# Number of outcome columns recorded on the blueprint (0 before mold())
n_blueprint_outcomes <- function(x) {
  n <- ncol(x$ptypes$outcomes)
  if (is.null(n)) 0L else n
}
# ---- file: R/ptype.R --------------------------------------------------------
#' Extract a prototype
#'
#' @description
#'
#' `extract_ptype()` extracts a tibble with 0 rows from `data`.
#' This contains
#' all of the required information about column names, classes, and factor
#' levels that are required to check the structure of new data at prediction
#' time.
#'
#' @param data A data frame or matrix.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A 0 row slice of `data` after converting it to a tibble.
#'
#' @details
#'
#' `extract_ptype()` is useful when creating a new preprocessing `blueprint`. It
#' extracts the required information that will be used by the validation checks
#' at prediction time.
#'
#' @examples
#'
#' hardhat:::extract_ptype(iris)
#' @keywords internal
#'
extract_ptype <- function(data, ..., call = current_env()) {
  check_dots_empty0(...)

  # `NULL` (e.g. no outcomes) passes straight through
  if (is.null(data)) {
    return(NULL)
  }

  check_data_frame_or_matrix(data, call = call)

  # Coerce first so matrices also produce tibble prototypes,
  # then keep the structure but none of the rows
  data <- coerce_to_tibble(data)
  vec_slice(data, 0L)
}
# ---- file: R/recompose.R ----------------------------------------------------
#' Recompose a data frame into another form
#'
#' @description
#' `recompose()` takes a data frame and converts it into one of:
#' - A tibble
#' - A data frame
#' - A matrix
#' - A sparse matrix (using the Matrix package)
#'
#' This is an internal function used only by hardhat and recipes.
#'
#' @inheritParams rlang::args_dots_empty
#'
#' @param data A data frame.
#'
#' @param composition One of:
#' - `"tibble"` to convert to a tibble.
#' - `"data.frame"` to convert to a base data frame.
#' - `"matrix"` to convert to a matrix. All columns must be numeric.
#' - `"dgCMatrix"` to convert to a sparse matrix. All columns must be numeric,
#'   and the Matrix package must be installed.
#'
#' @inheritParams validate_column_names
#'
#' @returns
#' The output type is determined from the `composition`.
#'
#' @export
#' @keywords internal
#'
#' @examples
#' df <- vctrs::data_frame(x = 1)
#'
#' recompose(df)
#' recompose(df, composition = "matrix")
#'
#' # All columns must be numeric to convert to a matrix
#' df <- vctrs::data_frame(x = 1, y = "a")
#' try(recompose(df, composition = "matrix"))
recompose <- function(data, ..., composition = "tibble", call = caller_env()) {
  check_dots_empty0(...)
  check_data_frame(data, call = call)

  # Validate `composition` against the known output types
  composition <- arg_match0(
    arg = composition,
    values = c("tibble", "data.frame", "matrix", "dgCMatrix"),
    error_call = call
  )

  switch(
    composition,
    tibble = coerce_to_tibble(data),
    data.frame = new_data_frame(data, n = vec_size(data)),
    matrix = coerce_to_matrix(data, error_call = call),
    dgCMatrix = {
      if (is_sparse_tibble(data)) {
        # Columns are already sparse vectors: convert directly
        sparsevctrs::coerce_to_sparse_matrix(data, call = call)
      } else {
        # Dense path: densify to a matrix first, then sparsify
        data <- coerce_to_matrix(data, error_call = call)
        coerce_to_sparse(data, error_call = call)
      }
    }
  )
}

coerce_to_matrix <- function(data, error_call = caller_env()) {
  numeric <- map_lgl(data, is.numeric)

  if (!all(numeric)) {
    # Report exactly which columns block the matrix conversion
    loc <- which(!numeric)
    loc <- names(data)[loc]

    message <- c(
      "{.arg data} must only contain numeric columns.",
      i = "{cli::qty(length(loc))}{?This/These} column{?s} {?isn't/aren't}
      numeric: {.val {loc}}."
    )

    cli::cli_abort(message, call = error_call)
  }

  as.matrix(data)
}

coerce_to_sparse <- function(data, error_call = caller_env()) {
  # Matrix is only needed for the sparse composition, so check lazily
  check_installed("Matrix", call = error_call)
  Matrix::Matrix(data, sparse = TRUE)
}
# ---- file: R/shrink.R -------------------------------------------------------
#' Subset only required columns
#'
#' @description
#'
#' `shrink()` subsets `data` to only contain the required columns specified by
#' the prototype, `ptype`.
#'
#' @details
#'
#' `shrink()` is called by [forge()] before [scream()] and before the actual
#' processing is done.
#'
#' @param data A data frame containing the data to subset.
#'
#' @param ptype A data frame prototype containing the required columns.
#'
#' @inheritParams validate_column_names
#'
#' @return
#'
#' A tibble containing the required columns.
#'
#' @examples
#' # ---------------------------------------------------------------------------
#' # Setup
#'
#' train <- iris[1:100, ]
#' test <- iris[101:150, ]
#'
#' # ---------------------------------------------------------------------------
#' # shrink()
#'
#' # mold() is run at model fit time
#' # and a formula preprocessing blueprint is recorded
#' x <- mold(log(Sepal.Width) ~ Species, train)
#'
#' # Inside the result of mold() are the prototype tibbles
#' # for the predictors and the outcomes
#' ptype_pred <- x$blueprint$ptypes$predictors
#' ptype_out <- x$blueprint$ptypes$outcomes
#'
#' # Pass the test data, along with a prototype, to
#' # shrink() to extract the prototype columns
#' shrink(test, ptype_pred)
#'
#' # To extract the outcomes, just use the
#' # outcome prototype
#' shrink(test, ptype_out)
#'
#' # shrink() makes sure that the columns
#' # required by `ptype` actually exist in the data
#' # and errors nicely when they don't
#' test2 <- subset(test, select = -Species)
#' try(shrink(test2, ptype_pred))
#' @export
shrink <- function(data, ptype, ..., call = current_env()) {
  check_dots_empty0(...)

  # `NULL` data (e.g. no outcomes at prediction time) passes through
  if (is.null(data)) {
    return(NULL)
  }

  check_data_frame_or_matrix(data, call = call)
  data <- coerce_to_tibble(data)

  # Every column required by the prototype must be present in `data`
  cols <- colnames(ptype)
  validate_column_names(data, cols, call = call)

  data[cols]
}
# ---- file: R/sparsevctrs.R --------------------------------------------------
# Does any column of `x` use a sparsevctrs sparse vector representation?
is_sparse_tibble <- function(x) {
  any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))
}
# ---- file: R/tune.R ---------------------------------------------------------
#' Mark arguments for tuning
#'
#' `tune()` is an argument placeholder to be used with the recipes, parsnip, and
#' tune packages. It marks recipes step and parsnip model arguments for tuning.
#'
#' @param id A single character value that can be used to differentiate
#' parameters that are used in multiple places but have the same name, or if
#' the user wants to add a note to the specified parameter.
#'
#' @return A call object that echoes the user's input.
#'
#' @seealso `tune::tune_grid()`, `tune::tune_bayes()`
#'
#' @export
#'
#' @examplesIf rlang::is_installed(c("recipes"))
#' tune()
#' tune("your name here")
#'
#' # In practice, `tune()` is used alongside recipes or parsnip to mark
#' # specific arguments for tuning
#' library(recipes)
#'
#' recipe(mpg ~ ., data = mtcars) |>
#'   step_normalize(all_numeric_predictors()) |>
#'   step_pca(all_numeric_predictors(), num_comp = tune())
tune <- function(id = "") {
  check_string(id)

  # An empty id yields the bare `tune()` call, otherwise the id is echoed
  if (id == "") {
    call("tune")
  } else {
    call("tune", id)
  }
}
# ---- file: _pkgdown.yml -----------------------------------------------------
url: https://hardhat.tidymodels.org

template:
  bootstrap: 5
  package: tidytemplate
  bslib:
    primary: '#CA225E'
  includes:
    in_header: |

development:
  mode: auto

news:
  releases:
  - text: Version 0.1.0
    href: https://www.tidyverse.org/blog/2019/12/hardhat-0-1-0/

reference:
- title: Preprocessing
  contents:
  - mold
  - forge

- title: Prediction
  contents:
  - contains("spruce")
  - quantile_pred

- title: Utility
  contents:
  - contains("model_")
  - delete_response
  - standardize
  - new_model
  - add_intercept_column
  - weighted_table
  - fct_encode_one_hot

- title: Validation
  contents:
  - scream
  - shrink
  - contains("validate")
  - contains("check")

- title: Blueprint
  contents:
  - contains("blueprint")
  - run_mold
  - run_forge

- title: Case Weights
  contents:
  - new_case_weights
  - is_case_weights

- subtitle: Importance Weights
  contents:
  - importance_weights
  - new_importance_weights
| - is_importance_weights 64 | 65 | - subtitle: Frequency Weights 66 | contents: 67 | - frequency_weights 68 | - new_frequency_weights 69 | - is_frequency_weights 70 | 71 | - title: Setup 72 | contents: 73 | - contains("use_") 74 | - contains("create_") 75 | 76 | - title: Information 77 | contents: contains("get_") 78 | 79 | - title: Development 80 | contents: 81 | - tune 82 | - hardhat-extract 83 | 84 | - title: Data 85 | contents: hardhat-example-data 86 | -------------------------------------------------------------------------------- /air.toml: -------------------------------------------------------------------------------- 1 | [format] 2 | exclude = ["inst/templates/"] 3 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/cran-comments.md -------------------------------------------------------------------------------- /data/hardhat-example-data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/data/hardhat-example-data.RData -------------------------------------------------------------------------------- /graphics/factor-handling/factor-handling.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/factor-handling/factor-handling.graffle -------------------------------------------------------------------------------- /graphics/factor-handling/factor-handling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/factor-handling/factor-handling.png -------------------------------------------------------------------------------- /graphics/modeling-package-design.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design.graffle -------------------------------------------------------------------------------- /graphics/modeling-package-design/Fitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design/Fitting.png -------------------------------------------------------------------------------- /graphics/modeling-package-design/Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/graphics/modeling-package-design/Prediction.png -------------------------------------------------------------------------------- /hardhat.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | 
StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /inst/templates/R/constructor.R: -------------------------------------------------------------------------------- 1 | new_{{model}} <- function(coefs, blueprint) { 2 | hardhat::new_model(coefs = coefs, blueprint = blueprint, class = "{{model}}") 3 | } 4 | -------------------------------------------------------------------------------- /inst/templates/R/fit.R: -------------------------------------------------------------------------------- 1 | #' Fit a `{{model}}` 2 | #' 3 | #' `{{model}}()` fits a model. 4 | #' 5 | #' @param x Depending on the context: 6 | #' 7 | #' * A __data frame__ of predictors. 8 | #' * A __matrix__ of predictors. 9 | #' * A __recipe__ specifying a set of preprocessing steps 10 | #' created from [recipes::recipe()]. 11 | #' 12 | #' @param y When `x` is a __data frame__ or __matrix__, `y` is the outcome 13 | #' specified as: 14 | #' 15 | #' * A __data frame__ with 1 numeric column. 16 | #' * A __matrix__ with 1 numeric column. 17 | #' * A numeric __vector__. 18 | #' 19 | #' @param data When a __recipe__ or __formula__ is used, `data` is specified as: 20 | #' 21 | #' * A __data frame__ containing both the predictors and the outcome. 22 | #' 23 | #' @param formula A formula specifying the outcome terms on the left-hand side, 24 | #' and the predictor terms on the right-hand side. 25 | #' 26 | #' @param ... Not currently used, but required for extensibility. 27 | #' 28 | #' @return 29 | #' 30 | #' A `{{model}}` object. 
31 | #' 32 | #' @examples 33 | #' predictors <- mtcars[, -1] 34 | #' outcome <- mtcars[, 1] 35 | #' 36 | #' # XY interface 37 | #' mod <- {{model}}(predictors, outcome) 38 | #' 39 | #' # Formula interface 40 | #' mod2 <- {{model}}(mpg ~ ., mtcars) 41 | #' 42 | #' # Recipes interface 43 | #' library(recipes) 44 | #' rec <- recipe(mpg ~ ., mtcars) 45 | #' rec <- step_log(rec, disp) 46 | #' mod3 <- {{model}}(rec, mtcars) 47 | #' 48 | #' @export 49 | {{model}} <- function(x, ...) { 50 | UseMethod("{{model}}") 51 | } 52 | 53 | #' @export 54 | #' @rdname {{model}} 55 | {{model}}.default <- function(x, ...) { 56 | stop("`{{model}}()` is not defined for a '", class(x)[1], "'.", call. = FALSE) 57 | } 58 | 59 | # XY method - data frame 60 | 61 | #' @export 62 | #' @rdname {{model}} 63 | {{model}}.data.frame <- function(x, y, ...) { 64 | processed <- hardhat::mold(x, y) 65 | {{model}}_bridge(processed, ...) 66 | } 67 | 68 | # XY method - matrix 69 | 70 | #' @export 71 | #' @rdname {{model}} 72 | {{model}}.matrix <- function(x, y, ...) { 73 | processed <- hardhat::mold(x, y) 74 | {{model}}_bridge(processed, ...) 75 | } 76 | 77 | # Formula method 78 | 79 | #' @export 80 | #' @rdname {{model}} 81 | {{model}}.formula <- function(formula, data, ...) { 82 | processed <- hardhat::mold(formula, data) 83 | {{model}}_bridge(processed, ...) 84 | } 85 | 86 | # Recipe method 87 | 88 | #' @export 89 | #' @rdname {{model}} 90 | {{model}}.recipe <- function(x, data, ...) { 91 | processed <- hardhat::mold(x, data) 92 | {{model}}_bridge(processed, ...) 93 | } 94 | 95 | # ------------------------------------------------------------------------------ 96 | # Bridge 97 | 98 | {{model}}_bridge <- function(processed, ...) 
{ 99 | predictors <- processed$predictors 100 | outcome <- processed$outcomes[[1]] 101 | 102 | fit <- {{model}}_impl(predictors, outcome) 103 | 104 | new_{{model}}( 105 | coefs = fit$coefs, 106 | blueprint = processed$blueprint 107 | ) 108 | } 109 | 110 | 111 | # ------------------------------------------------------------------------------ 112 | # Implementation 113 | 114 | {{model}}_impl <- function(predictors, outcome) { 115 | list(coefs = 1) 116 | } 117 | -------------------------------------------------------------------------------- /inst/templates/R/predict.R: -------------------------------------------------------------------------------- 1 | #' Predict from a `{{model}}` 2 | #' 3 | #' @param object A `{{model}}` object. 4 | #' 5 | #' @param new_data A data frame or matrix of new predictors. 6 | #' 7 | #' @param type A single character. The type of predictions to generate. 8 | #' Valid options are: 9 | #' 10 | #' - `"numeric"` for numeric predictions. 11 | #' 12 | #' @param ... Not used, but required for extensibility. 13 | #' 14 | #' @return 15 | #' 16 | #' A tibble of predictions. The number of rows in the tibble is guaranteed 17 | #' to be the same as the number of rows in `new_data`. 18 | #' 19 | #' @examples 20 | #' train <- mtcars[1:20,] 21 | #' test <- mtcars[21:32, -1] 22 | #' 23 | #' # Fit 24 | #' mod <- {{model}}(mpg ~ cyl + log(drat), train) 25 | #' 26 | #' # Predict, with preprocessing 27 | #' predict(mod, test) 28 | #' 29 | #' @export 30 | predict.{{model}} <- function(object, new_data, type = "numeric", ...) 
{ 31 | forged <- hardhat::forge(new_data, object$blueprint) 32 | rlang::arg_match(type, valid_{{model}}_predict_types()) 33 | predict_{{model}}_bridge(type, object, forged$predictors) 34 | } 35 | 36 | valid_{{model}}_predict_types <- function() { 37 | c("numeric") 38 | } 39 | 40 | # ------------------------------------------------------------------------------ 41 | # Bridge 42 | 43 | predict_{{model}}_bridge <- function(type, model, predictors) { 44 | predictors <- as.matrix(predictors) 45 | 46 | predict_function <- get_{{model}}_predict_function(type) 47 | predictions <- predict_function(model, predictors) 48 | 49 | hardhat::validate_prediction_size(predictions, predictors) 50 | 51 | predictions 52 | } 53 | 54 | get_{{model}}_predict_function <- function(type) { 55 | switch( 56 | type, 57 | numeric = predict_{{model}}_numeric 58 | ) 59 | } 60 | 61 | # ------------------------------------------------------------------------------ 62 | # Implementation 63 | 64 | predict_{{model}}_numeric <- function(model, predictors) { 65 | predictions <- rep(1L, times = nrow(predictors)) 66 | hardhat::spruce_numeric(predictions) 67 | } 68 | -------------------------------------------------------------------------------- /man-roxygen/section-validation.R: -------------------------------------------------------------------------------- 1 | #' @section Validation: 2 | #' 3 | #' hardhat provides validation functions at two levels. 4 | #' 5 | #' - `check_*()`: _check a condition, and return a list_. The list 6 | #' always contains at least one element, `ok`, a logical that specifies if the 7 | #' check passed. Each check also has check specific elements in the returned 8 | #' list that can be used to construct meaningful error messages. 9 | #' 10 | #' - `validate_*()`: _check a condition, and error if it does not pass_. These 11 | #' functions call their corresponding check function, and 12 | #' then provide a default error message. 
If you, as a developer, want a 13 | #' different error message, then call the `check_*()` function yourself, 14 | #' and provide your own validation function. 15 | -------------------------------------------------------------------------------- /man/add_intercept_column.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/intercept.R 3 | \name{add_intercept_column} 4 | \alias{add_intercept_column} 5 | \title{Add an intercept column to \code{data}} 6 | \usage{ 7 | add_intercept_column(data, name = "(Intercept)", ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{name}{The name for the intercept column. Defaults to \code{"(Intercept)"}, 13 | which is the same name that \code{\link[stats:lm]{stats::lm()}} uses.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{data} with an intercept column. 21 | } 22 | \description{ 23 | This function adds an integer column of \code{1}'s to \code{data}. 24 | } 25 | \details{ 26 | If a column named \code{name} already exists in \code{data}, then \code{data} is returned 27 | unchanged and a warning is issued. 
28 | } 29 | \examples{ 30 | add_intercept_column(mtcars) 31 | 32 | add_intercept_column(mtcars, "intercept") 33 | 34 | add_intercept_column(as.matrix(mtcars)) 35 | } 36 | -------------------------------------------------------------------------------- /man/check_quantile_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quantile-pred.R 3 | \name{check_quantile_levels} 4 | \alias{check_quantile_levels} 5 | \title{Check levels of quantiles} 6 | \usage{ 7 | check_quantile_levels(levels, call = rlang::caller_env()) 8 | } 9 | \arguments{ 10 | \item{levels}{The quantile levels.} 11 | 12 | \item{call}{Call shown in the error messages.} 13 | } 14 | \value{ 15 | Invisible \code{TRUE} 16 | } 17 | \description{ 18 | Check levels of quantiles 19 | } 20 | \details{ 21 | Checks the levels for their data type, range, uniqueness, order and missingness. 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/delete_response.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/delete-response.R 3 | \name{delete_response} 4 | \alias{delete_response} 5 | \title{Delete the response from a terms object} 6 | \usage{ 7 | delete_response(terms) 8 | } 9 | \arguments{ 10 | \item{terms}{A terms object.} 11 | } 12 | \value{ 13 | \code{terms} with the response sections removed. 14 | } 15 | \description{ 16 | \code{delete_response()} is exactly the same as \code{delete.response()}, except 17 | that it fixes a long standing bug by also removing the part of the 18 | \code{"dataClasses"} attribute corresponding to the response, if it exists. 
19 | } 20 | \details{ 21 | The bug is described here: 22 | 23 | \url{https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html} 24 | } 25 | \examples{ 26 | 27 | framed <- model_frame(Species ~ Sepal.Width, iris) 28 | 29 | attr(delete.response(framed$terms), "dataClasses") 30 | 31 | attr(delete_response(framed$terms), "dataClasses") 32 | } 33 | -------------------------------------------------------------------------------- /man/extract_ptype.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ptype.R 3 | \name{extract_ptype} 4 | \alias{extract_ptype} 5 | \title{Extract a prototype} 6 | \usage{ 7 | extract_ptype(data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{call}{The call used for errors and warnings.} 15 | } 16 | \value{ 17 | A 0 row slice of \code{data} after converting it to a tibble. 18 | } 19 | \description{ 20 | \code{extract_ptype()} extracts a tibble with 0 rows from \code{data}. This contains 21 | all of the required information about column names, classes, and factor 22 | levels that are required to check the structure of new data at prediction 23 | time. 24 | } 25 | \details{ 26 | \code{extract_ptype()} is useful when creating a new preprocessing \code{blueprint}. It 27 | extracts the required information that will be used by the validation checks 28 | at prediction time. 
29 | } 30 | \examples{ 31 | 32 | hardhat:::extract_ptype(iris) 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/fct_encode_one_hot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/encoding.R 3 | \name{fct_encode_one_hot} 4 | \alias{fct_encode_one_hot} 5 | \title{Encode a factor as a one-hot indicator matrix} 6 | \usage{ 7 | fct_encode_one_hot(x) 8 | } 9 | \arguments{ 10 | \item{x}{A factor. 11 | 12 | \code{x} can't contain missing values. 13 | 14 | \code{x} is allowed to be an ordered factor.} 15 | } 16 | \value{ 17 | An integer matrix with \code{length(x)} rows and \code{length(levels(x))} 18 | columns. 19 | } 20 | \description{ 21 | \code{fct_encode_one_hot()} encodes a factor as a one-hot indicator matrix. 22 | 23 | This matrix consists of \code{length(x)} rows and \code{length(levels(x))} columns. 24 | Every value in row \code{i} of the matrix is filled with \code{0L} except for the 25 | column that has the same name as \code{x[[i]]}, which is instead filled with \code{1L}. 26 | } 27 | \details{ 28 | The columns are returned in the same order as \code{levels(x)}. 29 | 30 | If \code{x} has names, the names are propagated onto the result as the row names. 
31 | } 32 | \examples{ 33 | fct_encode_one_hot(factor(letters)) 34 | 35 | fct_encode_one_hot(factor(letters[1:2], levels = letters)) 36 | 37 | set.seed(1234) 38 | fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE))) 39 | } 40 | -------------------------------------------------------------------------------- /man/figures/Fitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/Fitting.png -------------------------------------------------------------------------------- /man/figures/Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/Prediction.png -------------------------------------------------------------------------------- /man/figures/factor-handling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/factor-handling.png -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: archived 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | archived 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: defunct 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | defunct 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: 
-------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: maturing 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | maturing 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: questioning 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | questioning 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-soft-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: soft-deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | soft-deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/man/figures/logo.png -------------------------------------------------------------------------------- /man/forge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forge.R 3 | \name{forge} 4 | \alias{forge} 5 | \title{Forge prediction-ready data} 6 | \usage{ 7 | forge(new_data, blueprint, ..., outcomes = FALSE) 8 | } 9 | \arguments{ 10 | \item{new_data}{A data frame or matrix of predictors to process. If 11 | \code{outcomes = TRUE}, this should also contain the outcomes to process.} 12 | 13 | \item{blueprint}{A preprocessing \code{blueprint}.} 14 | 15 | \item{...}{Not used.} 16 | 17 | \item{outcomes}{A logical. Should the outcomes be processed and returned 18 | as well?} 19 | } 20 | \value{ 21 | A named list with 3 elements: 22 | \itemize{ 23 | \item \code{predictors}: A tibble containing the preprocessed 24 | \code{new_data} predictors. 25 | \item \code{outcomes}: If \code{outcomes = TRUE}, a tibble containing the preprocessed 26 | outcomes found in \code{new_data}. Otherwise, \code{NULL}. 27 | \item \code{extras}: Either \code{NULL} if the blueprint returns no extra information, 28 | or a named list containing the extra information. 
29 | } 30 | } 31 | \description{ 32 | \code{forge()} applies the transformations requested by the specific \code{blueprint} 33 | on a set of \code{new_data}. This \code{new_data} contains new predictors 34 | (and potentially outcomes) that will be used to generate predictions. 35 | 36 | All blueprints have consistent return values with the others, but each is 37 | unique enough to have its own help page. Click through below to learn 38 | how to use each one in conjunction with \code{forge()}. 39 | \itemize{ 40 | \item XY Method - \code{\link[=default_xy_blueprint]{default_xy_blueprint()}} 41 | \item Formula Method - \code{\link[=default_formula_blueprint]{default_formula_blueprint()}} 42 | \item Recipes Method - \code{\link[=default_recipe_blueprint]{default_recipe_blueprint()}} 43 | } 44 | } 45 | \details{ 46 | If the outcomes are present in \code{new_data}, they can optionally be processed 47 | and returned in the \code{outcomes} slot of the returned list by setting 48 | \code{outcomes = TRUE}. This is very useful when doing cross validation where 49 | you need to preprocess the outcomes of a test set before computing 50 | performance. 51 | } 52 | \examples{ 53 | # See the blueprint specific documentation linked above 54 | # for various ways to call forge with different 55 | # blueprints. 
56 | 57 | train <- iris[1:100, ] 58 | test <- iris[101:150, ] 59 | 60 | # Formula 61 | processed <- mold( 62 | log(Sepal.Width) ~ Species, 63 | train, 64 | blueprint = default_formula_blueprint(indicators = "none") 65 | ) 66 | 67 | forge(test, processed$blueprint, outcomes = TRUE) 68 | } 69 | -------------------------------------------------------------------------------- /man/frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{frequency_weights} 4 | \alias{frequency_weights} 5 | \title{Frequency weights} 6 | \usage{ 7 | frequency_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An integer vector.} 11 | } 12 | \value{ 13 | A new frequency weights vector. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{frequency_weights()} creates a vector of frequency weights which allow you 19 | to compactly repeat an observation a set number of times. Frequency weights 20 | are supplied as a non-negative integer vector, where only whole numbers are 21 | allowed. 22 | } 23 | \details{ 24 | Frequency weights are integers that denote how many times a particular row of 25 | the data has been observed. They help compress redundant rows into a single 26 | entry. 27 | 28 | In tidymodels, frequency weights are used for all parts of the preprocessing, 29 | model fitting, and performance estimation operations. 
30 | } 31 | \examples{ 32 | # Record that the first observation has 10 replicates, the second has 12 33 | # replicates, and so on 34 | frequency_weights(c(10, 12, 2, 1)) 35 | 36 | # Fractional values are not allowed 37 | try(frequency_weights(c(1.5, 2.3, 10))) 38 | } 39 | \seealso{ 40 | \code{\link[=importance_weights]{importance_weights()}} 41 | } 42 | -------------------------------------------------------------------------------- /man/get_data_classes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classes.R 3 | \name{get_data_classes} 4 | \alias{get_data_classes} 5 | \title{Extract data classes from a data frame or matrix} 6 | \usage{ 7 | get_data_classes(data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame or matrix.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{call}{The call used for errors and warnings.} 15 | } 16 | \value{ 17 | A named list. The names are the column names of \code{data} and the values are 18 | character vectors containing the class of that column. 19 | } 20 | \description{ 21 | When predicting from a model, it is often important for the \code{new_data} to 22 | have the same classes as the original data used to fit the model. 23 | \code{get_data_classes()} extracts the classes from the original training data. 
24 | } 25 | \examples{ 26 | get_data_classes(iris) 27 | 28 | get_data_classes(as.matrix(mtcars)) 29 | 30 | # Unlike .MFclass(), the full class 31 | # vector is returned 32 | data <- data.frame(col = ordered(c("a", "b"))) 33 | 34 | .MFclass(data$col) 35 | 36 | get_data_classes(data) 37 | } 38 | -------------------------------------------------------------------------------- /man/get_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/levels.R 3 | \name{get_levels} 4 | \alias{get_levels} 5 | \alias{get_outcome_levels} 6 | \title{Extract factor levels from a data frame} 7 | \usage{ 8 | get_levels(data) 9 | 10 | get_outcome_levels(y) 11 | } 12 | \arguments{ 13 | \item{data}{A data.frame to extract levels from.} 14 | 15 | \item{y}{The outcome. This can be: 16 | \itemize{ 17 | \item A factor vector 18 | \item A numeric vector 19 | \item A 1D numeric array 20 | \item A numeric matrix with column names 21 | \item A 2D numeric array with column names 22 | \item A data frame with numeric or factor columns 23 | }} 24 | } 25 | \value{ 26 | A named list with as many elements as there are factor columns in \code{data} 27 | or \code{y}. The names are the names of the factor columns, and the values 28 | are character vectors of the levels. 29 | 30 | If there are no factor columns, \code{NULL} is returned. 31 | } 32 | \description{ 33 | \code{get_levels()} extracts the levels from any factor columns in \code{data}. It is 34 | mainly useful for extracting the original factor levels from the predictors 35 | in the training set. \code{get_outcome_levels()} is a small wrapper around 36 | \code{get_levels()} for extracting levels from a factor outcome 37 | that first calls \code{\link[=standardize]{standardize()}} on \code{y}. 
38 | } 39 | \examples{ 40 | 41 | # Factor columns are returned with their levels 42 | get_levels(iris) 43 | 44 | # No factor columns 45 | get_levels(mtcars) 46 | 47 | # standardize() is first run on `y` 48 | # which converts the input to a data frame 49 | # with an automatically named column, `".outcome"` 50 | get_outcome_levels(y = factor(letters[1:5])) 51 | } 52 | \seealso{ 53 | \code{\link[stats:checkMFClasses]{stats::.getXlevels()}} 54 | } 55 | -------------------------------------------------------------------------------- /man/hardhat-example-data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hardhat-example-data.R 3 | \docType{data} 4 | \name{hardhat-example-data} 5 | \alias{hardhat-example-data} 6 | \alias{example_train} 7 | \alias{example_test} 8 | \title{Example data for hardhat} 9 | \value{ 10 | \item{example_train,example_test}{tibbles} 11 | } 12 | \description{ 13 | Example data for hardhat 14 | } 15 | \details{ 16 | Data objects for a training and test set with the same variables: 17 | three numeric and two factor columns. 
18 | } 19 | \examples{ 20 | data("hardhat-example-data") 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/hardhat-extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract.R 3 | \name{hardhat-extract} 4 | \alias{hardhat-extract} 5 | \alias{extract_workflow} 6 | \alias{extract_recipe} 7 | \alias{extract_spec_parsnip} 8 | \alias{extract_fit_parsnip} 9 | \alias{extract_fit_engine} 10 | \alias{extract_mold} 11 | \alias{extract_preprocessor} 12 | \alias{extract_postprocessor} 13 | \alias{extract_parameter_dials} 14 | \alias{extract_parameter_set_dials} 15 | \alias{extract_fit_time} 16 | \title{Generics for object extraction} 17 | \usage{ 18 | extract_workflow(x, ...) 19 | 20 | extract_recipe(x, ...) 21 | 22 | extract_spec_parsnip(x, ...) 23 | 24 | extract_fit_parsnip(x, ...) 25 | 26 | extract_fit_engine(x, ...) 27 | 28 | extract_mold(x, ...) 29 | 30 | extract_preprocessor(x, ...) 31 | 32 | extract_postprocessor(x, ...) 33 | 34 | extract_parameter_dials(x, ...) 35 | 36 | extract_parameter_set_dials(x, ...) 37 | 38 | extract_fit_time(x, ...) 39 | } 40 | \arguments{ 41 | \item{x}{An object.} 42 | 43 | \item{...}{Extra arguments passed on to methods.} 44 | } 45 | \description{ 46 | These generics are used to extract elements from various model 47 | objects. Methods are defined in other packages, such as tune, 48 | workflows, and workflowsets, but the returned object is always the same. 49 | \itemize{ 50 | \item \code{extract_fit_engine()} returns the engine specific fit embedded within 51 | a parsnip model fit. For example, when using \code{parsnip::linear_reg()} 52 | with the \code{"lm"} engine, this returns the underlying \code{lm} object. 53 | \item \code{extract_fit_parsnip()} returns a parsnip model fit. 
54 | \item \code{extract_mold()} returns the preprocessed "mold" object returned 55 | from \code{\link[=mold]{mold()}}. It contains information about the preprocessing, 56 | including either the prepped recipe, the formula terms object, or 57 | variable selectors. 58 | \item \code{extract_spec_parsnip()} returns a parsnip model specification. 59 | \item \code{extract_preprocessor()} returns the formula, recipe, or variable 60 | expressions used for preprocessing. 61 | \item \code{extract_recipe()} returns a recipe, possibly estimated. 62 | \item \code{extract_workflow()} returns a workflow, possibly fit. 63 | \item \code{extract_parameter_dials()} returns a single dials parameter object. 64 | \item \code{extract_parameter_set_dials()} returns a set of dials parameter objects. 65 | \item \code{extract_fit_time()} returns a tibble with fit times. 66 | } 67 | } 68 | \examples{ 69 | # See packages where methods are defined for examples, such as `parsnip` or 70 | # `workflows`. 71 | } 72 | -------------------------------------------------------------------------------- /man/hardhat-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hardhat-package.R 3 | \docType{package} 4 | \name{hardhat-package} 5 | \alias{hardhat} 6 | \alias{hardhat-package} 7 | \title{hardhat: Construct Modeling Packages} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Building modeling packages is hard. A large amount of effort generally goes into providing an implementation for a new method that is efficient, fast, and correct, but often less emphasis is put on the user interface. A good interface requires specialized knowledge about S3 methods and formulas, which the average package developer might not have. 
The goal of 'hardhat' is to reduce the burden around building new modeling packages by providing functionality for preprocessing, predicting, and validating input. 12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/tidymodels/hardhat} 17 | \item \url{https://hardhat.tidymodels.org} 18 | \item Report bugs at \url{https://github.com/tidymodels/hardhat/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Hannah Frick \email{hannah@posit.co} (\href{https://orcid.org/0000-0002-6049-5258}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Davis Vaughan \email{davis@posit.co} 28 | \item Max Kuhn \email{max@posit.co} 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Posit Software, PBC [copyright holder, funder] 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{importance_weights} 4 | \alias{importance_weights} 5 | \title{Importance weights} 6 | \usage{ 7 | importance_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{A double vector.} 11 | } 12 | \value{ 13 | A new importance weights vector. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{importance_weights()} creates a vector of importance weights which allow you 19 | to apply a context dependent weight to your observations. Importance weights 20 | are supplied as a non-negative double vector, where fractional values are 21 | allowed. 22 | } 23 | \details{ 24 | Importance weights focus on how much each row of the data set should 25 | influence model estimation. 
These can be based on data or arbitrarily set to 26 | achieve some goal. 27 | 28 | In tidymodels, importance weights only affect the model estimation and 29 | \emph{supervised} recipes steps. They are not used with yardstick functions for 30 | calculating measures of model performance. 31 | } 32 | \examples{ 33 | importance_weights(c(1.5, 2.3, 10)) 34 | } 35 | \seealso{ 36 | \code{\link[=frequency_weights]{frequency_weights()}} 37 | } 38 | -------------------------------------------------------------------------------- /man/is_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{is_blueprint} 4 | \alias{is_blueprint} 5 | \title{Is \code{x} a preprocessing blueprint?} 6 | \usage{ 7 | is_blueprint(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \description{ 13 | \code{is_blueprint()} checks if \code{x} inherits from \code{"hardhat_blueprint"}. 14 | } 15 | \examples{ 16 | is_blueprint(default_xy_blueprint()) 17 | } 18 | -------------------------------------------------------------------------------- /man/is_case_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_case_weights} 4 | \alias{is_case_weights} 5 | \title{Is \code{x} a case weights vector?} 6 | \usage{ 7 | is_case_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_case_weights()} checks if \code{x} inherits from \code{"hardhat_case_weights"}. 
19 | } 20 | \examples{ 21 | is_case_weights(1) 22 | is_case_weights(frequency_weights(1)) 23 | } 24 | -------------------------------------------------------------------------------- /man/is_frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_frequency_weights} 4 | \alias{is_frequency_weights} 5 | \title{Is \code{x} a frequency weights vector?} 6 | \usage{ 7 | is_frequency_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_frequency_weights()} checks if \code{x} inherits from 19 | \code{"hardhat_frequency_weights"}. 20 | } 21 | \examples{ 22 | is_frequency_weights(1) 23 | is_frequency_weights(frequency_weights(1)) 24 | is_frequency_weights(importance_weights(1)) 25 | } 26 | -------------------------------------------------------------------------------- /man/is_importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{is_importance_weights} 4 | \alias{is_importance_weights} 5 | \title{Is \code{x} an importance weights vector?} 6 | \usage{ 7 | is_importance_weights(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | A single \code{TRUE} or \code{FALSE}. 
14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | \code{is_importance_weights()} checks if \code{x} inherits from 19 | \code{"hardhat_importance_weights"}. 20 | } 21 | \examples{ 22 | is_importance_weights(1) 23 | is_importance_weights(frequency_weights(1)) 24 | is_importance_weights(importance_weights(1)) 25 | } 26 | -------------------------------------------------------------------------------- /man/model_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model-matrix.R 3 | \name{model_matrix} 4 | \alias{model_matrix} 5 | \title{Construct a design matrix} 6 | \usage{ 7 | model_matrix(terms, data, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{terms}{A terms object to construct a model matrix with. This is 11 | typically the terms object returned from the corresponding call to 12 | \code{\link[=model_frame]{model_frame()}}.} 13 | 14 | \item{data}{A tibble to construct the design matrix with. This is 15 | typically the tibble returned from the corresponding call to 16 | \code{\link[=model_frame]{model_frame()}}.} 17 | 18 | \item{...}{These dots are for future extensions and must be empty.} 19 | 20 | \item{call}{The call used for errors and warnings.} 21 | } 22 | \value{ 23 | A tibble containing the design matrix. 24 | } 25 | \description{ 26 | \code{model_matrix()} is a stricter version of \code{\link[stats:model.matrix]{stats::model.matrix()}}. Notably, 27 | \code{model_matrix()} will \emph{never} drop rows, and the result will be a tibble. 
28 | } 29 | \details{ 30 | The following explains the rationale for some of the difference in arguments 31 | compared to \code{\link[stats:model.matrix]{stats::model.matrix()}}: 32 | \itemize{ 33 | \item \code{contrasts.arg}: Set the contrasts argument, \code{options("contrasts")} 34 | globally, or assign a contrast to the factor of interest directly using 35 | \code{\link[stats:contrasts]{stats::contrasts()}}. See the examples section. 36 | \item \code{xlev}: Not allowed because \code{model.frame()} is never called, so it is 37 | unnecessary. 38 | \item \code{...}: Not allowed because the default method of \code{model.matrix()} does 39 | not use it, and the \code{lm} method uses it to pass potential offsets and 40 | weights through, which are handled differently in hardhat. 41 | } 42 | } 43 | \examples{ 44 | # --------------------------------------------------------------------------- 45 | # Example usage 46 | 47 | framed <- model_frame(Sepal.Width ~ Species, iris) 48 | 49 | model_matrix(framed$terms, framed$data) 50 | 51 | # --------------------------------------------------------------------------- 52 | # Missing values never result in dropped rows 53 | 54 | iris2 <- iris 55 | iris2$Species[1] <- NA 56 | 57 | framed2 <- model_frame(Sepal.Width ~ Species, iris2) 58 | 59 | model_matrix(framed2$terms, framed2$data) 60 | 61 | # --------------------------------------------------------------------------- 62 | # Contrasts 63 | 64 | # Default contrasts 65 | y <- factor(c("a", "b")) 66 | x <- data.frame(y = y) 67 | framed <- model_frame(~y, x) 68 | 69 | # Setting contrasts directly 70 | y_with_contrast <- y 71 | contrasts(y_with_contrast) <- contr.sum(2) 72 | x2 <- data.frame(y = y_with_contrast) 73 | framed2 <- model_frame(~y, x2) 74 | 75 | # Compare! 
76 | model_matrix(framed$terms, framed$data) 77 | model_matrix(framed2$terms, framed2$data) 78 | 79 | # Also, can set the contrasts globally 80 | global_override <- c(unordered = "contr.sum", ordered = "contr.poly") 81 | 82 | rlang::with_options( 83 | .expr = { 84 | model_matrix(framed$terms, framed$data) 85 | }, 86 | contrasts = global_override 87 | ) 88 | } 89 | -------------------------------------------------------------------------------- /man/model_offset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model-offset.R 3 | \name{model_offset} 4 | \alias{model_offset} 5 | \title{Extract a model offset} 6 | \usage{ 7 | model_offset(terms, data, ..., call = caller_env()) 8 | } 9 | \arguments{ 10 | \item{terms}{A \code{"terms"} object corresponding to \code{data}, returned from a 11 | call to \code{model_frame()}.} 12 | 13 | \item{data}{A data frame returned from a call to \code{model_frame()}.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | A numeric vector representing the offset. 21 | } 22 | \description{ 23 | \code{model_offset()} extracts a numeric offset from a model frame. It is 24 | inspired by \code{\link[stats:model.extract]{stats::model.offset()}}, but has nicer error messages and 25 | is slightly stricter. 26 | } 27 | \details{ 28 | If a column that has been tagged as an offset is not numeric, a nice error 29 | message is thrown telling you exactly which column was problematic. 30 | 31 | \code{\link[stats:model.extract]{stats::model.offset()}} also allows for a column named \code{"(offset)"} to be 32 | considered an offset along with any others that have been tagged by 33 | \code{\link[stats:offset]{stats::offset()}}. 
However, \code{\link[stats:model.matrix]{stats::model.matrix()}} does not recognize 34 | these columns as offsets (so it doesn't remove them as it should). Because 35 | of this inconsistency, columns named \code{"(offset)"} are \emph{not} treated specially 36 | by \code{model_offset()}. 37 | } 38 | \examples{ 39 | 40 | x <- model.frame(Species ~ offset(Sepal.Width), iris) 41 | 42 | model_offset(terms(x), x) 43 | 44 | xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris) 45 | 46 | model_offset(terms(xx), xx) 47 | 48 | # Problematic columns are caught with intuitive errors 49 | tryCatch( 50 | expr = { 51 | x <- model.frame(~ offset(Species), iris) 52 | model_offset(terms(x), x) 53 | }, 54 | error = function(e) { 55 | print(e$message) 56 | } 57 | ) 58 | } 59 | -------------------------------------------------------------------------------- /man/modeling-usethis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use.R 3 | \name{modeling-usethis} 4 | \alias{modeling-usethis} 5 | \alias{create_modeling_package} 6 | \alias{use_modeling_deps} 7 | \alias{use_modeling_files} 8 | \title{Create a modeling package} 9 | \usage{ 10 | create_modeling_package(path, model, fields = NULL, open = interactive()) 11 | 12 | use_modeling_deps() 13 | 14 | use_modeling_files(model) 15 | } 16 | \arguments{ 17 | \item{path}{A path. If it exists, it is used. If it does not exist, 18 | it is created, provided that the parent path exists.} 19 | 20 | \item{model}{A string. The name of the high level modeling function that 21 | users will call. For example, \code{"linear_regression"}. This will be used to 22 | populate the skeleton. Spaces are not allowed.} 23 | 24 | \item{fields}{A named list of fields to add to DESCRIPTION, 25 | potentially overriding default values. 
See \code{usethis::use_description()} for 26 | how you can set personalized defaults using package options.} 27 | 28 | \item{open}{If TRUE, activates the new project: 29 | \itemize{ 30 | \item If RStudio desktop, the package is opened in a new session. 31 | \item If on RStudio server, the current RStudio project is activated. 32 | \item Otherwise, the working directory and active project is changed. 33 | }} 34 | } 35 | \value{ 36 | \code{create_modeling_package()} returns the project path invisibly. 37 | 38 | \code{use_modeling_deps()} returns invisibly. 39 | 40 | \code{use_modeling_files()} returns \code{model} invisibly. 41 | } 42 | \description{ 43 | \code{create_modeling_package()} will: 44 | \itemize{ 45 | \item Call \code{usethis::create_package()} to set up a new R package. 46 | \item Call \code{use_modeling_deps()}. 47 | \item Call \code{use_modeling_files()}. 48 | } 49 | 50 | \code{use_modeling_deps()} will: 51 | \itemize{ 52 | \item Add hardhat, rlang, and stats to Imports 53 | \item Add recipes to Suggests 54 | \item If roxygen2 is available, use roxygen markdown 55 | } 56 | 57 | \code{use_modeling_files()} will: 58 | \itemize{ 59 | \item Add a package documentation file 60 | \item Generate and populate 3 files in \verb{R/}: 61 | \itemize{ 62 | \item \code{{{model}}-constructor.R} 63 | \item \code{{{model}}-fit.R} 64 | \item \code{{{model}}-predict.R} 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /man/mold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mold.R 3 | \name{mold} 4 | \alias{mold} 5 | \title{Mold data for modeling} 6 | \usage{ 7 | mold(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object. 
See the method specific implementations linked in the 11 | Description for more information.} 12 | 13 | \item{...}{Not used.} 14 | } 15 | \value{ 16 | A named list containing 4 elements: 17 | \itemize{ 18 | \item \code{predictors}: A tibble containing the molded predictors to be used in the 19 | model. 20 | \item \code{outcomes}: A tibble containing the molded outcomes to be used in the 21 | model. 22 | \item \code{blueprint}: A method specific \code{"hardhat_blueprint"} object for use when 23 | making predictions. 24 | \item \code{extras}: Either \code{NULL} if the blueprint returns no extra information, 25 | or a named list containing the extra information. 26 | } 27 | } 28 | \description{ 29 | \code{mold()} applies the appropriate processing steps required to get training 30 | data ready to be fed into a model. It does this through the use of various 31 | \emph{blueprints} that understand how to preprocess data that come in various 32 | forms, such as a formula or a recipe. 33 | 34 | All blueprints have consistent return values with the others, but each is 35 | unique enough to have its own help page. Click through below to learn 36 | how to use each one in conjunction with \code{mold()}. 37 | \itemize{ 38 | \item XY Method - \code{\link[=default_xy_blueprint]{default_xy_blueprint()}} 39 | \item Formula Method - \code{\link[=default_formula_blueprint]{default_formula_blueprint()}} 40 | \item Recipes Method - \code{\link[=default_recipe_blueprint]{default_recipe_blueprint()}} 41 | } 42 | } 43 | \examples{ 44 | \dontshow{if (rlang::is_installed(c("recipes"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 45 | # See the method specific documentation linked in Description 46 | # for the details of each blueprint, and more examples. 
47 | 48 | # XY 49 | mold(iris["Sepal.Width"], iris$Species) 50 | 51 | # Formula 52 | mold(Species ~ Sepal.Width, iris) 53 | 54 | # Recipe 55 | library(recipes) 56 | mold(recipe(Species ~ Sepal.Width, iris), iris) 57 | \dontshow{\}) # examplesIf} 58 | } 59 | -------------------------------------------------------------------------------- /man/new_case_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_case_weights} 4 | \alias{new_case_weights} 5 | \title{Extend case weights} 6 | \usage{ 7 | new_case_weights(x, ..., class) 8 | } 9 | \arguments{ 10 | \item{x}{An integer or double vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new subclassed case weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_case_weights()} is a developer oriented function for constructing a new 23 | case weights type. The \verb{<case_weights>} type itself is an \emph{abstract} type 24 | with very little functionality. Because of this, \code{class} is a required 25 | argument. 
26 | } 27 | \examples{ 28 | new_case_weights(1:5, class = "my_weights") 29 | } 30 | -------------------------------------------------------------------------------- /man/new_frequency_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_frequency_weights} 4 | \alias{new_frequency_weights} 5 | \title{Construct a frequency weights vector} 6 | \usage{ 7 | new_frequency_weights(x = integer(), ..., class = character()) 8 | } 9 | \arguments{ 10 | \item{x}{An integer vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new frequency weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_frequency_weights()} is a developer oriented function for constructing 23 | a new frequency weights vector. Generally, you should use 24 | \code{\link[=frequency_weights]{frequency_weights()}} instead. 
25 | } 26 | \examples{ 27 | new_frequency_weights() 28 | new_frequency_weights(1:5) 29 | } 30 | -------------------------------------------------------------------------------- /man/new_importance_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/case-weights.R 3 | \name{new_importance_weights} 4 | \alias{new_importance_weights} 5 | \title{Construct an importance weights vector} 6 | \usage{ 7 | new_importance_weights(x = double(), ..., class = character()) 8 | } 9 | \arguments{ 10 | \item{x}{A double vector.} 11 | 12 | \item{...}{Name-value pairs defining attributes} 13 | 14 | \item{class}{Name of subclass.} 15 | } 16 | \value{ 17 | A new importance weights vector. 18 | } 19 | \description{ 20 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 21 | 22 | \code{new_importance_weights()} is a developer oriented function for constructing 23 | a new importance weights vector. Generally, you should use 24 | \code{\link[=importance_weights]{importance_weights()}} instead. 
25 | } 26 | \examples{ 27 | new_importance_weights() 28 | new_importance_weights(c(1.5, 2.3, 10)) 29 | } 30 | -------------------------------------------------------------------------------- /man/new_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{new_model} 4 | \alias{new_model} 5 | \title{Constructor for a base model} 6 | \usage{ 7 | new_model(..., blueprint = default_xy_blueprint(), class = character()) 8 | } 9 | \arguments{ 10 | \item{...}{Name-value pairs for elements specific to the model defined by 11 | \code{class}.} 12 | 13 | \item{blueprint}{A preprocessing \code{blueprint} returned from a call to \code{\link[=mold]{mold()}}.} 14 | 15 | \item{class}{A character vector representing the class of the model.} 16 | } 17 | \value{ 18 | A new scalar model object, represented as a classed list with named elements 19 | specified in \code{...}. 20 | } 21 | \description{ 22 | A \strong{model} is a \emph{scalar object}, as classified in 23 | \href{https://adv-r.hadley.nz/s3.html#object-styles}{Advanced R}. As such, it 24 | takes uniquely named elements in \code{...} and combines them into a list with 25 | a class of \code{class}. This entire object represents a single model. 26 | } 27 | \details{ 28 | Because every model should have multiple interfaces, including formula 29 | and \code{recipes} interfaces, all models should have a \code{blueprint} that 30 | can process new data when \code{predict()} is called. The easiest way to generate 31 | a blueprint with all of the information required at prediction time is to 32 | use the one that is returned from a call to \code{\link[=mold]{mold()}}. 
33 | } 34 | \examples{ 35 | new_model( 36 | custom_element = "my-elem", 37 | blueprint = default_xy_blueprint(), 38 | class = "custom_model" 39 | ) 40 | } 41 | -------------------------------------------------------------------------------- /man/quantile_pred.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quantile-pred.R 3 | \name{quantile_pred} 4 | \alias{quantile_pred} 5 | \alias{extract_quantile_levels} 6 | \alias{as_tibble.quantile_pred} 7 | \alias{as.matrix.quantile_pred} 8 | \title{Create a vector containing sets of quantiles} 9 | \usage{ 10 | quantile_pred(values, quantile_levels = double()) 11 | 12 | extract_quantile_levels(x) 13 | 14 | \method{as_tibble}{quantile_pred}(x, ..., .rows = NULL, .name_repair = "minimal", rownames = NULL) 15 | 16 | \method{as.matrix}{quantile_pred}(x, ...) 17 | } 18 | \arguments{ 19 | \item{values}{A matrix of values. Each column should correspond to one of 20 | the quantile levels.} 21 | 22 | \item{quantile_levels}{A vector of probabilities corresponding to \code{values}.} 23 | 24 | \item{x}{An object produced by \code{quantile_pred()}.} 25 | 26 | \item{...}{Not currently used.} 27 | 28 | \item{.rows, .name_repair, rownames}{Arguments not used but required by the 29 | original S3 method.} 30 | } 31 | \value{ 32 | \itemize{ 33 | \item \code{quantile_pred()} returns a vector of values associated with the 34 | quantile levels. 35 | \item \code{extract_quantile_levels()} returns a numeric vector of levels. 36 | \item \code{as_tibble()} returns a tibble with rows \code{".pred_quantile"}, 37 | \code{".quantile_levels"}, and \code{".row"}. 38 | \item \code{as.matrix()} returns an unnamed matrix with rows as samples, columns as 39 | quantile levels, and entries are predictions. 
40 | } 41 | } 42 | \description{ 43 | \code{quantile_pred()} is a special vector class used to efficiently store 44 | predictions from a quantile regression model. It requires the same quantile 45 | levels for each row being predicted. 46 | } 47 | \examples{ 48 | .pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) 49 | 50 | unclass(.pred_quantile) 51 | 52 | # Access the underlying information 53 | extract_quantile_levels(.pred_quantile) 54 | 55 | # Matrix format 56 | as.matrix(.pred_quantile) 57 | 58 | # Tidy format 59 | library(tibble) 60 | as_tibble(.pred_quantile) 61 | } 62 | -------------------------------------------------------------------------------- /man/recompose.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recompose.R 3 | \name{recompose} 4 | \alias{recompose} 5 | \title{Recompose a data frame into another form} 6 | \usage{ 7 | recompose(data, ..., composition = "tibble", call = caller_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame.} 11 | 12 | \item{...}{These dots are for future extensions and must be empty.} 13 | 14 | \item{composition}{One of: 15 | \itemize{ 16 | \item \code{"tibble"} to convert to a tibble. 17 | \item \code{"data.frame"} to convert to a base data frame. 18 | \item \code{"matrix"} to convert to a matrix. All columns must be numeric. 19 | \item \code{"dgCMatrix"} to convert to a sparse matrix. All columns must be numeric, 20 | and the Matrix package must be installed. 21 | }} 22 | 23 | \item{call}{The call used for errors and warnings.} 24 | } 25 | \value{ 26 | The output type is determined from the \code{composition}. 
27 | } 28 | \description{ 29 | \code{recompose()} takes a data frame and converts it into one of: 30 | \itemize{ 31 | \item A tibble 32 | \item A data frame 33 | \item A matrix 34 | \item A sparse matrix (using the Matrix package) 35 | } 36 | 37 | This is an internal function used only by hardhat and recipes. 38 | } 39 | \examples{ 40 | df <- vctrs::data_frame(x = 1) 41 | 42 | recompose(df) 43 | recompose(df, composition = "matrix") 44 | 45 | # All columns must be numeric to convert to a matrix 46 | df <- vctrs::data_frame(x = 1, y = "a") 47 | try(recompose(df, composition = "matrix")) 48 | } 49 | \keyword{internal} 50 | -------------------------------------------------------------------------------- /man/refresh_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{refresh_blueprint} 4 | \alias{refresh_blueprint} 5 | \title{Refresh a preprocessing blueprint} 6 | \usage{ 7 | refresh_blueprint(blueprint) 8 | } 9 | \arguments{ 10 | \item{blueprint}{A preprocessing blueprint.} 11 | } 12 | \value{ 13 | \code{blueprint} is returned after a call to the corresponding constructor. 14 | } 15 | \description{ 16 | \code{refresh_blueprint()} is a developer facing generic function that is called 17 | at the end of \code{\link[=update_blueprint]{update_blueprint()}}. It simply is a wrapper around the 18 | method specific \verb{new_*_blueprint()} function that runs the updated blueprint 19 | through the constructor again to ensure that all of the elements of the 20 | blueprint are still valid after the update. 21 | } 22 | \details{ 23 | If you implement your own custom \code{blueprint}, you should export a 24 | \code{refresh_blueprint()} method that just calls the constructor for your blueprint 25 | and passes through all of the elements of the blueprint to the constructor. 
26 | } 27 | \examples{ 28 | 29 | blueprint <- default_xy_blueprint() 30 | 31 | # This should never be done manually, but is essentially 32 | # what `update_blueprint(blueprint, intercept = TRUE)` does for you 33 | blueprint$intercept <- TRUE 34 | 35 | # Then update_blueprint() will call refresh_blueprint() 36 | # to ensure that the structure is correct 37 | refresh_blueprint(blueprint) 38 | 39 | # So you can't do something like... 40 | blueprint_bad <- blueprint 41 | blueprint_bad$intercept <- 1 42 | 43 | # ...because the constructor will catch it 44 | try(refresh_blueprint(blueprint_bad)) 45 | 46 | # And update_blueprint() catches this automatically 47 | try(update_blueprint(blueprint, intercept = 1)) 48 | } 49 | -------------------------------------------------------------------------------- /man/rmd/one-hot.Rmd: -------------------------------------------------------------------------------- 1 | ```{r load, include = FALSE} 2 | library(dplyr) 3 | ``` 4 | 5 | By default, `model.matrix()` generates binary indicator variables for factor predictors. When the formula does not remove an intercept, an incomplete set of indicators are created; no indicator is made for the first level of the factor. 6 | 7 | For example, `species` and `island` both have three levels but `model.matrix()` creates two indicator variables for each: 8 | 9 | ```{r ref-cell} 10 | library(dplyr) 11 | library(modeldata) 12 | data(penguins) 13 | 14 | levels(penguins$species) 15 | levels(penguins$island) 16 | 17 | model.matrix(~ species + island, data = penguins) |> 18 | colnames() 19 | ``` 20 | 21 | For a formula with no intercept, the first factor is expanded to indicators for _all_ factor levels but all other factors are expanded to all but one (as above): 22 | 23 | ```{r hybrid} 24 | model.matrix(~ 0 + species + island, data = penguins) |> 25 | colnames() 26 | ``` 27 | 28 | For inference, this hybrid encoding can be problematic. 
29 | 30 | To generate all indicators, use this contrast: 31 | 32 | ```{r one-hot} 33 | # Switch out the contrast method 34 | old_contr <- options("contrasts")$contrasts 35 | new_contr <- old_contr 36 | new_contr["unordered"] <- "contr_one_hot" 37 | options(contrasts = new_contr) 38 | 39 | model.matrix(~ species + island, data = penguins) |> 40 | colnames() 41 | 42 | options(contrasts = old_contr) 43 | ``` 44 | 45 | Removing the intercept here does not affect the factor encodings. 46 | 47 | 48 | -------------------------------------------------------------------------------- /man/run-forge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forge.R, R/blueprint-formula-default.R, 3 | % R/blueprint-recipe-default.R, R/blueprint-xy-default.R 4 | \name{run-forge} 5 | \alias{run-forge} 6 | \alias{run_forge} 7 | \alias{run_forge.default_formula_blueprint} 8 | \alias{run_forge.default_recipe_blueprint} 9 | \alias{run_forge.default_xy_blueprint} 10 | \title{\code{forge()} according to a blueprint} 11 | \usage{ 12 | run_forge(blueprint, new_data, ..., outcomes = FALSE) 13 | 14 | \method{run_forge}{default_formula_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 15 | 16 | \method{run_forge}{default_recipe_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 17 | 18 | \method{run_forge}{default_xy_blueprint}(blueprint, new_data, ..., outcomes = FALSE, call = caller_env()) 19 | } 20 | \arguments{ 21 | \item{blueprint}{A preprocessing \code{blueprint}.} 22 | 23 | \item{new_data}{A data frame or matrix of predictors to process. If 24 | \code{outcomes = TRUE}, this should also contain the outcomes to process.} 25 | 26 | \item{...}{Not used.} 27 | 28 | \item{outcomes}{A logical. 
Should the outcomes be processed and returned 29 | as well?} 30 | 31 | \item{call}{The call used for errors and warnings.} 32 | } 33 | \value{ 34 | \code{run_forge()} methods return the object that is then immediately returned 35 | from \code{forge()}. See the return value section of \code{\link[=forge]{forge()}} to understand what 36 | the structure of the return value should look like. 37 | } 38 | \description{ 39 | This is a developer facing function that is \emph{only} used if you are creating 40 | your own blueprint subclass. It is called from \code{\link[=forge]{forge()}} and dispatches off 41 | the S3 class of the \code{blueprint}. This gives you an opportunity to forge the 42 | new data in a way that is specific to your blueprint. 43 | 44 | \code{run_forge()} is always called from \code{forge()} with the same arguments, unlike 45 | \code{\link[=run_mold]{run_mold()}}, because there aren't different interfaces for calling 46 | \code{forge()}. \code{run_forge()} is always called as: 47 | 48 | \code{run_forge(blueprint, new_data = new_data, outcomes = outcomes)} 49 | 50 | If you write a blueprint subclass for \code{\link[=new_xy_blueprint]{new_xy_blueprint()}}, 51 | \code{\link[=new_recipe_blueprint]{new_recipe_blueprint()}}, \code{\link[=new_formula_blueprint]{new_formula_blueprint()}}, or \code{\link[=new_blueprint]{new_blueprint()}}, 52 | then your \code{run_forge()} method signature must match this. 
53 | } 54 | \examples{ 55 | bp <- default_xy_blueprint() 56 | 57 | outcomes <- mtcars["mpg"] 58 | predictors <- mtcars 59 | predictors$mpg <- NULL 60 | 61 | mold <- run_mold(bp, x = predictors, y = outcomes) 62 | 63 | run_forge(mold$blueprint, new_data = predictors) 64 | } 65 | -------------------------------------------------------------------------------- /man/run-mold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mold.R, R/blueprint-formula-default.R, 3 | % R/blueprint-recipe-default.R, R/blueprint-xy-default.R 4 | \name{run-mold} 5 | \alias{run-mold} 6 | \alias{run_mold} 7 | \alias{run_mold.default_formula_blueprint} 8 | \alias{run_mold.default_recipe_blueprint} 9 | \alias{run_mold.default_xy_blueprint} 10 | \title{\code{mold()} according to a blueprint} 11 | \usage{ 12 | run_mold(blueprint, ...) 13 | 14 | \method{run_mold}{default_formula_blueprint}(blueprint, ..., data, call = caller_env()) 15 | 16 | \method{run_mold}{default_recipe_blueprint}(blueprint, ..., data, call = caller_env()) 17 | 18 | \method{run_mold}{default_xy_blueprint}(blueprint, ..., x, y, call = caller_env()) 19 | } 20 | \arguments{ 21 | \item{blueprint}{A preprocessing blueprint.} 22 | 23 | \item{...}{Not used. Required for extensibility.} 24 | 25 | \item{data}{A data frame or matrix containing the outcomes and predictors.} 26 | 27 | \item{call}{The call used for errors and warnings.} 28 | 29 | \item{x}{A data frame or matrix containing the predictors.} 30 | 31 | \item{y}{A data frame, matrix, or vector containing the outcomes.} 32 | } 33 | \value{ 34 | \code{run_mold()} methods return the object that is then immediately returned from 35 | \code{mold()}. See the return value section of \code{\link[=mold]{mold()}} to understand what the 36 | structure of the return value should look like. 
37 | } 38 | \description{ 39 | This is a developer facing function that is \emph{only} used if you are creating 40 | your own blueprint subclass. It is called from \code{\link[=mold]{mold()}} and dispatches off 41 | the S3 class of the \code{blueprint}. This gives you an opportunity to mold the 42 | data in a way that is specific to your blueprint. 43 | 44 | \code{run_mold()} will be called with different arguments depending on the 45 | interface to \code{mold()} that is used: 46 | \itemize{ 47 | \item XY interface: 48 | \itemize{ 49 | \item \code{run_mold(blueprint, x = x, y = y)} 50 | } 51 | \item Formula interface: 52 | \itemize{ 53 | \item \code{run_mold(blueprint, data = data)} 54 | \item Additionally, the \code{blueprint} will have been updated to contain the 55 | \code{formula}. 56 | } 57 | \item Recipe interface: 58 | \itemize{ 59 | \item \code{run_mold(blueprint, data = data)} 60 | \item Additionally, the \code{blueprint} will have been updated to contain the 61 | \code{recipe}. 62 | } 63 | } 64 | 65 | If you write a blueprint subclass for \code{\link[=new_xy_blueprint]{new_xy_blueprint()}}, 66 | \code{\link[=new_recipe_blueprint]{new_recipe_blueprint()}}, or \code{\link[=new_formula_blueprint]{new_formula_blueprint()}} then your \code{run_mold()} 67 | method signature must match whichever interface listed above will be used. 68 | 69 | If you write a completely new blueprint inheriting only from 70 | \code{\link[=new_blueprint]{new_blueprint()}} and write a new \code{\link[=mold]{mold()}} method (because you aren't using 71 | an xy, formula, or recipe interface), then you will have full control over 72 | how \code{run_mold()} will be called. 
73 | } 74 | \examples{ 75 | bp <- default_xy_blueprint() 76 | 77 | outcomes <- mtcars["mpg"] 78 | predictors <- mtcars 79 | predictors$mpg <- NULL 80 | 81 | run_mold(bp, x = predictors, y = outcomes) 82 | } 83 | -------------------------------------------------------------------------------- /man/shrink.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shrink.R 3 | \name{shrink} 4 | \alias{shrink} 5 | \title{Subset only required columns} 6 | \usage{ 7 | shrink(data, ptype, ..., call = current_env()) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the data to subset.} 11 | 12 | \item{ptype}{A data frame prototype containing the required columns.} 13 | 14 | \item{...}{These dots are for future extensions and must be empty.} 15 | 16 | \item{call}{The call used for errors and warnings.} 17 | } 18 | \value{ 19 | A tibble containing the required columns. 20 | } 21 | \description{ 22 | \code{shrink()} subsets \code{data} to only contain the required columns specified by 23 | the prototype, \code{ptype}. 24 | } 25 | \details{ 26 | \code{shrink()} is called by \code{\link[=forge]{forge()}} before \code{\link[=scream]{scream()}} and before the actual 27 | processing is done. 
28 | } 29 | \examples{ 30 | # --------------------------------------------------------------------------- 31 | # Setup 32 | 33 | train <- iris[1:100, ] 34 | test <- iris[101:150, ] 35 | 36 | # --------------------------------------------------------------------------- 37 | # shrink() 38 | 39 | # mold() is run at model fit time 40 | # and a formula preprocessing blueprint is recorded 41 | x <- mold(log(Sepal.Width) ~ Species, train) 42 | 43 | # Inside the result of mold() are the prototype tibbles 44 | # for the predictors and the outcomes 45 | ptype_pred <- x$blueprint$ptypes$predictors 46 | ptype_out <- x$blueprint$ptypes$outcomes 47 | 48 | # Pass the test data, along with a prototype, to 49 | # shrink() to extract the prototype columns 50 | shrink(test, ptype_pred) 51 | 52 | # To extract the outcomes, just use the 53 | # outcome prototype 54 | shrink(test, ptype_out) 55 | 56 | # shrink() makes sure that the columns 57 | # required by `ptype` actually exist in the data 58 | # and errors nicely when they don't 59 | test2 <- subset(test, select = -Species) 60 | try(shrink(test2, ptype_pred)) 61 | } 62 | -------------------------------------------------------------------------------- /man/spruce-multiple.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spruce.R 3 | \name{spruce-multiple} 4 | \alias{spruce-multiple} 5 | \alias{spruce_numeric_multiple} 6 | \alias{spruce_class_multiple} 7 | \alias{spruce_prob_multiple} 8 | \title{Spruce up multi-outcome predictions} 9 | \usage{ 10 | spruce_numeric_multiple(...) 11 | 12 | spruce_class_multiple(...) 13 | 14 | spruce_prob_multiple(...) 15 | } 16 | \arguments{ 17 | \item{...}{Multiple vectors of predictions: 18 | \itemize{ 19 | \item For \code{spruce_numeric_multiple()}, numeric vectors of equal size. 20 | \item For \code{spruce_class_multiple()}, factors of "hard" class predictions of 21 | equal size. 
22 | \item For \code{spruce_prob_multiple()}, tibbles of equal size, which are the result 23 | of calling \code{\link[=spruce_prob]{spruce_prob()}} on each matrix of prediction probabilities. 24 | } 25 | 26 | If the \code{...} are named, then this name will be used as the suffix on the 27 | resulting column name, otherwise a positional index will be used.} 28 | } 29 | \value{ 30 | \itemize{ 31 | \item For \code{spruce_numeric_multiple()}, a tibble of numeric columns named with the 32 | pattern \verb{.pred_*}. 33 | \item For \code{spruce_class_multiple()}, a tibble of factor columns named with the 34 | pattern \verb{.pred_class_*}. 35 | \item For \code{spruce_prob_multiple()}, a tibble of data frame columns named with the 36 | pattern \verb{.pred_*}. 37 | } 38 | } 39 | \description{ 40 | This family of \verb{spruce_*_multiple()} functions converts multi-outcome 41 | predictions into a standardized format. They are generally called from a 42 | prediction implementation function for the specific \code{type} of prediction to 43 | return. 
44 | } 45 | \examples{ 46 | spruce_numeric_multiple(1:3, foo = 2:4) 47 | 48 | spruce_class_multiple( 49 | one_step = factor(c("a", "b", "c")), 50 | two_step = factor(c("a", "c", "c")) 51 | ) 52 | 53 | one_step <- matrix(c(.3, .7, .0, .1, .3, .6), nrow = 2, byrow = TRUE) 54 | two_step <- matrix(c(.2, .7, .1, .2, .4, .4), nrow = 2, byrow = TRUE) 55 | binary <- matrix(c(.5, .5, .4, .6), nrow = 2, byrow = TRUE) 56 | 57 | spruce_prob_multiple( 58 | one_step = spruce_prob(c("a", "b", "c"), one_step), 59 | two_step = spruce_prob(c("a", "b", "c"), two_step), 60 | binary = spruce_prob(c("yes", "no"), binary) 61 | ) 62 | } 63 | -------------------------------------------------------------------------------- /man/spruce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spruce.R 3 | \name{spruce} 4 | \alias{spruce} 5 | \alias{spruce_numeric} 6 | \alias{spruce_class} 7 | \alias{spruce_prob} 8 | \title{Spruce up predictions} 9 | \usage{ 10 | spruce_numeric(pred) 11 | 12 | spruce_class(pred_class) 13 | 14 | spruce_prob(pred_levels, prob_matrix) 15 | } 16 | \arguments{ 17 | \item{pred}{(\code{type = "numeric"}) A numeric vector of predictions.} 18 | 19 | \item{pred_class}{(\code{type = "class"}) A factor of "hard" class predictions.} 20 | 21 | \item{pred_levels, prob_matrix}{(\code{type = "prob"}) 22 | \itemize{ 23 | \item \code{pred_levels} should be a character vector of the original levels of 24 | the outcome used in training. 25 | \item \code{prob_matrix} should be a numeric matrix of class probabilities with 26 | as many columns as levels in \code{pred_levels}, and in the same order. 27 | }} 28 | } 29 | \value{ 30 | A tibble, ideally with the same number of rows as the \code{new_data} passed 31 | to \code{predict()}. The column names and number of columns vary based on the 32 | function used, but are standardized. 
33 | } 34 | \description{ 35 | The family of \verb{spruce_*()} functions convert predictions into a 36 | standardized format. They are generally called from a prediction 37 | implementation function for the specific \code{type} of prediction to return. 38 | } 39 | \details{ 40 | After running a \verb{spruce_*()} function, you should \emph{always} use the validation 41 | function \code{validate_prediction_size()} to ensure that the number of rows 42 | being returned is the same as the number of rows in the input (\code{new_data}). 43 | } 44 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.R 3 | \name{standardize} 4 | \alias{standardize} 5 | \title{Standardize the outcome} 6 | \usage{ 7 | standardize(y) 8 | } 9 | \arguments{ 10 | \item{y}{The outcome. This can be: 11 | \itemize{ 12 | \item A factor vector 13 | \item A numeric vector 14 | \item A 1D numeric array 15 | \item A numeric matrix with column names 16 | \item A 2D numeric array with column names 17 | \item A data frame with numeric or factor columns 18 | }} 19 | } 20 | \value{ 21 | All possible values of \code{y} are transformed into a \code{tibble} for 22 | standardization. Vectors are transformed into a \code{tibble} with 23 | a single column named \code{".outcome"}. 24 | } 25 | \description{ 26 | Most of the time, the input to a model should be flexible enough to capture 27 | a number of different input types from the user. \code{standardize()} focuses 28 | on capturing the flexibility in the \emph{outcome}. 29 | } 30 | \details{ 31 | \code{standardize()} is called from \code{\link[=mold]{mold()}} when using an XY interface (i.e. 32 | a \code{y} argument was supplied). 
33 | } 34 | \examples{ 35 | standardize(1:5) 36 | 37 | standardize(factor(letters[1:5])) 38 | 39 | mat <- matrix(1:10, ncol = 2) 40 | colnames(mat) <- c("a", "b") 41 | standardize(mat) 42 | 43 | df <- data.frame(x = 1:5, y = 6:10) 44 | standardize(df) 45 | } 46 | -------------------------------------------------------------------------------- /man/tune.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tune.R 3 | \name{tune} 4 | \alias{tune} 5 | \title{Mark arguments for tuning} 6 | \usage{ 7 | tune(id = "") 8 | } 9 | \arguments{ 10 | \item{id}{A single character value that can be used to differentiate 11 | parameters that are used in multiple places but have the same name, or if 12 | the user wants to add a note to the specified parameter.} 13 | } 14 | \value{ 15 | A call object that echoes the user's input. 16 | } 17 | \description{ 18 | \code{tune()} is an argument placeholder to be used with the recipes, parsnip, and 19 | tune packages. It marks recipes step and parsnip model arguments for tuning. 
20 | } 21 | \examples{ 22 | \dontshow{if (rlang::is_installed(c("recipes"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 23 | tune() 24 | tune("your name here") 25 | 26 | # In practice, `tune()` is used alongside recipes or parsnip to mark 27 | # specific arguments for tuning 28 | library(recipes) 29 | 30 | recipe(mpg ~ ., data = mtcars) |> 31 | step_normalize(all_numeric_predictors()) |> 32 | step_pca(all_numeric_predictors(), num_comp = tune()) 33 | \dontshow{\}) # examplesIf} 34 | } 35 | \seealso{ 36 | \code{tune::tune_grid()}, \code{tune::tune_bayes()} 37 | } 38 | -------------------------------------------------------------------------------- /man/update_blueprint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blueprint.R 3 | \name{update_blueprint} 4 | \alias{update_blueprint} 5 | \title{Update a preprocessing blueprint} 6 | \usage{ 7 | update_blueprint(blueprint, ...) 8 | } 9 | \arguments{ 10 | \item{blueprint}{A preprocessing blueprint.} 11 | 12 | \item{...}{Name-value pairs of \emph{existing} elements in \code{blueprint} that should 13 | be updated.} 14 | } 15 | \description{ 16 | \code{update_blueprint()} is the correct way to alter elements of an existing 17 | \code{blueprint} object. It has two benefits over just doing 18 | \code{blueprint$elem <- new_elem}. 19 | \itemize{ 20 | \item The name you are updating \emph{must} already exist in the blueprint. This prevents 21 | you from accidentally updating non-existent elements. 22 | \item The constructor for the blueprint is automatically run after the update by 23 | \code{refresh_blueprint()} to ensure that the blueprint is still valid. 
24 | } 25 | } 26 | \examples{ 27 | 28 | blueprint <- default_xy_blueprint() 29 | 30 | # `intercept` defaults to FALSE 31 | blueprint 32 | 33 | update_blueprint(blueprint, intercept = TRUE) 34 | 35 | # Can't update non-existent elements 36 | try(update_blueprint(blueprint, intercpt = TRUE)) 37 | 38 | # Can't add non-valid elements 39 | try(update_blueprint(blueprint, intercept = 1)) 40 | } 41 | -------------------------------------------------------------------------------- /man/validate_no_formula_duplication.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_no_formula_duplication} 4 | \alias{validate_no_formula_duplication} 5 | \alias{check_no_formula_duplication} 6 | \title{Ensure no duplicate terms appear in \code{formula}} 7 | \usage{ 8 | validate_no_formula_duplication(formula, original = FALSE) 9 | 10 | check_no_formula_duplication(formula, original = FALSE) 11 | } 12 | \arguments{ 13 | \item{formula}{A formula to check.} 14 | 15 | \item{original}{A logical. Should the original names be checked, or should 16 | the names after processing be used? If \code{FALSE}, \code{y ~ log(y)} is allowed 17 | because the names are \code{"y"} and \code{"log(y)"}, if \code{TRUE}, \code{y ~ log(y)} is not 18 | allowed because the original names are both \code{"y"}.} 19 | } 20 | \value{ 21 | \code{validate_no_formula_duplication()} returns \code{formula} invisibly. 22 | 23 | \code{check_no_formula_duplication()} returns a named list of two components, 24 | \code{ok} and \code{duplicates}. 25 | } 26 | \description{ 27 | validate - asserts the following: 28 | \itemize{ 29 | \item \code{formula} must not have duplicate terms on the left and right hand 30 | side of the formula. 31 | } 32 | 33 | check - returns the following: 34 | \itemize{ 35 | \item \code{ok} A logical. Does the check pass? 
36 | \item \code{duplicates} A character vector. The duplicate terms. 37 | } 38 | } 39 | \section{Validation}{ 40 | 41 | 42 | hardhat provides validation functions at two levels. 43 | \itemize{ 44 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 45 | always contains at least one element, \code{ok}, a logical that specifies if the 46 | check passed. Each check also has check specific elements in the returned 47 | list that can be used to construct meaningful error messages. 48 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 49 | functions call their corresponding check function, and 50 | then provide a default error message. If you, as a developer, want a 51 | different error message, then call the \verb{check_*()} function yourself, 52 | and provide your own validation function. 53 | } 54 | } 55 | 56 | \examples{ 57 | # All good 58 | check_no_formula_duplication(y ~ x) 59 | 60 | # Not good! 61 | check_no_formula_duplication(y ~ y) 62 | 63 | # This is generally okay 64 | check_no_formula_duplication(y ~ log(y)) 65 | 66 | # But you can be more strict 67 | check_no_formula_duplication(y ~ log(y), original = TRUE) 68 | 69 | # This would throw an error 70 | try(validate_no_formula_duplication(log(y) ~ log(y))) 71 | } 72 | \seealso{ 73 | Other validation functions: 74 | \code{\link{validate_column_names}()}, 75 | \code{\link{validate_outcomes_are_binary}()}, 76 | \code{\link{validate_outcomes_are_factors}()}, 77 | \code{\link{validate_outcomes_are_numeric}()}, 78 | \code{\link{validate_outcomes_are_univariate}()}, 79 | \code{\link{validate_prediction_size}()}, 80 | \code{\link{validate_predictors_are_numeric}()} 81 | } 82 | \concept{validation functions} 83 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_binary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit 
by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_binary} 4 | \alias{validate_outcomes_are_binary} 5 | \alias{check_outcomes_are_binary} 6 | \title{Ensure that the outcome has binary factors} 7 | \usage{ 8 | validate_outcomes_are_binary(outcomes) 9 | 10 | check_outcomes_are_binary(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_binary()} returns \code{outcomes} invisibly. 21 | 22 | \code{check_outcomes_are_binary()} returns a named list of three components, 23 | \code{ok}, \code{bad_cols}, and \code{num_levels}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have binary factor columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_cols} A character vector. The names of the columns with problems. 35 | \item \code{num_levels} An integer vector. The actual number of levels of the columns 36 | with problems. 37 | } 38 | } 39 | \details{ 40 | The expected way to use this validation function is to supply it the 41 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 42 | } 43 | \section{Validation}{ 44 | 45 | 46 | hardhat provides validation functions at two levels. 47 | \itemize{ 48 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 49 | always contains at least one element, \code{ok}, a logical that specifies if the 50 | check passed. Each check also has check specific elements in the returned 51 | list that can be used to construct meaningful error messages. 52 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. 
These 53 | functions call their corresponding check function, and 54 | then provide a default error message. If you, as a developer, want a 55 | different error message, then call the \verb{check_*()} function yourself, 56 | and provide your own validation function. 57 | } 58 | } 59 | 60 | \examples{ 61 | # Not a binary factor. 0 levels 62 | check_outcomes_are_binary(data.frame(x = 1)) 63 | 64 | # Not a binary factor. 1 level 65 | check_outcomes_are_binary(data.frame(x = factor("A"))) 66 | 67 | # All good 68 | check_outcomes_are_binary(data.frame(x = factor(c("A", "B")))) 69 | } 70 | \seealso{ 71 | Other validation functions: 72 | \code{\link{validate_column_names}()}, 73 | \code{\link{validate_no_formula_duplication}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_numeric}()}, 76 | \code{\link{validate_outcomes_are_univariate}()}, 77 | \code{\link{validate_prediction_size}()}, 78 | \code{\link{validate_predictors_are_numeric}()} 79 | } 80 | \concept{validation functions} 81 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_factors} 4 | \alias{validate_outcomes_are_factors} 5 | \alias{check_outcomes_are_factors} 6 | \title{Ensure that the outcome has only factor columns} 7 | \usage{ 8 | validate_outcomes_are_factors(outcomes) 9 | 10 | check_outcomes_are_factors(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_factors()} returns \code{outcomes} invisibly. 
21 | 22 | \code{check_outcomes_are_factors()} returns a named list of two components, 23 | \code{ok} and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have factor columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 56 | } 57 | } 58 | 59 | \examples{ 60 | # Not a factor column. 
61 | check_outcomes_are_factors(data.frame(x = 1)) 62 | 63 | # All good 64 | check_outcomes_are_factors(data.frame(x = factor(c("A", "B")))) 65 | } 66 | \seealso{ 67 | Other validation functions: 68 | \code{\link{validate_column_names}()}, 69 | \code{\link{validate_no_formula_duplication}()}, 70 | \code{\link{validate_outcomes_are_binary}()}, 71 | \code{\link{validate_outcomes_are_numeric}()}, 72 | \code{\link{validate_outcomes_are_univariate}()}, 73 | \code{\link{validate_prediction_size}()}, 74 | \code{\link{validate_predictors_are_numeric}()} 75 | } 76 | \concept{validation functions} 77 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_numeric} 4 | \alias{validate_outcomes_are_numeric} 5 | \alias{check_outcomes_are_numeric} 6 | \title{Ensure outcomes are all numeric} 7 | \usage{ 8 | validate_outcomes_are_numeric(outcomes) 9 | 10 | check_outcomes_are_numeric(outcomes, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_outcomes_are_numeric()} returns \code{outcomes} invisibly. 21 | 22 | \code{check_outcomes_are_numeric()} returns a named list of two components, 23 | \code{ok} and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{outcomes} must have numeric columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. 
The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 
56 | } 57 | } 58 | 59 | \examples{ 60 | # All good 61 | check_outcomes_are_numeric(mtcars) 62 | 63 | # Species is not numeric 64 | check_outcomes_are_numeric(iris) 65 | 66 | # This gives an intelligent error message 67 | try(validate_outcomes_are_numeric(iris)) 68 | } 69 | \seealso{ 70 | Other validation functions: 71 | \code{\link{validate_column_names}()}, 72 | \code{\link{validate_no_formula_duplication}()}, 73 | \code{\link{validate_outcomes_are_binary}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_univariate}()}, 76 | \code{\link{validate_prediction_size}()}, 77 | \code{\link{validate_predictors_are_numeric}()} 78 | } 79 | \concept{validation functions} 80 | -------------------------------------------------------------------------------- /man/validate_outcomes_are_univariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_outcomes_are_univariate} 4 | \alias{validate_outcomes_are_univariate} 5 | \alias{check_outcomes_are_univariate} 6 | \title{Ensure that the outcome is univariate} 7 | \usage{ 8 | validate_outcomes_are_univariate(outcomes) 9 | 10 | check_outcomes_are_univariate(outcomes) 11 | } 12 | \arguments{ 13 | \item{outcomes}{An object to check.} 14 | } 15 | \value{ 16 | \code{validate_outcomes_are_univariate()} returns \code{outcomes} invisibly. 17 | 18 | \code{check_outcomes_are_univariate()} returns a named list of two components, 19 | \code{ok} and \code{n_cols}. 20 | } 21 | \description{ 22 | validate - asserts the following: 23 | \itemize{ 24 | \item \code{outcomes} must have 1 column. Atomic vectors are treated as 25 | 1 column matrices. 26 | } 27 | 28 | check - returns the following: 29 | \itemize{ 30 | \item \code{ok} A logical. Does the check pass? 31 | \item \code{n_cols} A single numeric. The actual number of columns. 
32 | } 33 | } 34 | \details{ 35 | The expected way to use this validation function is to supply it the 36 | \verb{$outcomes} element of the result of a call to \code{\link[=mold]{mold()}}. 37 | } 38 | \section{Validation}{ 39 | 40 | 41 | hardhat provides validation functions at two levels. 42 | \itemize{ 43 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 44 | always contains at least one element, \code{ok}, a logical that specifies if the 45 | check passed. Each check also has check specific elements in the returned 46 | list that can be used to construct meaningful error messages. 47 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 48 | functions call their corresponding check function, and 49 | then provide a default error message. If you, as a developer, want a 50 | different error message, then call the \verb{check_*()} function yourself, 51 | and provide your own validation function. 52 | } 53 | } 54 | 55 | \examples{ 56 | validate_outcomes_are_univariate(data.frame(x = 1)) 57 | 58 | try(validate_outcomes_are_univariate(mtcars)) 59 | } 60 | \seealso{ 61 | Other validation functions: 62 | \code{\link{validate_column_names}()}, 63 | \code{\link{validate_no_formula_duplication}()}, 64 | \code{\link{validate_outcomes_are_binary}()}, 65 | \code{\link{validate_outcomes_are_factors}()}, 66 | \code{\link{validate_outcomes_are_numeric}()}, 67 | \code{\link{validate_prediction_size}()}, 68 | \code{\link{validate_predictors_are_numeric}()} 69 | } 70 | \concept{validation functions} 71 | -------------------------------------------------------------------------------- /man/validate_predictors_are_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validate_predictors_are_numeric} 4 | \alias{validate_predictors_are_numeric} 5 | 
\alias{check_predictors_are_numeric} 6 | \title{Ensure predictors are all numeric} 7 | \usage{ 8 | validate_predictors_are_numeric(predictors) 9 | 10 | check_predictors_are_numeric(predictors, ..., call = caller_env()) 11 | } 12 | \arguments{ 13 | \item{predictors}{An object to check.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | 17 | \item{call}{The call used for errors and warnings.} 18 | } 19 | \value{ 20 | \code{validate_predictors_are_numeric()} returns \code{predictors} invisibly. 21 | 22 | \code{check_predictors_are_numeric()} returns a named list of two components, 23 | \code{ok}, and \code{bad_classes}. 24 | } 25 | \description{ 26 | validate - asserts the following: 27 | \itemize{ 28 | \item \code{predictors} must have numeric columns. 29 | } 30 | 31 | check - returns the following: 32 | \itemize{ 33 | \item \code{ok} A logical. Does the check pass? 34 | \item \code{bad_classes} A named list. The names are the names of problematic columns, 35 | and the values are the classes of the matching column. 36 | } 37 | } 38 | \details{ 39 | The expected way to use this validation function is to supply it the 40 | \verb{$predictors} element of the result of a call to \code{\link[=mold]{mold()}}. 41 | } 42 | \section{Validation}{ 43 | 44 | 45 | hardhat provides validation functions at two levels. 46 | \itemize{ 47 | \item \verb{check_*()}: \emph{check a condition, and return a list}. The list 48 | always contains at least one element, \code{ok}, a logical that specifies if the 49 | check passed. Each check also has check specific elements in the returned 50 | list that can be used to construct meaningful error messages. 51 | \item \verb{validate_*()}: \emph{check a condition, and error if it does not pass}. These 52 | functions call their corresponding check function, and 53 | then provide a default error message. 
If you, as a developer, want a 54 | different error message, then call the \verb{check_*()} function yourself, 55 | and provide your own validation function. 56 | } 57 | } 58 | 59 | \examples{ 60 | # All good 61 | check_predictors_are_numeric(mtcars) 62 | 63 | # Species is not numeric 64 | check_predictors_are_numeric(iris) 65 | 66 | # This gives an intelligent error message 67 | try(validate_predictors_are_numeric(iris)) 68 | } 69 | \seealso{ 70 | Other validation functions: 71 | \code{\link{validate_column_names}()}, 72 | \code{\link{validate_no_formula_duplication}()}, 73 | \code{\link{validate_outcomes_are_binary}()}, 74 | \code{\link{validate_outcomes_are_factors}()}, 75 | \code{\link{validate_outcomes_are_numeric}()}, 76 | \code{\link{validate_outcomes_are_univariate}()}, 77 | \code{\link{validate_prediction_size}()} 78 | } 79 | \concept{validation functions} 80 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-180x180.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/pkgdown/favicon/favicon.ico 
-------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | data.sqlite 6 | *.html 7 | cloud.noindex 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Revdeps 2 | 3 | ## Failed to check (2) 4 | 5 | |package |version |error |warning |note | 6 | |:---------|:-------|:-----|:-------|:----| 7 | |censored |? | | | | 8 | |lnmixsurv |? | | | | 9 | 10 | ## New problems (1) 11 | 12 | |package |version |error |warning |note | 13 | |:-----------|:-------|:-----|:-------|:----| 14 | |[viralmodels](problems.md#viralmodels)|1.3.1 | |__+1__ | | 15 | 16 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 37 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 1 new problems 6 | * We failed to check 2 packages 7 | 8 | Issues with CRAN packages are summarised below. 9 | 10 | ### New problems 11 | (This reports the first line of each new failure) 12 | 13 | * viralmodels 14 | checking whether package ‘viralmodels’ can be installed ... WARNING 15 | 16 | ### Failed to check 17 | 18 | * censored (NA) 19 | * lnmixsurv (NA) 20 | -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | release_details: ??? 
6 | -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # viralmodels 2 | 3 |
4 | 5 | * Version: 1.3.1 6 | * GitHub: https://github.com/juanv66x/viralmodels 7 | * Source code: https://github.com/cran/viralmodels 8 | * Date/Publication: 2024-10-18 12:00:02 UTC 9 | * Number of recursive dependencies: 185 10 | 11 | Run `revdepcheck::cloud_details(, "viralmodels")` for more info 12 | 13 |
14 | 15 | ## Newly broken 16 | 17 | * checking whether package ‘viralmodels’ can be installed ... WARNING 18 | ``` 19 | Found the following significant warnings: 20 | Warning: replacing previous import ‘hardhat::contr_one_hot’ by ‘parsnip::contr_one_hot’ when loading ‘viralmodels’ 21 | See ‘/tmp/workdir/viralmodels/new/viralmodels.Rcheck/00install.out’ for details. 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(hardhat) 3 | 4 | test_check("hardhat") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint-formula-default.md: -------------------------------------------------------------------------------- 1 | # `levels` argument is validated 2 | 3 | Code 4 | new_default_formula_blueprint(levels = 1) 5 | Condition 6 | Error in `new_default_formula_blueprint()`: 7 | ! `levels` must be a list, not the number 1. 8 | 9 | --- 10 | 11 | Code 12 | new_default_formula_blueprint(levels = list(1)) 13 | Condition 14 | Error in `new_default_formula_blueprint()`: 15 | ! `levels` must be fully named. 16 | 17 | --- 18 | 19 | Code 20 | new_default_formula_blueprint(levels = list(a = 1)) 21 | Condition 22 | Error in `new_default_formula_blueprint()`: 23 | ! `levels` must only contain character vectors. 24 | 25 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint-recipe.md: -------------------------------------------------------------------------------- 1 | # `recipe` argument is validated 2 | 3 | Code 4 | new_recipe_blueprint(recipe = 1) 5 | Condition 6 | Error in `new_recipe_blueprint()`: 7 | ! `recipe` must be a recipe or `NULL`, not the number 1. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/blueprint.md: -------------------------------------------------------------------------------- 1 | # check on input to `new_blueprint()` 2 | 3 | Code 4 | new_blueprint(same_new_arg = 1, same_new_arg = 2) 5 | Condition 6 | Error in `new_blueprint()`: 7 | ! All elements of `...` must have unique names. 8 | 9 | # checks for updating a blueprint 10 | 11 | Code 12 | update_blueprint(blueprint, intercept = TRUE, intercept = FALSE) 13 | Condition 14 | Error in `update_blueprint()`: 15 | ! `...` must have unique names. 16 | 17 | --- 18 | 19 | Code 20 | update_blueprint(blueprint, intercpt = TRUE) 21 | Condition 22 | Error in `update_blueprint()`: 23 | ! All elements of `...` must already exist. 24 | i The following fields are new: "intercpt". 25 | 26 | # checks the ptype 27 | 28 | Code 29 | new_blueprint(ptypes = list(x = 1)) 30 | Condition 31 | Error in `new_blueprint()`: 32 | ! `ptypes` must have an element named "predictors". 33 | 34 | --- 35 | 36 | Code 37 | new_blueprint(ptypes = list(predictors = "not a tibble", outcomes = "not a tibble")) 38 | Condition 39 | Error in `new_blueprint()`: 40 | ! `ptypes$predictors` must be a tibble, not the string "not a tibble". 41 | 42 | --- 43 | 44 | Code 45 | new_blueprint(ptypes = list(predictors = tibble_too_long, outcomes = tibble_too_long)) 46 | Condition 47 | Error in `new_blueprint()`: 48 | ! `ptypes$predictors` must be size 0, not size 1. 49 | 50 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/case-weights.md: -------------------------------------------------------------------------------- 1 | # importance_weights() doesn't allow negative weights 2 | 3 | Code 4 | importance_weights(-1) 5 | Condition 6 | Error in `importance_weights()`: 7 | ! `x` can't contain negative weights. 
8 | 9 | # importance-weights constructor checks for double data 10 | 11 | Code 12 | new_importance_weights(1L) 13 | Condition 14 | Error in `new_importance_weights()`: 15 | ! `x` must be a double vector. 16 | 17 | # can't cast importance-weights -> integer (too lenient, likely fractional weights) 18 | 19 | Code 20 | vec_cast(x, integer()) 21 | Condition 22 | Error: 23 | ! Can't convert `x` to . 24 | 25 | # as.integer() fails (too lenient, likely fractional weights) 26 | 27 | Code 28 | as.integer(x) 29 | Condition 30 | Error in `as.integer()`: 31 | ! Can't convert `x` to . 32 | 33 | # frequency_weights() coerces to integer 34 | 35 | Code 36 | frequency_weights(1.5) 37 | Condition 38 | Error in `frequency_weights()`: 39 | ! Can't convert from `x` to due to loss of precision. 40 | * Locations: 1 41 | 42 | # frequency_weights() doesn't allow negative weights 43 | 44 | Code 45 | frequency_weights(-1L) 46 | Condition 47 | Error in `frequency_weights()`: 48 | ! `x` can't contain negative weights. 49 | 50 | # frequency-weights constructor checks for integer data 51 | 52 | Code 53 | new_frequency_weights(1) 54 | Condition 55 | Error in `new_frequency_weights()`: 56 | ! `x` must be an integer vector. 57 | 58 | # `x` must be integer or double 59 | 60 | Code 61 | new_case_weights("x", class = "subclass") 62 | Condition 63 | Error in `new_case_weights()`: 64 | ! `x` must be an integer or double vector. 
65 | 66 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/constructor.md: -------------------------------------------------------------------------------- 1 | # print - hardhat_model 2 | 3 | Code 4 | new_model() 5 | Output 6 | 7 | named list() 8 | Code 9 | new_model(class = "custom_class") 10 | Output 11 | 12 | named list() 13 | Code 14 | new_model(x = 4, y = "hi", class = "custom_class") 15 | Output 16 | 17 | $x 18 | [1] 4 19 | 20 | $y 21 | [1] "hi" 22 | 23 | 24 | # must use a valid blueprint 25 | 26 | Code 27 | new_model(blueprint = 1, class = "custom") 28 | Condition 29 | Error in `new_model()`: 30 | ! `blueprint` must be a , not the number 1. 31 | 32 | # `new_scalar()` must have elements 33 | 34 | Code 35 | new_scalar(list()) 36 | Condition 37 | Error in `new_scalar()`: 38 | ! `elems` must be a list of length 1 or greater. 39 | 40 | # `new_scalar()` must have unique names 41 | 42 | Code 43 | new_scalar(list(x = 1, x = 2)) 44 | Condition 45 | Error in `new_scalar()`: 46 | ! `elems` must have unique names. 47 | 48 | # `new_scalar()` must have no extra attributes 49 | 50 | Code 51 | new_scalar(x) 52 | Condition 53 | Error in `new_scalar()`: 54 | ! `elems` must have no attributes (apart from names). 55 | 56 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/delete-response.md: -------------------------------------------------------------------------------- 1 | # errors out if not passed a terms object 2 | 3 | Code 4 | delete_response(1) 5 | Condition 6 | Error in `delete_response()`: 7 | ! `terms` must be a , not the number 1. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/encoding.md: -------------------------------------------------------------------------------- 1 | # errors on missing values 2 | 3 | Code 4 | fct_encode_one_hot(x) 5 | Condition 6 | Error in `fct_encode_one_hot()`: 7 | ! 
`x` can't contain missing values. 8 | 9 | # errors on non-factors 10 | 11 | Code 12 | fct_encode_one_hot(1) 13 | Condition 14 | Error in `fct_encode_one_hot()`: 15 | ! `x` must be a factor, not a number. 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/forge.md: -------------------------------------------------------------------------------- 1 | # `run_forge()` throws an informative default error 2 | 3 | Code 4 | run_forge(1) 5 | Condition 6 | Error in `run_forge()`: 7 | ! No `run_forge()` method provided for a number. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/intercept.md: -------------------------------------------------------------------------------- 1 | # existing intercepts are skipped with a warning 2 | 3 | Code 4 | xx <- add_intercept_column(x) 5 | Condition 6 | Warning: 7 | `data` already has a column named "(Intercept)". 8 | i Returning `data` unchanged. 9 | 10 | # name can only be a single character 11 | 12 | Code 13 | add_intercept_column(mtcars, name = c("x", "y")) 14 | Condition 15 | Error in `add_intercept_column()`: 16 | ! `name` must be a valid name, not a character vector. 17 | 18 | --- 19 | 20 | Code 21 | add_intercept_column(mtcars, name = 1) 22 | Condition 23 | Error in `add_intercept_column()`: 24 | ! `name` must be a valid name, not the number 1. 25 | 26 | # data has to be a data frame or matrix 27 | 28 | Code 29 | add_intercept_column(1) 30 | Condition 31 | Error in `add_intercept_column()`: 32 | ! `data` must be a data frame or a matrix, not the number 1. 33 | 34 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/levels.md: -------------------------------------------------------------------------------- 1 | # Can extract levels from an outcome 2 | 3 | Code 4 | get_outcome_levels("a") 5 | Condition 6 | Error in `standardize()`: 7 | ! 
No `standardize()` method provided for a string. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/model-matrix.md: -------------------------------------------------------------------------------- 1 | # `contr_one_hot()` input checks 2 | 3 | Code 4 | contr_one_hot(n = 2, sparse = TRUE) 5 | Condition 6 | Warning: 7 | `sparse = TRUE` not implemented for `contr_one_hot()`. 8 | Output 9 | 1 2 10 | 1 1 0 11 | 2 0 1 12 | 13 | --- 14 | 15 | Code 16 | contr_one_hot(n = 2, contrasts = FALSE) 17 | Condition 18 | Warning: 19 | `contrasts = FALSE` not implemented for `contr_one_hot()`. 20 | Output 21 | 1 2 22 | 1 1 0 23 | 2 0 1 24 | 25 | --- 26 | 27 | Code 28 | contr_one_hot(n = 1:2) 29 | Condition 30 | Error in `contr_one_hot()`: 31 | ! `n` must be a whole number, not an integer vector. 32 | 33 | --- 34 | 35 | Code 36 | contr_one_hot(n = list(1:2)) 37 | Condition 38 | Error in `contr_one_hot()`: 39 | ! `n` must be a whole number, not a list. 40 | 41 | --- 42 | 43 | Code 44 | contr_one_hot(character(0)) 45 | Condition 46 | Error in `contr_one_hot()`: 47 | ! `n` cannot be empty. 48 | 49 | --- 50 | 51 | Code 52 | contr_one_hot(-1) 53 | Condition 54 | Error in `contr_one_hot()`: 55 | ! `n` must be a whole number larger than or equal to 1, not the number -1. 56 | 57 | --- 58 | 59 | Code 60 | contr_one_hot(list()) 61 | Condition 62 | Error in `contr_one_hot()`: 63 | ! `n` must be a whole number, not an empty list. 64 | 65 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/model-offset.md: -------------------------------------------------------------------------------- 1 | # Only numeric columns can be offsets 2 | 3 | Code 4 | mold(~ Sepal.Width + offset(Species), iris) 5 | Condition 6 | Error in `mold()`: 7 | ! Column "offset(Species)" is tagged as an offset and thus must be numeric, not a object. 
8 | 9 | # offset columns are stored as predictors 10 | 11 | Code 12 | forge(iris2, x$blueprint) 13 | Condition 14 | Error in `forge()`: 15 | ! The required column "Sepal.Length" is missing. 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold-recipe.md: -------------------------------------------------------------------------------- 1 | # `data` is validated 2 | 3 | Code 4 | mold(recipes::recipe(Species ~ Sepal.Length, data = iris), 1) 5 | Condition 6 | Error in `mold()`: 7 | ! `data` must be a data frame or a matrix, not the number 1. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold-xy.md: -------------------------------------------------------------------------------- 1 | # unknown mold() inputs throw an error 2 | 3 | Code 4 | mold("hi") 5 | Condition 6 | Error in `mold()`: 7 | ! `x` must be a data frame, matrix, recipe, or formula, not the string "hi". 8 | 9 | # cannot pass anything in the dots 10 | 11 | Code 12 | mold(iris[, "Sepal.Length", drop = FALSE], iris$Species, z = "in the dots") 13 | Condition 14 | Error in `mold()`: 15 | ! `...` must be empty. 16 | x Problematic argument: 17 | * z = "in the dots" 18 | 19 | --- 20 | 21 | Code 22 | mold(iris[, "Sepal.Length", drop = FALSE], iris$Species, blueprint = default_xy_blueprint( 23 | composition = "dgCMatrix"), z = "in the dots") 24 | Condition 25 | Error in `mold()`: 26 | ! `...` must be empty. 27 | x Problematic argument: 28 | * z = "in the dots" 29 | 30 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/mold.md: -------------------------------------------------------------------------------- 1 | # `run_mold()` throws an informative default error 2 | 3 | Code 4 | run_mold(1) 5 | Condition 6 | Error in `run_mold()`: 7 | ! No `run_mold()` method provided for a number. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/print.md: -------------------------------------------------------------------------------- 1 | # print - formula 2 | 3 | Code 4 | mold(Species ~ Sepal.Length, iris)$blueprint 5 | Message 6 | Formula blueprint: 7 | # Predictors: 1 8 | # Outcomes: 1 9 | Intercept: FALSE 10 | Novel Levels: FALSE 11 | Composition: tibble 12 | Indicators: traditional 13 | 14 | Code 15 | mold(~Sepal.Length, iris)$blueprint 16 | Message 17 | Formula blueprint: 18 | # Predictors: 1 19 | # Outcomes: 0 20 | Intercept: FALSE 21 | Novel Levels: FALSE 22 | Composition: tibble 23 | Indicators: traditional 24 | 25 | 26 | # print - default 27 | 28 | Code 29 | mold(iris[, c("Sepal.Length"), drop = FALSE], iris$Species)$blueprint 30 | Message 31 | XY blueprint: 32 | # Predictors: 1 33 | # Outcomes: 1 34 | Intercept: FALSE 35 | Novel Levels: FALSE 36 | Composition: tibble 37 | 38 | 39 | # print - recipe 40 | 41 | Code 42 | mold(recipes::recipe(Species ~ Sepal.Length, iris), iris)$blueprint 43 | Condition 44 | Warning: 45 | The `strings_as_factors` argument of `prep.recipe()` is deprecated as of recipes 1.3.0. 46 | i Please use the `strings_as_factors` argument of `recipe()` instead. 47 | Message 48 | Recipe blueprint: 49 | # Predictors: 1 50 | # Outcomes: 1 51 | Intercept: FALSE 52 | Novel Levels: FALSE 53 | Composition: tibble 54 | 55 | 56 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/recompose.md: -------------------------------------------------------------------------------- 1 | # columns must be numeric when coercing to matrix 2 | 3 | Code 4 | recompose(df, composition = "matrix") 5 | Condition 6 | Error: 7 | ! `data` must only contain numeric columns. 8 | i These columns aren't numeric: "y" and "z". 
9 | 10 | # columns must be numeric when coercing to sparse matrix 11 | 12 | Code 13 | recompose(df, composition = "dgCMatrix") 14 | Condition 15 | Error: 16 | ! `data` must only contain numeric columns. 17 | i These columns aren't numeric: "y" and "z". 18 | 19 | # checks for data frame input 20 | 21 | Code 22 | recompose(1) 23 | Condition 24 | Error: 25 | ! `data` must be a data frame, not the number 1. 26 | 27 | # dots must be empty 28 | 29 | Code 30 | recompose(data.frame(), 1) 31 | Condition 32 | Error in `recompose()`: 33 | ! `...` must be empty. 34 | x Problematic argument: 35 | * ..1 = 1 36 | i Did you forget to name an argument? 37 | 38 | # validates `composition` 39 | 40 | Code 41 | recompose(data.frame(), composition = "foo") 42 | Condition 43 | Error: 44 | ! `composition` must be one of "tibble", "data.frame", "matrix", or "dgCMatrix", not "foo". 45 | 46 | --- 47 | 48 | Code 49 | recompose(data.frame(), composition = 1) 50 | Condition 51 | Error: 52 | ! `composition` must be a string or character vector. 53 | 54 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/scream.md: -------------------------------------------------------------------------------- 1 | # novel levels can be ignored 2 | 3 | Code 4 | x <- scream(new, ptype, allow_novel_levels = TRUE) 5 | 6 | # novel levels in a new character vector can be ignored 7 | 8 | Code 9 | x <- scream(new, ptype, allow_novel_levels = TRUE) 10 | 11 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/shrink.md: -------------------------------------------------------------------------------- 1 | # `data` must be data-like 2 | 3 | Code 4 | shrink(1, ptype) 5 | Condition 6 | Error in `shrink()`: 7 | ! `data` must be a data frame or a matrix, not the number 1. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/spruce.md: -------------------------------------------------------------------------------- 1 | # spruce - numeric 2 | 3 | Code 4 | spruce_numeric("hi") 5 | Condition 6 | Error in `spruce_numeric()`: 7 | ! `pred` must be a numeric vector, not the string "hi". 8 | 9 | --- 10 | 11 | Code 12 | spruce_numeric(matrix(1)) 13 | Condition 14 | Error in `spruce_numeric()`: 15 | ! `pred` must be a numeric vector, not a double matrix. 16 | 17 | # spruce - class 18 | 19 | Code 20 | spruce_class(1) 21 | Condition 22 | Error in `spruce_class()`: 23 | ! `pred_class` must be a factor, not the number 1. 24 | 25 | --- 26 | 27 | Code 28 | spruce_class("hi") 29 | Condition 30 | Error in `spruce_class()`: 31 | ! `pred_class` must be a factor, not the string "hi". 32 | 33 | # spruce - prob 34 | 35 | Code 36 | spruce_prob(1, prob_matrix) 37 | Condition 38 | Error in `spruce_prob()`: 39 | ! `pred_levels` must be a character vector, not the number 1. 40 | 41 | --- 42 | 43 | Code 44 | spruce_prob(pred_levels, 1) 45 | Condition 46 | Error in `spruce_prob()`: 47 | ! `prob_matrix` must be a numeric matrix, not the number 1. 48 | 49 | --- 50 | 51 | Code 52 | spruce_prob("a", matrix("a")) 53 | Condition 54 | Error in `spruce_prob()`: 55 | ! `prob_matrix` must be a numeric matrix, not a character matrix. 56 | 57 | --- 58 | 59 | Code 60 | spruce_prob(c("a", "b"), matrix(1, ncol = 3)) 61 | Condition 62 | Error in `spruce_prob()`: 63 | ! The number of levels (2) must be equal to the number of class probability columns (3). 64 | 65 | --- 66 | 67 | Code 68 | spruce_prob(c("a"), matrix(1, ncol = 2)) 69 | Condition 70 | Error in `spruce_prob()`: 71 | ! The number of levels (1) must be equal to the number of class probability columns (2). 
72 | 73 | # spruce multiple helpers check input type 74 | 75 | Code 76 | spruce_numeric_multiple(1, "x") 77 | Condition 78 | Error in `spruce_numeric_multiple()`: 79 | ! Each element of `...` must be a numeric vector, not a string. 80 | 81 | --- 82 | 83 | Code 84 | spruce_class_multiple(1) 85 | Condition 86 | Error in `spruce_class_multiple()`: 87 | ! Each element of `...` must be a factor, not a number. 88 | 89 | --- 90 | 91 | Code 92 | spruce_prob_multiple(1) 93 | Condition 94 | Error in `spruce_prob_multiple()`: 95 | ! Each element of `...` must be a tibble, not a number. 96 | 97 | # spruce multiple helpers check input sizes (and disallow recycling) 98 | 99 | Code 100 | spruce_numeric_multiple(1, 1:2) 101 | Condition 102 | Error in `spruce_numeric_multiple()`: 103 | ! `..1` must have size 2, not size 1. 104 | 105 | --- 106 | 107 | Code 108 | spruce_class_multiple(factor("x"), factor(c("a", "b"))) 109 | Condition 110 | Error in `spruce_class_multiple()`: 111 | ! `..1` must have size 2, not size 1. 112 | 113 | --- 114 | 115 | Code 116 | spruce_prob_multiple(tibble(x = 1), tibble(x = 1:2)) 117 | Condition 118 | Error in `spruce_prob_multiple()`: 119 | ! `..1` must have size 2, not size 1. 120 | 121 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/standardize.md: -------------------------------------------------------------------------------- 1 | # standardize - matrix 2 | 3 | Code 4 | standardize(mat_bad) 5 | Condition 6 | Error in `standardize()`: 7 | ! All columns of `y` must have unique names. 8 | 9 | --- 10 | 11 | Code 12 | standardize(mat_bad2) 13 | Condition 14 | Error in `standardize()`: 15 | ! `y` must be a numeric matrix, not a character matrix. 16 | 17 | # standardize - array 18 | 19 | Code 20 | standardize(bad) 21 | Condition 22 | Error in `standardize()`: 23 | ! All columns of `y` must have unique names. 
24 | 25 | --- 26 | 27 | Code 28 | standardize(bad2) 29 | Condition 30 | Error in `standardize()`: 31 | ! `y` must be a numeric matrix, not a character matrix. 32 | 33 | # standardize - data.frame 34 | 35 | Code 36 | standardize(bad) 37 | Condition 38 | Error in `standardize()`: 39 | ! All columns of `y` must have unique names. 40 | 41 | --- 42 | 43 | Code 44 | standardize(bad2) 45 | Condition 46 | Error in `standardize()`: 47 | ! Not all columns of `y` are known outcome types. 48 | i This column has an unknown type: "x". 49 | 50 | --- 51 | 52 | Code 53 | standardize(bad3) 54 | Condition 55 | Error in `standardize()`: 56 | ! Not all columns of `y` are known outcome types. 57 | i These columns have unknown types: "x" and "y". 58 | 59 | # standardize - unknown 60 | 61 | Code 62 | standardize("hi") 63 | Condition 64 | Error in `standardize()`: 65 | ! No `standardize()` method provided for a string. 66 | 67 | --- 68 | 69 | Code 70 | standardize(Sys.time()) 71 | Condition 72 | Error in `standardize()`: 73 | ! No `standardize()` method provided for a object. 74 | 75 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/table.md: -------------------------------------------------------------------------------- 1 | # `na_remove` is validated 2 | 3 | Code 4 | weighted_table(x, y, weights = w, na_remove = c(TRUE, FALSE)) 5 | Condition 6 | Error in `weighted_table()`: 7 | ! `na_remove` must be `TRUE` or `FALSE`, not a logical vector. 8 | 9 | --- 10 | 11 | Code 12 | weighted_table(x, y, weights = w, na_remove = 1) 13 | Condition 14 | Error in `weighted_table()`: 15 | ! `na_remove` must be `TRUE` or `FALSE`, not the number 1. 16 | 17 | # requires at least one `...` 18 | 19 | Code 20 | weighted_table(weights = w) 21 | Condition 22 | Error in `weighted_table()`: 23 | ! At least one vector must be supplied to `...`. 
24 | 25 | # requires all `...` to be factors 26 | 27 | Code 28 | weighted_table(1, weights = w) 29 | Condition 30 | Error in `weighted_table()`: 31 | ! All elements of `...` must be factors. 32 | 33 | # requires all `...` to be the same size 34 | 35 | Code 36 | weighted_table(x, y, weights = w) 37 | Condition 38 | Error in `weighted_table()`: 39 | ! All elements of `...` must be the same size. 40 | 41 | # requires all `weights` to be the same size as `...` elements 42 | 43 | Code 44 | weighted_table(x, y, weights = w) 45 | Condition 46 | Error in `weighted_table()`: 47 | ! `weights` must have size 3, not size 4. 48 | 49 | # requires `weights` to be castable to double 50 | 51 | Code 52 | weighted_table(x, weights = "a") 53 | Condition 54 | Error in `weighted_table()`: 55 | ! Can't convert `weights` to . 56 | 57 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/tune.md: -------------------------------------------------------------------------------- 1 | # `id` is validated 2 | 3 | Code 4 | tune(1) 5 | Condition 6 | Error in `tune()`: 7 | ! `id` must be a single string, not the number 1. 8 | 9 | --- 10 | 11 | Code 12 | tune(c("x", "y")) 13 | Condition 14 | Error in `tune()`: 15 | ! `id` must be a single string, not a character vector. 16 | 17 | --- 18 | 19 | Code 20 | tune(NA_character_) 21 | Condition 22 | Error in `tune()`: 23 | ! `id` must be a single string, not a character `NA`. 24 | 25 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/use.md: -------------------------------------------------------------------------------- 1 | # no `model` aborts normally 2 | 3 | Code 4 | create_modeling_package(path = "my/path") 5 | Condition 6 | Error in `create_modeling_package()`: 7 | ! `model` is absent but must be supplied. 
8 | 9 | # no `path` aborts normally 10 | 11 | Code 12 | create_modeling_package(model = "my_model") 13 | Condition 14 | Error in `create_modeling_package()`: 15 | ! `path` is absent but must be supplied. 16 | 17 | # `model` can only be a single string 18 | 19 | Code 20 | create_modeling_package(path = "my/path", model = c("model1", "model2")) 21 | Condition 22 | Error in `create_modeling_package()`: 23 | ! `model` must be a single string, not a character vector. 24 | 25 | --- 26 | 27 | Code 28 | create_modeling_package(path = "my/path", model = 1) 29 | Condition 30 | Error in `create_modeling_package()`: 31 | ! `model` must be a single string, not the number 1. 32 | 33 | --- 34 | 35 | Code 36 | create_modeling_package(path = "my/path", model = "model with space") 37 | Condition 38 | Error in `create_modeling_package()`: 39 | ! `model` must not contain any spaces. 40 | 41 | -------------------------------------------------------------------------------- /tests/testthat/data-raw/hardhat-0.2.0.R: -------------------------------------------------------------------------------- 1 | # Objects used for backwards compatibility testing. 2 | # Objects created with hardhat 0.2.0. 
3 | # devtools::install_version("hardhat", "0.2.0") 4 | 5 | # ------------------------------------------------------------------------------ 6 | # Testing compatibility of `mold()` and a basic recipe 7 | 8 | dir <- here::here("tests", "testthat", "data") 9 | file <- fs::path(dir, "hardhat-0.2.0-pre-mold-recipe.rds") 10 | 11 | data <- tibble::tibble(y = 1:5, x = 6:10) 12 | 13 | blueprint <- hardhat::default_recipe_blueprint() 14 | 15 | object <- list(data = data, blueprint = blueprint) 16 | 17 | saveRDS( 18 | object, 19 | file = file, 20 | version = 2 21 | ) 22 | 23 | # ------------------------------------------------------------------------------ 24 | # Testing compatibility of `forge()` and a basic recipe 25 | 26 | dir <- here::here("tests", "testthat", "data") 27 | file <- fs::path(dir, "hardhat-0.2.0-post-mold-recipe.rds") 28 | 29 | data <- tibble::tibble(y = 1:5, x = 6:10) 30 | new_data <- tibble::tibble(y = 6:10, x = 11:15) 31 | 32 | rec <- recipes::recipe(y ~ ., data = data) 33 | rec <- recipes::step_mutate(rec, z = 1) 34 | 35 | blueprint <- hardhat::default_recipe_blueprint() 36 | mold <- hardhat::mold(rec, data = data, blueprint = blueprint) 37 | blueprint <- mold$blueprint 38 | 39 | object <- list(new_data = new_data, blueprint = blueprint) 40 | 41 | saveRDS( 42 | object, 43 | file = file, 44 | version = 2 45 | ) 46 | 47 | # ------------------------------------------------------------------------------ 48 | # Testing compatibility of `forge()` and a recipe with a nonstandard role 49 | 50 | dir <- here::here("tests", "testthat", "data") 51 | file <- fs::path(dir, "hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds") 52 | 53 | data <- tibble::tibble(y = 1:5, x = 6:10, id = 1:5) 54 | new_data <- tibble::tibble(y = 6:10, x = 11:15, id = 6:10) 55 | 56 | rec <- recipes::recipe(y ~ ., data = data) 57 | rec <- recipes::update_role(rec, id, new_role = "id") 58 | rec <- recipes::step_mutate(rec, z = 1) 59 | 60 | blueprint <- hardhat::default_recipe_blueprint() 61 | 
mold <- hardhat::mold(rec, data = data, blueprint = blueprint) 62 | blueprint <- mold$blueprint 63 | 64 | object <- list(new_data = new_data, blueprint = blueprint) 65 | 66 | saveRDS( 67 | object, 68 | file = file, 69 | version = 2 70 | ) 71 | 72 | # ------------------------------------------------------------------------------ 73 | -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-post-mold-recipe-nonstandard-role.rds -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-post-mold-recipe.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-post-mold-recipe.rds -------------------------------------------------------------------------------- /tests/testthat/data/hardhat-0.2.0-pre-mold-recipe.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/hardhat/61c54fc7c93f82e979509fc39bfaaa39d74c9719/tests/testthat/data/hardhat-0.2.0-pre-mold-recipe.rds -------------------------------------------------------------------------------- /tests/testthat/helper-matrix.R: -------------------------------------------------------------------------------- 1 | expect_matrix <- function(x) { 2 | expect_true(inherits(x, "matrix")) 3 | } 4 | -------------------------------------------------------------------------------- /tests/testthat/helper-sparsevctrs.R: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # For sparse tibble testing 3 | 4 | sparse_hotel_rates <- function() { 5 | # 99.2 sparsity 6 | hotel_rates <- modeldata::hotel_rates 7 | 8 | prefix_colnames <- function(x, prefix) { 9 | colnames(x) <- paste(colnames(x), prefix, sep = "_") 10 | x 11 | } 12 | 13 | dummies_country <- hardhat::fct_encode_one_hot(hotel_rates$country) 14 | dummies_company <- hardhat::fct_encode_one_hot(hotel_rates$company) 15 | dummies_agent <- hardhat::fct_encode_one_hot(hotel_rates$agent) 16 | 17 | res <- cbind( 18 | hotel_rates["avg_price_per_room"], 19 | prefix_colnames(dummies_country, "country"), 20 | prefix_colnames(dummies_company, "company"), 21 | prefix_colnames(dummies_agent, "agent") 22 | ) 23 | 24 | res <- as.matrix(res) 25 | Matrix::Matrix(res, sparse = TRUE) 26 | } 27 | -------------------------------------------------------------------------------- /tests/testthat/test-blueprint-formula-default.R: -------------------------------------------------------------------------------- 1 | test_that("`levels` argument is validated", { 2 | expect_snapshot(error = TRUE, { 3 | new_default_formula_blueprint(levels = 1) 4 | }) 5 | expect_snapshot(error = TRUE, { 6 | new_default_formula_blueprint(levels = list(1)) 7 | }) 8 | expect_snapshot(error = TRUE, { 9 | new_default_formula_blueprint(levels = list("a" = 1)) 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-blueprint-recipe.R: -------------------------------------------------------------------------------- 1 | test_that("`recipe` argument is validated", { 2 | expect_snapshot(error = TRUE, { 3 | new_recipe_blueprint(recipe = 1) 4 | }) 5 | }) 6 | 7 | test_that("`recipe` argument allows `NULL`", { 8 | x <- new_recipe_blueprint(recipe = NULL) 9 | expect_null(x$recipe) 10 | }) 11 | -------------------------------------------------------------------------------- 
/tests/testthat/test-blueprint.R: -------------------------------------------------------------------------------- 1 | test_that("check on input to `new_blueprint()`", { 2 | expect_snapshot(error = TRUE, { 3 | new_blueprint(same_new_arg = 1, same_new_arg = 2) 4 | }) 5 | }) 6 | 7 | test_that("checks for updating a blueprint", { 8 | blueprint <- default_xy_blueprint() 9 | 10 | expect_snapshot(error = TRUE, { 11 | update_blueprint(blueprint, intercept = TRUE, intercept = FALSE) 12 | }) 13 | expect_snapshot(error = TRUE, { 14 | update_blueprint(blueprint, intercpt = TRUE) 15 | }) 16 | }) 17 | 18 | test_that("checks the ptype", { 19 | expect_snapshot(error = TRUE, { 20 | new_blueprint(ptypes = list(x = 1)) 21 | }) 22 | expect_snapshot(error = TRUE, { 23 | new_blueprint( 24 | ptypes = list("predictors" = "not a tibble", outcomes = "not a tibble") 25 | ) 26 | }) 27 | 28 | tibble_too_long <- tibble::tibble(x = 1) 29 | expect_snapshot(error = TRUE, { 30 | new_blueprint( 31 | ptypes = list("predictors" = tibble_too_long, outcomes = tibble_too_long) 32 | ) 33 | }) 34 | }) 35 | -------------------------------------------------------------------------------- /tests/testthat/test-constructor.R: -------------------------------------------------------------------------------- 1 | test_that("print - hardhat_model", { 2 | expect_snapshot({ 3 | new_model() 4 | new_model(class = "custom_class") 5 | new_model(x = 4, y = "hi", class = "custom_class") 6 | }) 7 | }) 8 | 9 | test_that("can create new empty models", { 10 | x <- new_model() 11 | 12 | expect_s3_class(x$blueprint, "default_xy_blueprint") 13 | expect_s3_class(x, "hardhat_model") 14 | expect_s3_class(x, "hardhat_scalar") 15 | }) 16 | 17 | test_that("can create new models", { 18 | x <- new_model(class = "custom") 19 | 20 | expect_s3_class(x, "custom") 21 | expect_s3_class(x$blueprint, "default_xy_blueprint") 22 | }) 23 | 24 | test_that("can have custom elements", { 25 | x <- new_model( 26 | y = 1, 27 | blueprint = 
default_xy_blueprint(), 28 | class = "custom_class" 29 | ) 30 | 31 | expect_equal(x$y, 1) 32 | }) 33 | 34 | test_that("must use a valid blueprint", { 35 | expect_no_error( 36 | new_model(blueprint = default_xy_blueprint(), class = "custom") 37 | ) 38 | 39 | expect_snapshot(error = TRUE, { 40 | new_model(blueprint = 1, class = "custom") 41 | }) 42 | }) 43 | 44 | test_that("`new_scalar()` must have elements", { 45 | expect_snapshot(error = TRUE, new_scalar(list())) 46 | }) 47 | 48 | test_that("`new_scalar()` must have unique names", { 49 | expect_snapshot(error = TRUE, new_scalar(list(x = 1, x = 2))) 50 | }) 51 | 52 | test_that("`new_scalar()` must have no extra attributes", { 53 | x <- list(x = 1) 54 | attr(x, "extra") <- 1 55 | expect_snapshot(error = TRUE, new_scalar(x)) 56 | }) 57 | -------------------------------------------------------------------------------- /tests/testthat/test-delete-response.R: -------------------------------------------------------------------------------- 1 | test_that("identical to delete.response() if no dataClasses", { 2 | trms <- terms(y ~ x) 3 | 4 | expect_equal( 5 | delete_response(trms), 6 | delete.response(trms) 7 | ) 8 | }) 9 | 10 | test_that("doesn't return dataClasses for y", { 11 | framed <- model_frame(Sepal.Width ~ Species, iris) 12 | 13 | expect_equal( 14 | attr(delete_response(framed$terms), "dataClasses"), 15 | c(Species = "factor") 16 | ) 17 | 18 | # expected base R behavior 19 | expect_equal( 20 | attr(delete.response(framed$terms), "dataClasses"), 21 | c(Sepal.Width = "numeric", Species = "factor") 22 | ) 23 | }) 24 | 25 | test_that("equal results if no response, but dataClasses exist", { 26 | framed <- model_frame(~Species, iris) 27 | 28 | expect_equal( 29 | delete_response(framed$terms), 30 | delete.response(framed$terms) 31 | ) 32 | }) 33 | 34 | test_that("errors out if not passed a terms object", { 35 | expect_snapshot(error = TRUE, { 36 | delete_response(1) 37 | }) 38 | }) 39 | 
-------------------------------------------------------------------------------- /tests/testthat/test-encoding.R: -------------------------------------------------------------------------------- 1 | test_that("generates one-hot indicator matrix", { 2 | x <- factor(c("a", "b", "a", "a", "c")) 3 | 4 | expect <- matrix( 5 | 0L, 6 | nrow = 5, 7 | ncol = 3, 8 | dimnames = list(NULL, c("a", "b", "c")) 9 | ) 10 | expect[c(1, 3, 4, 7, 15)] <- 1L 11 | 12 | expect_identical(fct_encode_one_hot(x), expect) 13 | }) 14 | 15 | test_that("works with factors with just 1 level", { 16 | x <- factor(rep("a", 3)) 17 | 18 | expect_identical( 19 | fct_encode_one_hot(x), 20 | matrix(1L, nrow = 3, ncol = 1, dimnames = list(NULL, "a")) 21 | ) 22 | }) 23 | 24 | test_that("works with levels that aren't in the data", { 25 | x <- factor(c("a", "c", "a"), levels = c("a", "b", "c", "d")) 26 | 27 | expect <- matrix( 28 | 0L, 29 | nrow = 3, 30 | ncol = 4, 31 | dimnames = list(NULL, c("a", "b", "c", "d")) 32 | ) 33 | expect[c(1, 3, 8)] <- 1L 34 | 35 | expect_identical(fct_encode_one_hot(x), expect) 36 | }) 37 | 38 | test_that("works with factors with explicit `NA` level but no `NA` data", { 39 | expect_identical( 40 | fct_encode_one_hot(factor("a", levels = c("a", NA), exclude = NULL)), 41 | matrix( 42 | data = c(1L, 0L), 43 | nrow = 1, 44 | ncol = 2, 45 | dimnames = list(NULL, c("a", NA)) 46 | ) 47 | ) 48 | }) 49 | 50 | test_that("works with empty factors", { 51 | expect_identical( 52 | fct_encode_one_hot(factor()), 53 | matrix(data = integer(), nrow = 0, ncol = 0, dimnames = list(NULL, NULL)) 54 | ) 55 | }) 56 | 57 | test_that("works with empty factors with levels", { 58 | expect_identical( 59 | fct_encode_one_hot(factor(levels = c("a", "b"))), 60 | matrix( 61 | data = integer(), 62 | nrow = 0, 63 | ncol = 2, 64 | dimnames = list(NULL, c("a", "b")) 65 | ) 66 | ) 67 | }) 68 | 69 | test_that("propagates names onto the row names", { 70 | x <- set_names(factor(c("a", "b", "a")), c("x", "y", "z")) 71 | 
expect_identical(rownames(fct_encode_one_hot(x)), c("x", "y", "z")) 72 | }) 73 | 74 | test_that("works with ordered factors", { 75 | x <- factor( 76 | c("a", "b", "a", "a", "c"), 77 | levels = c("c", "b", "a"), 78 | ordered = TRUE 79 | ) 80 | 81 | expect <- matrix( 82 | 0L, 83 | nrow = 5, 84 | ncol = 3, 85 | dimnames = list(NULL, c("c", "b", "a")) 86 | ) 87 | expect[c(5, 7, 11, 13, 14)] <- 1L 88 | 89 | expect_identical(fct_encode_one_hot(x), expect) 90 | }) 91 | 92 | test_that("errors on missing values", { 93 | x <- factor(c("a", NA)) 94 | 95 | expect_snapshot(error = TRUE, { 96 | fct_encode_one_hot(x) 97 | }) 98 | }) 99 | 100 | test_that("errors on non-factors", { 101 | expect_snapshot(error = TRUE, { 102 | fct_encode_one_hot(1) 103 | }) 104 | }) 105 | -------------------------------------------------------------------------------- /tests/testthat/test-forge.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # run_forge() 3 | 4 | test_that("`run_forge()` throws an informative default error", { 5 | expect_snapshot(error = TRUE, run_forge(1)) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test-intercept.R: -------------------------------------------------------------------------------- 1 | test_that("can add an intercept column", { 2 | x <- add_intercept_column(mtcars) 3 | 4 | expect_equal(colnames(x)[1], "(Intercept)") 5 | expect_type(x[, 1], "integer") 6 | 7 | xx <- add_intercept_column(as.matrix(mtcars)) 8 | 9 | expect_matrix(xx) 10 | expect_equal(colnames(xx)[1], "(Intercept)") 11 | }) 12 | 13 | test_that("existing intercepts are skipped with a warning", { 14 | x <- add_intercept_column(mtcars) 15 | 16 | expect_snapshot({ 17 | xx <- add_intercept_column(x) 18 | }) 19 | 20 | expect_equal( 21 | xx, 22 | x 23 | ) 24 | }) 25 | 26 | test_that("can change the intercept column name", { 27 
| x <- add_intercept_column(mtcars, name = "intercept") 28 | 29 | expect_equal(colnames(x)[1], "intercept") 30 | }) 31 | 32 | test_that("name can only be a single character", { 33 | expect_snapshot(error = TRUE, { 34 | add_intercept_column(mtcars, name = c("x", "y")) 35 | }) 36 | expect_snapshot(error = TRUE, { 37 | add_intercept_column(mtcars, name = 1) 38 | }) 39 | }) 40 | 41 | test_that("data has to be a data frame or matrix", { 42 | expect_snapshot(error = TRUE, { 43 | add_intercept_column(1) 44 | }) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-levels.R: -------------------------------------------------------------------------------- 1 | test_that("can extract levels", { 2 | x <- data.frame( 3 | x = factor(letters[1:5]), 4 | y = factor(letters[6:10]) 5 | ) 6 | 7 | expect_equal( 8 | get_levels(x), 9 | list( 10 | x = letters[1:5], 11 | y = letters[6:10] 12 | ) 13 | ) 14 | }) 15 | 16 | test_that("non-factors are ignored", { 17 | x <- data.frame( 18 | x = factor(letters[1:5]), 19 | y = 6:10 20 | ) 21 | 22 | expect_equal( 23 | get_levels(x), 24 | list( 25 | x = letters[1:5] 26 | ) 27 | ) 28 | }) 29 | 30 | test_that("NULL returned when no factors", { 31 | x <- data.frame( 32 | x = 1:5, 33 | y = 6:10 34 | ) 35 | 36 | expect_equal( 37 | get_levels(x), 38 | NULL 39 | ) 40 | }) 41 | 42 | test_that("Only data frames are allowed, others return NULL", { 43 | x <- matrix( 44 | 1:5 45 | ) 46 | 47 | expect_equal( 48 | get_levels(x), 49 | NULL 50 | ) 51 | }) 52 | 53 | test_that("Multivariate columns are skipped over", { 54 | x <- data.frame(x = factor(letters[1:5])) 55 | x$y <- matrix(1:10, ncol = 2, dimnames = list(NULL, c("c1", "c2"))) 56 | 57 | expect_equal( 58 | get_levels(x), 59 | list(x = letters[1:5]) 60 | ) 61 | }) 62 | 63 | test_that("Can extract levels from an outcome", { 64 | expect_equal( 65 | get_outcome_levels(1:5), 66 | NULL 67 | ) 68 | 69 | expect_snapshot( 70 | error = TRUE, 71 | 
get_outcome_levels("a") 72 | ) 73 | 74 | expect_equal( 75 | get_outcome_levels(factor("a")), 76 | list(.outcome = "a") 77 | ) 78 | 79 | expect_equal( 80 | get_outcome_levels(matrix(1:5, dimnames = list(NULL, "c1"))), 81 | NULL 82 | ) 83 | 84 | expect_equal( 85 | get_outcome_levels(data.frame(x = factor(letters[1:5]))), 86 | list(x = letters[1:5]) 87 | ) 88 | }) 89 | -------------------------------------------------------------------------------- /tests/testthat/test-model-matrix.R: -------------------------------------------------------------------------------- 1 | test_that("`model_matrix()` strips all attributes from the `model.matrix()` results", { 2 | framed <- model_frame(Sepal.Width ~ Species + 0, iris) 3 | matrix <- model_matrix(framed$terms, framed$data) 4 | 5 | # Mock what `model_matrix()` does by stripping all attributes 6 | f <- Sepal.Width ~ Species + 0 7 | expect <- model.matrix(f, model.frame(f, iris)) 8 | expect <- expect[, 1, drop = TRUE] 9 | attributes(expect) <- NULL 10 | 11 | # `tibble:::matrixToDataFrame()` would propagate any attributes besides 12 | # column names to each individual column. 
`model.matrix()` would have 13 | # attached "assign" and "contrasts" attributes here 14 | expect_identical(matrix$Speciessetosa, expect) 15 | }) 16 | 17 | test_that("`contr_one_hot()` input checks", { 18 | expect_snapshot(contr_one_hot(n = 2, sparse = TRUE)) 19 | expect_snapshot(contr_one_hot(n = 2, contrasts = FALSE)) 20 | 21 | expect_snapshot(error = TRUE, { 22 | contr_one_hot(n = 1:2) 23 | }) 24 | expect_snapshot(error = TRUE, { 25 | contr_one_hot(n = list(1:2)) 26 | }) 27 | expect_snapshot(error = TRUE, { 28 | contr_one_hot(character(0)) 29 | }) 30 | expect_snapshot(error = TRUE, { 31 | contr_one_hot(-1) 32 | }) 33 | expect_snapshot(error = TRUE, { 34 | contr_one_hot(list()) 35 | }) 36 | }) 37 | 38 | test_that("one-hot encoding contrasts", { 39 | contr_mat <- contr_one_hot(12) 40 | expect_equal(colnames(contr_mat), paste(1:12)) 41 | expect_equal(rownames(contr_mat), paste(1:12)) 42 | expect_true(all(apply(contr_mat, 1, sum) == 1)) 43 | expect_true(all(apply(contr_mat, 2, sum) == 1)) 44 | 45 | chr_contr_mat <- contr_one_hot(letters[1:12]) 46 | expect_equal(colnames(chr_contr_mat), letters[1:12]) 47 | expect_equal(rownames(chr_contr_mat), letters[1:12]) 48 | expect_true(all(apply(chr_contr_mat, 1, sum) == 1)) 49 | expect_true(all(apply(chr_contr_mat, 2, sum) == 1)) 50 | }) 51 | -------------------------------------------------------------------------------- /tests/testthat/test-mold-xy.R: -------------------------------------------------------------------------------- 1 | test_that("unknown mold() inputs throw an error", { 2 | expect_snapshot(error = TRUE, { 3 | mold("hi") 4 | }) 5 | }) 6 | 7 | test_that("can use x-y mold interface", { 8 | sparse_bp <- default_xy_blueprint(composition = "dgCMatrix") 9 | matrix_bp <- default_xy_blueprint(composition = "matrix") 10 | 11 | x1 <- mold(iris[, "Sepal.Length", drop = FALSE], iris$Species) 12 | x2 <- mold( 13 | iris[, "Sepal.Length", drop = FALSE], 14 | iris$Species, 15 | blueprint = sparse_bp 16 | ) 17 | x3 <- mold( 18 
| iris[, "Sepal.Length", drop = FALSE], 19 | iris$Species, 20 | blueprint = matrix_bp 21 | ) 22 | 23 | expect_s3_class(x1$predictors, "tbl_df") 24 | expect_s4_class(x2$predictors, "dgCMatrix") 25 | expect_matrix(x3$predictors) 26 | 27 | expect_equal(colnames(x1$predictors), "Sepal.Length") 28 | expect_equal(colnames(x2$predictors), "Sepal.Length") 29 | expect_equal(colnames(x3$predictors), "Sepal.Length") 30 | 31 | expect_s3_class(x1$outcomes, "tbl_df") 32 | expect_s3_class(x2$outcomes, "tbl_df") 33 | expect_s3_class(x3$outcomes, "tbl_df") 34 | expect_equal(colnames(x1$outcomes), ".outcome") 35 | expect_equal(colnames(x2$outcomes), ".outcome") 36 | expect_equal(colnames(x3$outcomes), ".outcome") 37 | expect_s3_class(x1$blueprint, "default_xy_blueprint") 38 | }) 39 | 40 | test_that("xy intercepts can be added", { 41 | x1 <- mold( 42 | iris[, "Sepal.Length", drop = FALSE], 43 | iris$Species, 44 | blueprint = default_xy_blueprint(intercept = TRUE) 45 | ) 46 | x2 <- mold( 47 | iris[, "Sepal.Length", drop = FALSE], 48 | iris$Species, 49 | blueprint = default_xy_blueprint(intercept = TRUE, composition = "matrix") 50 | ) 51 | 52 | expect_true("(Intercept)" %in% colnames(x1$predictors)) 53 | expect_true("(Intercept)" %in% colnames(x2$predictors)) 54 | }) 55 | 56 | test_that("cannot pass anything in the dots", { 57 | expect_snapshot(error = TRUE, { 58 | mold( 59 | iris[, "Sepal.Length", drop = FALSE], 60 | iris$Species, 61 | z = "in the dots" 62 | ) 63 | }) 64 | expect_snapshot(error = TRUE, { 65 | mold( 66 | iris[, "Sepal.Length", drop = FALSE], 67 | iris$Species, 68 | blueprint = default_xy_blueprint(composition = "dgCMatrix"), 69 | z = "in the dots" 70 | ) 71 | }) 72 | }) 73 | 74 | test_that("`NULL` y value returns a 0 column tibble for `outcomes`", { 75 | x <- mold(iris, y = NULL) 76 | 77 | expect_equal(nrow(x$outcomes), 150) 78 | expect_equal(ncol(x$outcomes), 0) 79 | }) 80 | 81 | test_that("Missing y value returns a 0 column / 0 row tibble for `ptype`", { 82 | x <- 
mold(iris, y = NULL) 83 | expect_equal(x$blueprint$ptypes$outcomes, tibble()) 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-mold.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # run_mold() 3 | 4 | test_that("`run_mold()` throws an informative default error", { 5 | expect_snapshot(error = TRUE, run_mold(1)) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test-print.R: -------------------------------------------------------------------------------- 1 | test_that("print - formula", { 2 | expect_snapshot({ 3 | mold(Species ~ Sepal.Length, iris)$blueprint 4 | mold(~Sepal.Length, iris)$blueprint 5 | }) 6 | }) 7 | 8 | test_that("print - default", { 9 | expect_snapshot({ 10 | mold(iris[, c("Sepal.Length"), drop = FALSE], iris$Species)$blueprint 11 | }) 12 | }) 13 | 14 | test_that("print - recipe", { 15 | skip_if_not_installed("recipes") 16 | 17 | expect_snapshot({ 18 | mold(recipes::recipe(Species ~ Sepal.Length, iris), iris)$blueprint 19 | }) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test-quantile-pred.R: -------------------------------------------------------------------------------- 1 | test_that("quantile_pred error types", { 2 | expect_snapshot( 3 | error = TRUE, 4 | quantile_pred(1:10, 1:4 / 5) 5 | ) 6 | expect_snapshot( 7 | error = TRUE, 8 | quantile_pred(matrix(1:20, 5), -1:4 / 5) 9 | ) 10 | expect_snapshot( 11 | error = TRUE, 12 | quantile_pred(matrix(1:20, 5), 1:5 / 6) 13 | ) 14 | expect_snapshot( 15 | error = TRUE, 16 | quantile_pred(matrix(1:20, 5), 4:1 / 5) 17 | ) 18 | }) 19 | 20 | test_that("quantile levels are checked", { 21 | expect_snapshot(error = TRUE, { 22 | quantile_pred(matrix(1:20, 5), quantile_levels = NULL) 23 | }) 24 | 
expect_snapshot(error = TRUE, { 25 | quantile_pred(matrix(1:20, 5), quantile_levels = c(0.7, 0.7, 0.7)) 26 | }) 27 | expect_snapshot(error = TRUE, { 28 | quantile_pred( 29 | matrix(1:20, 5), 30 | quantile_levels = c(rep(0.7, 2), rep(0.8, 3)) 31 | ) 32 | }) 33 | expect_snapshot(error = TRUE, { 34 | quantile_pred(matrix(1:20, 5), quantile_levels = c(0.8, 0.7)) 35 | }) 36 | }) 37 | 38 | test_that("quantile_pred outputs", { 39 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 40 | expect_s3_class(v, "quantile_pred") 41 | expect_identical(attr(v, "quantile_levels"), 1:4 / 5) 42 | expect_identical( 43 | vctrs::vec_data(v), 44 | lapply(vctrs::vec_chop(matrix(1:20, 5)), drop) 45 | ) 46 | }) 47 | 48 | test_that("extract_quantile_levels", { 49 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 50 | expect_identical(extract_quantile_levels(v), 1:4 / 5) 51 | 52 | expect_snapshot( 53 | error = TRUE, 54 | extract_quantile_levels(1:10) 55 | ) 56 | }) 57 | 58 | test_that("quantile_pred formatting", { 59 | # multiple quantiles 60 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 61 | expect_snapshot(v) 62 | expect_snapshot(quantile_pred(matrix(1:18, 9), c(1 / 3, 2 / 3))) 63 | expect_snapshot( 64 | quantile_pred(matrix(seq(0.01, 1 - 0.01, length.out = 6), 3), c(.2, .8)) 65 | ) 66 | expect_snapshot(tibble(qntls = v)) 67 | m <- matrix(1:20, 5) 68 | m[2, 3] <- NA 69 | m[4, 2] <- NA 70 | expect_snapshot(quantile_pred(m, 1:4 / 5)) 71 | 72 | # single quantile 73 | m <- matrix(1:5) 74 | one_quantile <- quantile_pred(m, 5 / 9) 75 | expect_snapshot(one_quantile) 76 | expect_snapshot(tibble(qntls = one_quantile)) 77 | m[2] <- NA 78 | expect_snapshot(quantile_pred(m, 5 / 9)) 79 | 80 | set.seed(393) 81 | v <- quantile_pred(matrix(exp(rnorm(20)), ncol = 4), 1:4 / 5) 82 | expect_snapshot(format(v)) 83 | expect_snapshot(format(v, digits = 5)) 84 | }) 85 | 86 | test_that("as_tibble() for quantile_pred", { 87 | v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) 88 | tbl <- as_tibble(v) 89 | expect_s3_class(tbl, 
c("tbl_df", "tbl", "data.frame")) 90 | expect_named(tbl, c(".pred_quantile", ".quantile_levels", ".row")) 91 | expect_true(nrow(tbl) == 20) 92 | }) 93 | 94 | test_that("as.matrix() for quantile_pred", { 95 | x <- matrix(1:20, 5) 96 | v <- quantile_pred(x, 1:4 / 5) 97 | m <- as.matrix(v) 98 | expect_true(is.matrix(m)) 99 | expect_identical(m, x) 100 | }) 101 | -------------------------------------------------------------------------------- /tests/testthat/test-scream.R: -------------------------------------------------------------------------------- 1 | test_that("novel levels can be ignored", { 2 | dat <- data.frame( 3 | y = 1:4, 4 | f = factor(letters[1:4]) 5 | ) 6 | 7 | new <- data.frame( 8 | y = 1:5, 9 | f = factor(letters[1:5]) 10 | ) 11 | 12 | ptype <- vec_ptype(dat) 13 | 14 | # Silent 15 | expect_snapshot({ 16 | x <- scream(new, ptype, allow_novel_levels = TRUE) 17 | }) 18 | 19 | expect_equal(levels(x$f), letters[1:5]) 20 | }) 21 | 22 | test_that("novel levels in a new character vector can be ignored", { 23 | dat <- data.frame( 24 | y = 1:4, 25 | f = factor(letters[1:4]) 26 | ) 27 | 28 | new <- data.frame( 29 | y = 1:5, 30 | f = letters[1:5], 31 | stringsAsFactors = FALSE 32 | ) 33 | 34 | ptype <- vec_ptype(dat) 35 | 36 | # Silent 37 | expect_snapshot({ 38 | x <- scream(new, ptype, allow_novel_levels = TRUE) 39 | }) 40 | 41 | expect_equal(levels(x$f), new$f) 42 | }) 43 | 44 | test_that("ignoring novel levels still passes through incompatible classes", { 45 | dat <- data.frame(f = factor(letters[1:4])) 46 | new <- data.frame(f = 1:5) 47 | 48 | ptype <- vec_ptype(dat) 49 | 50 | expect_error( 51 | scream(new, ptype, allow_novel_levels = TRUE), 52 | class = "vctrs_error_incompatible_type" 53 | ) 54 | }) 55 | -------------------------------------------------------------------------------- /tests/testthat/test-shrink.R: -------------------------------------------------------------------------------- 1 | test_that("an outcome can also be a predictor and is only 
returned once", { 2 | x <- mold(Sepal.Length ~ Sepal.Length, iris) 3 | 4 | expect_equal( 5 | colnames(shrink(iris, x$blueprint$ptypes$predictors)), 6 | "Sepal.Length" 7 | ) 8 | }) 9 | 10 | test_that("`data` must be data-like", { 11 | ptype <- data.frame(x = integer()) 12 | 13 | expect_snapshot(error = TRUE, { 14 | shrink(1, ptype) 15 | }) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-standardize.R: -------------------------------------------------------------------------------- 1 | test_that("standardize - factor", { 2 | std <- standardize(factor(letters[1:5])) 3 | expect_s3_class(std, "tbl_df") 4 | expect_equal(colnames(std), ".outcome") 5 | }) 6 | 7 | test_that("standardize - numeric", { 8 | std <- standardize(1:5) 9 | expect_s3_class(std, "tbl_df") 10 | expect_equal(colnames(std), ".outcome") 11 | 12 | std2 <- standardize(as.double(1:5)) 13 | expect_s3_class(std2, "tbl_df") 14 | expect_equal(colnames(std2), ".outcome") 15 | }) 16 | 17 | test_that("standardize - matrix", { 18 | mat_bad <- matrix(1:10, ncol = 2) 19 | mat_bad2 <- matrix("a", dimnames = list(NULL, "c1")) 20 | 21 | mat_good <- mat_bad 22 | colnames(mat_good) <- c("a", "b") 23 | 24 | expect_snapshot(error = TRUE, { 25 | standardize(mat_bad) 26 | }) 27 | 28 | expect_snapshot(error = TRUE, { 29 | standardize(mat_bad2) 30 | }) 31 | 32 | std <- standardize(mat_good) 33 | expect_s3_class(std, "tbl_df") 34 | expect_equal(colnames(std), c("a", "b")) 35 | }) 36 | 37 | test_that("standardize - array", { 38 | bad <- array(1:10, c(5, 2)) 39 | 40 | expect_snapshot(error = TRUE, { 41 | standardize(bad) 42 | }) 43 | 44 | bad2 <- array("a", c(1, 1), dimnames = list(NULL, "c1")) 45 | 46 | expect_snapshot(error = TRUE, { 47 | standardize(bad2) 48 | }) 49 | 50 | good <- bad 51 | colnames(good) <- c("a", "b") 52 | 53 | std <- standardize(good) 54 | expect_s3_class(std, "tbl_df") 55 | expect_equal(colnames(std), c("a", "b")) 56 | 57 | good2 <- 
array(1:5) 58 | 59 | std2 <- standardize(good2) 60 | expect_s3_class(std2, "tbl_df") 61 | expect_equal(colnames(std2), ".outcome") 62 | }) 63 | 64 | test_that("standardize - data.frame", { 65 | bad <- data.frame(1:5, 6:10) 66 | colnames(bad) <- NULL 67 | 68 | expect_snapshot(error = TRUE, { 69 | standardize(bad) 70 | }) 71 | 72 | bad2 <- data.frame(x = "a", stringsAsFactors = FALSE) 73 | 74 | expect_snapshot(error = TRUE, standardize(bad2)) 75 | 76 | bad3 <- data.frame(x = "a", y = "b", stringsAsFactors = FALSE) 77 | 78 | expect_snapshot(error = TRUE, standardize(bad3)) 79 | 80 | good <- bad 81 | colnames(good) <- c("a", "b") 82 | 83 | std <- standardize(good) 84 | expect_s3_class(std, "tbl_df") 85 | expect_equal(colnames(std), c("a", "b")) 86 | 87 | good2 <- data.frame(x = factor(letters[1:5]), y = factor(letters[6:10])) 88 | 89 | std2 <- standardize(good2) 90 | expect_s3_class(std2, "tbl_df") 91 | expect_equal(colnames(std2), c("x", "y")) 92 | }) 93 | 94 | test_that("standardize - unknown", { 95 | expect_snapshot(error = TRUE, standardize("hi")) 96 | expect_snapshot(error = TRUE, standardize(Sys.time())) 97 | }) 98 | -------------------------------------------------------------------------------- /tests/testthat/test-tune.R: -------------------------------------------------------------------------------- 1 | test_that("tune creates a call", { 2 | expect_true(is.call(tune())) 3 | expect_true(is.call(tune("foo"))) 4 | }) 5 | 6 | test_that("tune `id` value", { 7 | expect_identical(tune(), call("tune")) 8 | expect_identical(tune(""), call("tune")) 9 | expect_identical(tune("foo"), call("tune", "foo")) 10 | }) 11 | 12 | test_that("`id` is validated", { 13 | expect_snapshot(error = TRUE, tune(1)) 14 | expect_snapshot(error = TRUE, tune(c("x", "y"))) 15 | expect_snapshot(error = TRUE, tune(NA_character_)) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-use.R: 
-------------------------------------------------------------------------------- 1 | test_that("can create a modeling package", { 2 | skip_on_cran() 3 | skip_if_not_installed("recipes") 4 | 5 | local_options(usethis.quiet = TRUE) 6 | 7 | dir <- withr::local_tempdir("model") 8 | 9 | model <- "linear_regression" 10 | 11 | # `usethis.quiet = TRUE` silences most of the messages, but there is an 12 | # unavoidable `i Loading model` that we get from devtools if we don't do this 13 | suppressMessages({ 14 | create_modeling_package(dir, model, open = FALSE) 15 | }) 16 | 17 | top_level_files <- list.files(dir) 18 | script_files <- list.files(file.path(dir, "R")) 19 | 20 | expect_true("DESCRIPTION" %in% top_level_files) 21 | expect_true("R" %in% top_level_files) 22 | expect_true("man" %in% top_level_files) 23 | expect_true("NAMESPACE" %in% top_level_files) 24 | 25 | expect_true(glue::glue("{model}-constructor.R") %in% script_files) 26 | expect_true(glue::glue("{model}-fit.R") %in% script_files) 27 | expect_true(glue::glue("{model}-predict.R") %in% script_files) 28 | }) 29 | 30 | test_that("can add a second model to a modeling package", { 31 | skip_on_cran() 32 | skip_if_not_installed("recipes") 33 | 34 | local_options(usethis.quiet = TRUE) 35 | 36 | dir <- withr::local_tempdir("model") 37 | 38 | model1 <- "linear_regression" 39 | model2 <- "random_forest" 40 | 41 | # `usethis.quiet = TRUE` silences most of the messages, but there is an 42 | # unavoidable `i Loading model` that we get from devtools if we don't do this 43 | suppressMessages({ 44 | create_modeling_package(dir, model1, open = FALSE) 45 | }) 46 | 47 | usethis::with_project(dir, use_modeling_files(model2)) 48 | 49 | script_files <- list.files(file.path(dir, "R")) 50 | 51 | expect_true(glue::glue("{model1}-constructor.R") %in% script_files) 52 | expect_true(glue::glue("{model1}-fit.R") %in% script_files) 53 | expect_true(glue::glue("{model1}-predict.R") %in% script_files) 54 | 55 | 
expect_true(glue::glue("{model2}-constructor.R") %in% script_files) 56 | expect_true(glue::glue("{model2}-fit.R") %in% script_files) 57 | expect_true(glue::glue("{model2}-predict.R") %in% script_files) 58 | }) 59 | 60 | test_that("no `model` aborts normally", { 61 | expect_snapshot(error = TRUE, create_modeling_package(path = "my/path")) 62 | }) 63 | 64 | test_that("no `path` aborts normally", { 65 | expect_snapshot(error = TRUE, create_modeling_package(model = "my_model")) 66 | }) 67 | 68 | test_that("`model` can only be a single string", { 69 | skip_if_not_installed("recipes") 70 | 71 | expect_snapshot( 72 | error = TRUE, 73 | create_modeling_package(path = "my/path", model = c("model1", "model2")) 74 | ) 75 | expect_snapshot( 76 | error = TRUE, 77 | create_modeling_package(path = "my/path", model = 1) 78 | ) 79 | expect_snapshot( 80 | error = TRUE, 81 | create_modeling_package(path = "my/path", model = "model with space") 82 | ) 83 | }) 84 | -------------------------------------------------------------------------------- /tests/testthat/test-validation.R: -------------------------------------------------------------------------------- 1 | test_that("validate_outcomes_are_univariate()", { 2 | expect_silent(validate_outcomes_are_univariate(data.frame(x = 1))) 3 | 4 | expect_silent(validate_outcomes_are_univariate(matrix())) 5 | 6 | expect_silent(validate_outcomes_are_univariate(1)) 7 | 8 | expect_snapshot( 9 | error = TRUE, 10 | validate_outcomes_are_univariate(iris) 11 | ) 12 | }) 13 | 14 | test_that("validate_outcomes_are_numeric()", { 15 | expect_silent( 16 | validate_outcomes_are_numeric(mtcars) 17 | ) 18 | 19 | expect_snapshot( 20 | error = TRUE, 21 | validate_outcomes_are_numeric(iris) 22 | ) 23 | 24 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 25 | x <- data.frame(x = date, y = factor("hi")) 26 | 27 | expect_snapshot( 28 | error = TRUE, 29 | validate_outcomes_are_numeric(x) 30 | ) 31 | }) 32 | 33 | test_that("validate_no_formula_duplication()", { 
34 | expect_silent(validate_no_formula_duplication(y ~ x)) 35 | 36 | expect_snapshot( 37 | error = TRUE, 38 | validate_no_formula_duplication(y ~ y) 39 | ) 40 | 41 | expect_silent(validate_no_formula_duplication(y ~ log(y))) 42 | 43 | expect_snapshot( 44 | error = TRUE, 45 | validate_no_formula_duplication(y ~ log(y), original = TRUE) 46 | ) 47 | 48 | expect_snapshot( 49 | error = TRUE, 50 | validate_no_formula_duplication(y + x ~ y + x) 51 | ) 52 | 53 | expect_silent(validate_no_formula_duplication(y ~ .)) 54 | 55 | expect_snapshot( 56 | error = TRUE, 57 | validate_no_formula_duplication(y ~ . + y) 58 | ) 59 | 60 | # offset() is a weird special case but this is ok 61 | expect_silent(validate_no_formula_duplication(offset(y) ~ offset(y))) 62 | 63 | expect_snapshot( 64 | error = TRUE, 65 | validate_no_formula_duplication(y ~ offset(y), original = TRUE) 66 | ) 67 | }) 68 | 69 | test_that("validate_outcomes_are_factors()", { 70 | expect_silent( 71 | validate_outcomes_are_factors(data.frame(x = factor(c("A", "B")))) 72 | ) 73 | 74 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 75 | x <- data.frame(x = date, y = "hi", stringsAsFactors = FALSE) 76 | 77 | expect_snapshot( 78 | error = TRUE, 79 | validate_outcomes_are_factors(x) 80 | ) 81 | }) 82 | 83 | test_that("validate_outcomes_are_binary()", { 84 | expect_silent( 85 | validate_outcomes_are_binary(data.frame(x = factor(c("A", "B")))) 86 | ) 87 | 88 | expect_snapshot( 89 | error = TRUE, 90 | validate_outcomes_are_binary(iris) 91 | ) 92 | }) 93 | 94 | test_that("validate_predictors_are_numeric()", { 95 | expect_silent( 96 | validate_predictors_are_numeric(mtcars) 97 | ) 98 | 99 | expect_snapshot( 100 | error = TRUE, 101 | validate_predictors_are_numeric(iris) 102 | ) 103 | 104 | date <- as.POSIXct(as.POSIXlt(as.Date("2019-01-01"))) 105 | x <- data.frame(x = date, y = factor("hi")) 106 | 107 | expect_snapshot( 108 | error = TRUE, 109 | validate_predictors_are_numeric(x) 110 | ) 111 | }) 112 | 113 | 
test_that("validate_prediction_size()", { 114 | expect_silent( 115 | validate_prediction_size(mtcars, mtcars) 116 | ) 117 | 118 | expect_snapshot( 119 | error = TRUE, 120 | validate_prediction_size(mtcars[1:5, ], mtcars) 121 | ) 122 | }) 123 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 4 | /.quarto/ 5 | --------------------------------------------------------------------------------