├── .Rbuildignore ├── .github ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md └── workflows │ ├── R-CMD-check-hard.yaml │ ├── R-CMD-check.yaml │ ├── lock.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── .vscode ├── extensions.json └── settings.json ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── aaa.R ├── messages.R ├── misc.R ├── use.R └── usemodels-package.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── air.toml ├── codecov.yml ├── cran-comments.md ├── inst └── WORDLIST ├── man ├── templates.Rd └── usemodels-package.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── _snaps │ ├── basics.md │ ├── clipboard.md │ └── templates.md │ ├── test-basics.R │ ├── test-clipboard.R │ └── test-templates.R └── usemodels.Rproj /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^CODE_OF_CONDUCT\.md$ 4 | ^LICENSE\.md$ 5 | ^codecov\.yml$ 6 | ^cran-comments\.md$ 7 | ^README\.Rmd$ 8 | ^\.github$ 9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | ^[\.]?air\.toml$ 13 | ^\.vscode$ 14 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS for usemodels 2 | # https://www.tidyverse.org/development/understudies 3 | .github/CODEOWNERS @topepo @juliasilge 4 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at codeofconduct@posit.co. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. 
Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | . 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder][https://github.com/mozilla/inclusion]. 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | . Translations are available at . 125 | 126 | [homepage]: https://www.contributor-covenant.org 127 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-hard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, 5 | # Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never 6 | # installed, with the exception of testthat, knitr, and rmarkdown. The cache is 7 | # never used to avoid accidentally restoring a cache containing a suggested 8 | # dependency. 
9 | on: 10 | push: 11 | branches: [main, master] 12 | pull_request: 13 | 14 | name: R-CMD-check-hard.yaml 15 | 16 | permissions: read-all 17 | 18 | jobs: 19 | check-no-suggests: 20 | runs-on: ${{ matrix.config.os }} 21 | 22 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | config: 28 | - {os: ubuntu-latest, r: 'release'} 29 | 30 | env: 31 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 32 | R_KEEP_PKG_SOURCE: yes 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | 37 | - uses: r-lib/actions/setup-pandoc@v2 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | http-user-agent: ${{ matrix.config.http-user-agent }} 43 | use-public-rspm: true 44 | 45 | - uses: r-lib/actions/setup-r-dependencies@v2 46 | with: 47 | dependencies: '"hard"' 48 | cache: false 49 | extra-packages: | 50 | any::rcmdcheck 51 | any::testthat 52 | any::knitr 53 | any::rmarkdown 54 | needs: check 55 | 56 | - uses: r-lib/actions/check-r-package@v2 57 | with: 58 | upload-snapshots: true 59 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 60 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | 12 | name: R-CMD-check.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: macos-latest, r: 'release'} 27 | 28 | - {os: windows-latest, r: 'release'} 29 | # use 4.0 or 4.1 to check with rtools40's older compiler 30 | - {os: windows-latest, r: 'oldrel-4'} 31 | 32 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 33 | - {os: ubuntu-latest, r: 'release'} 34 | - {os: ubuntu-latest, r: 'oldrel-1'} 35 | - {os: ubuntu-latest, r: 'oldrel-2'} 36 | - {os: ubuntu-latest, r: 'oldrel-3'} 37 | - {os: ubuntu-latest, r: 'oldrel-4'} 38 | 39 | env: 40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 41 | R_KEEP_PKG_SOURCE: yes 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | 46 | - uses: r-lib/actions/setup-pandoc@v2 47 | 48 | - uses: r-lib/actions/setup-r@v2 49 | with: 50 | r-version: ${{ matrix.config.r }} 51 | http-user-agent: ${{ matrix.config.http-user-agent }} 52 | use-public-rspm: true 53 | 54 | - uses: r-lib/actions/setup-r-dependencies@v2 55 | with: 56 | extra-packages: any::rcmdcheck 57 | needs: check 58 | 59 | - uses: r-lib/actions/check-r-package@v2 60 | with: 61 | upload-snapshots: true 62 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 63 | -------------------------------------------------------------------------------- /.github/workflows/lock.yaml: -------------------------------------------------------------------------------- 1 | name: 'Lock Threads' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | lock: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dessant/lock-threads@v2 12 | with: 13 | 
github-token: ${{ github.token }} 14 | issue-lock-inactive-days: '14' 15 | # issue-exclude-labels: '' 16 | # issue-lock-labels: 'outdated' 17 | issue-lock-comment: > 18 | This issue has been automatically locked. If you believe you have 19 | found a related problem, please file a new issue (with a reprex: 20 | ) and link to this issue. 21 | issue-lock-reason: '' 22 | pr-lock-inactive-days: '14' 23 | # pr-exclude-labels: 'wip' 24 | pr-lock-labels: '' 25 | pr-lock-comment: > 26 | This pull request has been automatically locked. If you believe you 27 | have found a related problem, please file a new issue (with a reprex: 28 | ) and link to this issue. 29 | pr-lock-reason: '' 30 | # process-only: 'issues' 31 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown.yaml 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | runs-on: ubuntu-latest 18 | # Only restrict concurrency for non-PR jobs 19 | concurrency: 20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | permissions: 24 | contents: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.5.0 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | print(cov) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v5 42 | with: 43 | # Fail if error if not on PR, or if on PR and token is given 44 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 45 | files: ./cobertura.xml 46 | plugins: noop 47 | disable_search: true 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | 50 | - name: Show testthat output 51 | if: always() 52 | run: | 53 | ## -------------------------------------------------------------------- 54 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 55 | shell: bash 56 | 57 | - name: Upload test results 58 | if: failure() 59 | uses: actions/upload-artifact@v4 60 | with: 61 | name: coverage-test-failures 62 | path: ${{ runner.temp }}/package 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | docs 7 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "Posit.air-vscode" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[r]": { 3 | "editor.formatOnSave": true, 4 | "editor.defaultFormatter": "Posit.air-vscode" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: usemodels 2 | Title: Boilerplate Code for 'Tidymodels' Analyses 3 | Version: 0.2.0.9000 4 | Authors@R: c( 5 | person("Max", "Kuhn", , "max@posit.co", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0003-2402-136X")), 7 | person("Posit Software, PBC", role = c("cph", "fnd"), 8 | comment = c(ROR = "03wc8by49")) 9 | ) 10 | Description: Code snippets to fit models using the tidymodels framework 11 | can be easily created for a given data set. 
12 | License: MIT + file LICENSE 13 | URL: https://usemodels.tidymodels.org/, 14 | https://github.com/tidymodels/usemodels 15 | BugReports: https://github.com/tidymodels/usemodels/issues 16 | Depends: 17 | R (>= 4.1) 18 | Imports: 19 | cli, 20 | clipr, 21 | dplyr, 22 | purrr, 23 | recipes (>= 0.1.15), 24 | rlang, 25 | tidyr, 26 | tune (>= 0.1.2) 27 | Suggests: 28 | covr, 29 | modeldata, 30 | spelling, 31 | testthat 32 | Config/Needs/website: tidyverse/tidytemplate 33 | Config/testthat/edition: 3 34 | Config/usethis/last-upkeep: 2025-04-27 35 | Encoding: UTF-8 36 | Language: en-US 37 | Roxygen: list(markdown = TRUE) 38 | RoxygenNote: 7.3.2 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: usemodels authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 usemodels authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(use_C5.0) 4 | export(use_bag_tree_rpart) 5 | export(use_cubist) 6 | export(use_dbarts) 7 | export(use_earth) 8 | export(use_glmnet) 9 | export(use_kernlab_svm_poly) 10 | export(use_kernlab_svm_rbf) 11 | export(use_kknn) 12 | export(use_mgcv) 13 | export(use_mixOmics) 14 | export(use_nnet) 15 | export(use_ranger) 16 | export(use_rpart) 17 | export(use_xgboost) 18 | export(use_xrf) 19 | import(cli) 20 | import(rlang) 21 | import(tune) 22 | importFrom(dplyr,one_of) 23 | importFrom(dplyr,pull) 24 | importFrom(dplyr,select) 25 | importFrom(recipes,all_predictors) 26 | importFrom(recipes,recipe) 27 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # usemodels (development version) 2 | 3 | * Transition from the magrittr pipe to the base R pipe. 4 | 5 | * Added `use_nnet()`, `use_rpart()`, `use_bag_tree_rpart()`, `use_mgcv()`, `use_dbarts()`, `use_mixOmics()`, `use_xrf()`. 
6 | 7 | * Fix `recipe()` call when `clipboard = TRUE` 8 | 9 | # usemodels 0.2.0 10 | 11 | * SVM (#14) and C5.0 (#9) models were added. 12 | 13 | * Clipboard access is enabled in all `use_*()` functions using the `clipboard = TRUE` argument (#12). 14 | 15 | * Use new `all_nominal_predictors()` selector (#17). 16 | 17 | # usemodels 0.1.0 18 | 19 | * Updated version requirements for dependencies. 20 | 21 | * Added `use_cubist()`. 22 | 23 | * Fixed bug in handling of variables for `step_string2factor()` 24 | 25 | # usemodels 0.0.1 26 | 27 | * Added a `NEWS.md` file to track changes to the package. 28 | -------------------------------------------------------------------------------- /R/aaa.R: -------------------------------------------------------------------------------- 1 | # nocov start 2 | 3 | # Global vars ------------------------------------------------------------------ 4 | 5 | utils::globalVariables( 6 | c( 7 | # false positives from template code 8 | "all_nominal_predictors", 9 | "all_numeric_predictors", 10 | "boost_tree", 11 | "comments", 12 | "linear_reg", 13 | "logistic_reg", 14 | "multinom_reg", 15 | "set_engine", 16 | "set_mode", 17 | "step_dummy", 18 | "step_normalize", 19 | "step_novel", 20 | "step_zv", 21 | "step_string2factor", 22 | "colors", 23 | "workflow", 24 | "add_model", 25 | "add_recipe" 26 | ) 27 | ) 28 | 29 | # nocov end 30 | -------------------------------------------------------------------------------- /R/messages.R: -------------------------------------------------------------------------------- 1 | zv_msg <- paste( 2 | "Before centering and scaling the numeric predictors, any predictors with", 3 | "a single unique value are filtered out." 4 | ) 5 | dist_msg <- 6 | paste( 7 | "Since distance calculations are used, the predictor", 8 | "variables should be on the same scale." 9 | ) 10 | dot_msg <- 11 | paste( 12 | "Since dot product calculations are used, the predictor", 13 | "variables should be on the same scale." 14 | ) 15 | reg_msg <- 16 | paste( 17 | "Regularization methods sum up functions of the model slope coefficients.", 18 | "Because of this, the predictor variables should be on the same scale." 19 | ) 20 | dummy_msg <- 21 | paste( 22 | "This model requires the predictors to be numeric. The most common method to", 23 | "convert qualitative predictors to numeric is to create binary indicator", 24 | "variables (aka dummy variables) from these predictors." 25 | ) 26 | dummy_hot_msg <- 27 | paste( 28 | dummy_msg, 29 | "However, for this model, binary indicator variables can be made for", 30 | "each of the levels of the factors (known as 'one-hot encoding')." 31 | ) 32 | string_to_factor_msg <- 33 | paste( 34 | "For modeling, it is preferred to encode qualitative data as factors", 35 | "(instead of character)." 
36 | ) 37 | -------------------------------------------------------------------------------- /R/misc.R: -------------------------------------------------------------------------------- 1 | model_mode <- function(rec) { 2 | var_roles <- summary(rec) 3 | y_types <- var_roles$type[var_roles$role == "outcome"] 4 | y_types <- unique(y_types) 5 | if (length(y_types) > 1) { 6 | rlang::abort("outcomes are of different types.") 7 | } 8 | if (all(purrr::map_lgl(y_types, \(.x) "numeric" %in% .x))) { 9 | mod_mode <- "regression" 10 | } else { 11 | mod_mode <- "classification" 12 | } 13 | mod_mode 14 | } 15 | 16 | y_lvl <- function(rec) { 17 | mod_mode <- model_mode(rec) 18 | if (mod_mode == "regression") { 19 | return(NULL) 20 | } 21 | var_roles <- summary(rec) 22 | y_cols <- var_roles$variable[var_roles$role == "outcome"] 23 | y_dat <- rec$template |> 24 | dplyr::select(one_of(y_cols)) |> 25 | dplyr::pull(1) 26 | length(levels(y_dat)) 27 | } 28 | 29 | has_factor_pred <- function(x) { 30 | info <- summary(x) 31 | pred_types <- info$type[info$role == "predictor"] 32 | any(purrr::map_lgl(pred_types, \(.x) "nominal" %in% .x)) 33 | } 34 | 35 | num_pred_col <- function(x) { 36 | info <- summary(x) 37 | sum(info$role == "predictor") 38 | } 39 | 40 | # ------------------------------------------------------------------------------ 41 | # helper functions 42 | 43 | expr_width <- 74L 44 | 45 | assign_value <- function(name, value, cr = TRUE) { 46 | value <- rlang::enexpr(value) 47 | value <- rlang::expr_text(value, width = expr_width) 48 | chr_assign(name, value, cr) 49 | } 50 | chr_assign <- function(name, value, cr = TRUE) { 51 | name <- paste(name, "<-") 52 | if (cr) { 53 | res <- c(name, paste0("\n ", value)) 54 | } else { 55 | res <- paste(name, value) 56 | } 57 | res 58 | } 59 | pipe_value <- function(base, value) { 60 | # Find last non-comment line, add a `|>` to the end, then add another line 61 | value <- rlang::enexpr(value) 62 | value <- rlang::expr_text(value, width = expr_width) 63 | clean_base <- gsub("\\n", "", base) 64 | clean_base <- trimws(base, which = "left") 65 | not_comment <- seq_along(base)[!grepl("## ", clean_base)] 66 | n <- max(1, max(not_comment)) 67 | base[n] <- paste(base[n], "|>") 68 | c(base, paste0("\n ", value)) 69 | } 70 | add_comment <- function(base, value, add = TRUE, colors = TRUE) { 71 | if (!add) { 72 | return(base) 73 | } 74 | if (!is.character(value)) { 75 | rlang::abort("`value` must be character.") 76 | } 77 | 78 | value <- strwrap(value, width = expr_width, prefix = "## ") 79 | if (colors) { 80 | value <- tune::get_tune_colors()$message$warning(value) 81 | } 82 | 83 | res <- c(base, paste0("\n ", value)) 84 | res 85 | } 86 | add_steps_dummy_vars <- function( 87 | base, 88 | hot = FALSE, 89 | add = FALSE, 90 | colors = TRUE 91 | ) { 92 | base <- base |> 93 | pipe_value(step_novel(all_nominal_predictors())) 94 | if (hot) { 95 | base <- base |> 96 | add_comment(dummy_hot_msg, add, colors = colors) |> 97 | pipe_value(step_dummy(all_nominal_predictors(), one_hot = TRUE)) 98 | } else { 99 | base <- base |> 100 | add_comment(dummy_msg, add, colors = colors) |> 101 | pipe_value(step_dummy(all_nominal_predictors())) 102 | } 103 | base 104 | } 105 | add_steps_normalization <- function(base) { 106 | base |> 107 | pipe_value(step_zv(all_predictors())) |> 108 | pipe_value(step_normalize(all_numeric_predictors())) 109 | } 110 | factor_check <- function(base, rec, add, colors = TRUE) { 111 | var_roles <- summary(rec) 112 | nominal <- var_roles$variable[var_roles$type == "nominal"] 
113 | is_str <- 114 | purrr::map_lgl( 115 | rec$template |> dplyr::select(dplyr::one_of(nominal)), 116 | rlang::is_character 117 | ) 118 | if (any(is_str)) { 119 | selector <- rlang::expr(one_of(!!!nominal[is_str])) 120 | step_expr <- rlang::expr(step_string2factor(!!selector)) 121 | base <- 122 | base |> 123 | add_comment(string_to_factor_msg, add = add, colors = colors) |> 124 | pipe_value(!!step_expr) 125 | } 126 | base 127 | } 128 | top_level_comment <- function(..., add = FALSE, colors = TRUE) { 129 | if (!add) { 130 | return(invisible(NULL)) 131 | } 132 | value <- paste(...) 133 | value <- strwrap(value, width = expr_width, prefix = "## ") 134 | if (colors) { 135 | value <- tune::get_tune_colors()$message$warning(value) 136 | } 137 | 138 | cat(paste0(value, collapse = "\n")) 139 | cat("\n") 140 | } 141 | 142 | template_workflow <- function(prefix) { 143 | paste0(prefix, "_workflow") |> 144 | assign_value(workflow()) |> 145 | pipe_value(add_recipe(!!rlang::sym(paste0(prefix, "_recipe")))) |> 146 | pipe_value(add_model(!!rlang::sym(paste0(prefix, "_spec")))) 147 | } 148 | 149 | template_tune_with_grid <- function(prefix, colors = TRUE) { 150 | tune_expr <- 151 | rlang::call2( 152 | "tune_grid", 153 | sym(paste0(prefix, "_workflow")), 154 | resamples = expr(stop("add your rsample object")), 155 | grid = sym(paste0(prefix, "_grid")) 156 | ) 157 | res <- assign_value(paste0(prefix, "_tune"), !!tune_expr) 158 | if (colors) { 159 | res <- sub( 160 | "stop(\"add your rsample object\")", 161 | tune::get_tune_colors()$message$danger( 162 | "stop(\"add your rsample object\")" 163 | ), 164 | res, 165 | fixed = TRUE 166 | ) 167 | } 168 | res 169 | } 170 | template_tune_no_grid <- function( 171 | prefix, 172 | seed = sample.int(10^5, 1), 173 | colors = TRUE 174 | ) { 175 | tune_expr <- 176 | rlang::call2( 177 | "tune_grid", 178 | sym(paste0(prefix, "_workflow")), 179 | resamples = expr(stop("add your rsample object")), 180 | grid = expr(stop("add number of candidate points")) 181 | ) 182 | 183 | res <- c( 184 | paste0("set.seed(", seed, ")\n"), 185 | assign_value(paste0(prefix, "_tune"), !!tune_expr) 186 | ) 187 | 188 | if (colors) { 189 | res <- sub( 190 | "stop(\"add your rsample object\")", 191 | tune::get_tune_colors()$message$danger( 192 | "stop(\"add your rsample object\")" 193 | ), 194 | res, 195 | fixed = TRUE 196 | ) 197 | res <- sub( 198 | "stop(\"add number of candidate points\")", 199 | tune::get_tune_colors()$message$danger( 200 | "stop(\"add number of candidate points\")" 201 | ), 202 | res, 203 | fixed = TRUE 204 | ) 205 | } 206 | res 207 | } 208 | 209 | # Take the call to the template function and turn it into a call to `recipe()` 210 | initial_recipe_call <- function(cl) { 211 | cl$tune <- NULL 212 | cl$verbose <- NULL 213 | cl$colors <- NULL 214 | cl$prefix <- NULL 215 | cl$clipboard <- NULL 216 | rec_cl <- cl 217 | rec_cl[[1]] <- rlang::expr(recipe) 218 | rec_cl 219 | } 220 | 221 | output_loc <- function(clipboard) { 222 | if (clipboard) { 223 | res <- tempfile(pattern = "usemodels_") 224 | } else { 225 | res <- "" 226 | } 227 | res 228 | } 229 | 230 | route <- function(x, path, ...) { 231 | cat(x, "\n\n", file = path, append = path != "", ...) 
232 |   invisible(NULL)
233 | }
234 | 
235 | clipboard_output <- function(pth) {
236 |   if (pth == "") {
237 |     return(invisible(NULL))
238 |   }
239 |   code <- readLines(pth)
240 |   clipr::write_clip(code, object_type = "character")
241 |   cli::cli_alert_success("code is on the clipboard.")
242 |   invisible(NULL)
243 | }
244 | 
245 | check_color <- function(cls, clip) {
246 |   if (cls & clip) {
247 |     cls <- FALSE
248 |   }
249 |   cls
250 | }
251 | 
252 | check_clipboard <- function(clipboard) {
253 |   if (!clipboard) {
254 |     return(invisible(NULL))
255 |   }
256 |   # from reprex_clipboard
257 |   y <- clipr::clipr_available()
258 |   if (isFALSE(y)) {
259 |     clipr::dr_clipr()
260 |     rlang::abort("Please use `clipboard = FALSE`")
261 |   }
262 |   invisible(NULL)
263 | }
264 | 
--------------------------------------------------------------------------------
/R/use.R:
--------------------------------------------------------------------------------
 1 | #' Functions to create boilerplate code for specific models
 2 | #'
 3 | #' These functions make suggestions for code when using a few common models.
 4 | #' They print out code to the console that could be considered minimal syntax
 5 | #' for their respective techniques. Each creates a prototype recipe and workflow
 6 | #' object that can be edited or updated as the data require.
 7 | #'
 8 | #' @param formula A simple model formula with no in-line functions. This will
 9 | #'  be used to template the recipe object as well as to determine which outcome
10 | #'  and predictor columns will be used.
11 | #' @param data A data frame with the columns used in the analysis.
12 | #' @param prefix A single character string to use as a prefix for the resulting
13 | #'  objects.
14 | #' @param verbose A single logical that determines whether comments are added to
15 | #'  the printed code explaining why certain lines are used.
16 | #' @param tune A single logical that controls whether code for model tuning
17 | #'  should be printed.
18 | #' @param colors A single logical for coloring warnings and code snippets that
19 | #'  require the user's attention (ignored when `clipboard = TRUE`).
20 | #' @param clipboard A single logical for whether the code output should be
21 | #'  sent to the clipboard or printed to the console.
22 | #' @return Invisible `NULL` but code is printed to the console.
23 | #' @details
24 | #' Based on the columns in `data`, certain recipe steps are printed. For
25 | #' example, if a model requires that qualitative predictors be converted to
26 | #' numeric (say, using dummy variables) then an additional `step_dummy()` is
27 | #' added. Otherwise that recipe step is not included in the output.
28 | #'
29 | #' The syntax is opinionated and should not be considered the exact answer for
30 | #' every data analysis. It has reasonable defaults.
31 | #' @examples 32 | #' if (rlang::is_installed("modeldata")) { 33 | #' library(modeldata) 34 | #' data(ad_data) 35 | #' use_glmnet(Class ~ ., data = ad_data) 36 | #' 37 | #' data(Sacramento) 38 | #' use_glmnet(price ~ ., data = Sacramento, verbose = TRUE, prefix = "sac_homes") 39 | #' } 40 | #' @export 41 | #' @rdname templates 42 | use_glmnet <- function( 43 | formula, 44 | data, 45 | prefix = "glmnet", 46 | verbose = FALSE, 47 | tune = TRUE, 48 | colors = TRUE, 49 | clipboard = FALSE 50 | ) { 51 | check_clipboard(clipboard) 52 | colors <- check_color(colors, clipboard) 53 | pth <- output_loc(clipboard) 54 | on.exit(unlink(pth)) 55 | 56 | rec_cl <- initial_recipe_call(match.call()) 57 | rec_syntax <- 58 | paste0(prefix, "_recipe") |> 59 | assign_value(!!rec_cl) 60 | 61 | rec <- recipes::recipe(formula, data) 62 | 63 | rec_syntax <- 64 | rec_syntax |> 65 | factor_check(rec, add = verbose, colors = colors) 66 | 67 | if (has_factor_pred(rec)) { 68 | rec_syntax <- 69 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 70 | } 71 | rec_syntax <- 72 | rec_syntax |> 73 | add_comment(paste(reg_msg, zv_msg), add = verbose, colors = colors) |> 74 | add_steps_normalization() 75 | 76 | mod_mode <- model_mode(rec) 77 | 78 | if (tune) { 79 | prm <- rlang::exprs(penalty = tune(), mixture = tune()) 80 | } else { 81 | prm <- NULL 82 | } 83 | 84 | if (mod_mode == "classification") { 85 | num_lvl <- y_lvl(rec) 86 | if (num_lvl == 2) { 87 | mod_syntax <- 88 | paste0(prefix, "_spec") |> 89 | assign_value(!!rlang::call2("logistic_reg", !!!prm)) |> 90 | pipe_value(set_mode("classification")) 91 | } else { 92 | mod_syntax <- 93 | paste0(prefix, "_spec") |> 94 | assign_value(!!rlang::call2("multinom_reg", !!!prm)) |> 95 | pipe_value(set_mode("classification")) 96 | } 97 | } else { 98 | mod_syntax <- 99 | paste0(prefix, "_spec") |> 100 | assign_value(!!rlang::call2("linear_reg", !!!prm)) |> 101 | pipe_value(set_mode("regression")) 102 | } 103 | 104 | mod_syntax <- 105 | mod_syntax |> 106 | pipe_value(set_engine("glmnet")) 107 | 108 | route(rec_syntax, path = pth) 109 | route(mod_syntax, path = pth) 110 | route(template_workflow(prefix), path = pth) 111 | 112 | if (tune) { 113 | glmn_grid <- rlang::expr( 114 | glmn_grid <- 115 | tidyr::crossing( 116 | penalty = 10^seq(-6, -1, length.out = 20), 117 | mixture = c(0.05, .2, .4, .6, .8, 1) 118 | ) 119 | ) 120 | glmn_grid[[2]] <- rlang::sym(paste0(prefix, "_grid")) 121 | route(rlang::expr_text(glmn_grid, width = expr_width), path = pth) 122 | route(template_tune_with_grid(prefix, colors = colors), path = pth) 123 | } 124 | clipboard_output(pth) 125 | invisible(NULL) 126 | } 127 | 128 | #' @export 129 | #' @rdname templates 130 | use_xgboost <- function( 131 | formula, 132 | data, 133 | prefix = "xgboost", 134 | verbose = FALSE, 135 | tune = TRUE, 136 | colors = TRUE, 137 | clipboard = FALSE 138 | ) { 139 | check_clipboard(clipboard) 140 | colors <- check_color(colors, clipboard) 141 | pth <- output_loc(clipboard) 142 | on.exit(unlink(pth)) 143 | 144 | rec_cl <- initial_recipe_call(match.call()) 145 | rec_syntax <- 146 | paste0(prefix, "_recipe") |> 147 | assign_value(!!rec_cl) 148 | 149 | rec <- recipe(formula, data) 150 | 151 | rec_syntax <- 152 | rec_syntax |> 153 | factor_check(rec, add = verbose, colors = colors) 154 | 155 | if (has_factor_pred(rec)) { 156 | rec_syntax <- 157 | add_steps_dummy_vars( 158 | rec_syntax, 159 | hot = TRUE, 160 | add = verbose, 161 | colors = colors 162 | ) 163 | } 164 | 165 | rec_syntax <- pipe_value(rec_syntax, 
step_zv(all_predictors())) 166 | 167 | if (tune) { 168 | prm <- 169 | rlang::exprs( 170 | trees = tune(), 171 | min_n = tune(), 172 | tree_depth = tune(), 173 | learn_rate = tune(), 174 | loss_reduction = tune(), 175 | sample_size = tune() 176 | ) 177 | } else { 178 | prm <- NULL 179 | } 180 | 181 | mod_syntax <- 182 | paste0(prefix, "_spec") |> 183 | assign_value(!!rlang::call2("boost_tree", !!!prm)) |> 184 | pipe_value(set_mode(!!model_mode(rec))) |> 185 | pipe_value(set_engine("xgboost")) 186 | 187 | route(rec_syntax, path = pth) 188 | route(mod_syntax, path = pth) 189 | route(template_workflow(prefix), path = pth) 190 | if (tune) { 191 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 192 | } 193 | clipboard_output(pth) 194 | invisible(NULL) 195 | } 196 | 197 | # ------------------------------------------------------------------------------ 198 | 199 | #' @export 200 | #' @rdname templates 201 | use_kknn <- function( 202 | formula, 203 | data, 204 | prefix = "kknn", 205 | verbose = FALSE, 206 | tune = TRUE, 207 | colors = TRUE, 208 | clipboard = FALSE 209 | ) { 210 | check_clipboard(clipboard) 211 | colors <- check_color(colors, clipboard) 212 | pth <- output_loc(clipboard) 213 | on.exit(unlink(pth)) 214 | 215 | rec_cl <- initial_recipe_call(match.call()) 216 | rec_syntax <- 217 | paste0(prefix, "_recipe") |> 218 | assign_value(!!rec_cl) 219 | 220 | rec <- recipes::recipe(formula, data) 221 | 222 | rec_syntax <- 223 | rec_syntax |> 224 | factor_check(rec, add = verbose, colors = colors) 225 | 226 | if (has_factor_pred(rec)) { 227 | rec_syntax <- 228 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 229 | } 230 | rec_syntax <- 231 | rec_syntax |> 232 | add_comment(paste(dist_msg, zv_msg), add = verbose, colors = colors) |> 233 | add_steps_normalization() 234 | 235 | if (tune) { 236 | prm <- rlang::exprs(neighbors = tune(), weight_func = tune()) 237 | } else { 238 | prm <- NULL 239 | } 240 | 241 | mod_syntax <- 242 | paste0(prefix, "_spec") |> 243 | assign_value(!!rlang::call2("nearest_neighbor", !!!prm)) |> 244 | pipe_value(set_mode(!!model_mode(rec))) |> 245 | pipe_value(set_engine("kknn")) 246 | 247 | route(rec_syntax, path = pth) 248 | route(mod_syntax, path = pth) 249 | route(template_workflow(prefix), path = pth) 250 | if (tune) { 251 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 252 | } 253 | clipboard_output(pth) 254 | invisible(NULL) 255 | } 256 | 257 | # ------------------------------------------------------------------------------ 258 | 259 | #' @export 260 | #' @rdname templates 261 | use_ranger <- function( 262 | formula, 263 | data, 264 | prefix = "ranger", 265 | verbose = FALSE, 266 | tune = TRUE, 267 | colors = TRUE, 268 | clipboard = FALSE 269 | ) { 270 | check_clipboard(clipboard) 271 | colors <- check_color(colors, clipboard) 272 | pth <- output_loc(clipboard) 273 | on.exit(unlink(pth)) 274 | 275 | rec_cl <- initial_recipe_call(match.call()) 276 | rec_syntax <- 277 | paste0(prefix, "_recipe") |> 278 | assign_value(!!rec_cl) 279 | 280 | rec <- recipes::recipe(formula, data) 281 | 282 | rec_syntax <- 283 | rec_syntax |> 284 | factor_check(rec, add = verbose, colors = colors) 285 | 286 | # TODO add a check for the factor levels that are an issue for 287 | 288 | if (tune) { 289 | prm <- rlang::exprs(mtry = tune(), min_n = tune(), trees = 1000) 290 | } else { 291 | prm <- prm <- rlang::exprs(trees = 1000) 292 | } 293 | 294 | mod_syntax <- 295 | paste0(prefix, "_spec") |> 296 | 
assign_value(!!rlang::call2("rand_forest", !!!prm)) |> 297 | pipe_value(set_mode(!!model_mode(rec))) |> 298 | pipe_value(set_engine("ranger")) 299 | 300 | route(rec_syntax, path = pth) 301 | route(mod_syntax, path = pth) 302 | route(template_workflow(prefix), path = pth) 303 | if (tune) { 304 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 305 | } 306 | clipboard_output(pth) 307 | invisible(NULL) 308 | } 309 | 310 | # ------------------------------------------------------------------------------ 311 | 312 | #' @export 313 | #' @rdname templates 314 | use_earth <- function( 315 | formula, 316 | data, 317 | prefix = "earth", 318 | verbose = FALSE, 319 | tune = TRUE, 320 | colors = TRUE, 321 | clipboard = FALSE 322 | ) { 323 | check_clipboard(clipboard) 324 | colors <- check_color(colors, clipboard) 325 | pth <- output_loc(clipboard) 326 | on.exit(unlink(pth)) 327 | 328 | rec_cl <- initial_recipe_call(match.call()) 329 | rec_syntax <- 330 | paste0(prefix, "_recipe") |> 331 | assign_value(!!rec_cl) 332 | 333 | rec <- recipe(formula, data) 334 | 335 | rec_syntax <- 336 | rec_syntax |> 337 | factor_check(rec, add = verbose, colors = colors) 338 | 339 | if (has_factor_pred(rec)) { 340 | rec_syntax <- 341 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 342 | } 343 | 344 | rec_syntax <- pipe_value(rec_syntax, step_zv(all_predictors())) 345 | 346 | if (tune) { 347 | prm <- 348 | rlang::exprs( 349 | num_terms = tune(), 350 | prod_degree = tune(), 351 | prune_method = "none" 352 | ) 353 | } else { 354 | prm <- NULL 355 | } 356 | 357 | mod_syntax <- 358 | paste0(prefix, "_spec") |> 359 | assign_value(!!rlang::call2("mars", !!!prm)) |> 360 | pipe_value(set_mode(!!model_mode(rec))) |> 361 | pipe_value(set_engine("earth")) 362 | 363 | route(rec_syntax, path = pth) 364 | route(mod_syntax, path = pth) 365 | route(template_workflow(prefix), path = pth) 366 | if (tune) { 367 | # We can only have as many terms as data points but maybe we should 368 | # give some wiggle room for resampling. Also, we will have a sequence of odd 369 | # numbered terms so divide by 2 and keep an integer. 
370 | term_max <- floor(min(12, floor(floor(nrow(data) * 0.75))) / 2) 371 | 372 | mars_grid <- rlang::expr( 373 | mars_grid <- 374 | tidyr::crossing(num_terms = 2 * (1:!!term_max), prod_degree = 1:2) 375 | ) 376 | mars_grid[[2]] <- rlang::sym(paste0(prefix, "_grid")) 377 | top_level_comment( 378 | "MARS models can make predictions on many _sub_models_, meaning that we can", 379 | "evaluate many values of `num_terms` without much computational cost.", 380 | "A regular grid is used to exploit this property.", 381 | "The first term is only the intercept, so the grid is a sequence of even", 382 | "numbered values.", 383 | add = verbose, 384 | colors = colors 385 | ) 386 | route(rlang::expr_text(mars_grid, width = expr_width), path = pth) 387 | route(template_tune_with_grid(prefix, colors = colors), path = pth) 388 | } 389 | clipboard_output(pth) 390 | invisible(NULL) 391 | } 392 | 393 | # ------------------------------------------------------------------------------ 394 | 395 | #' @export 396 | #' @rdname templates 397 | use_cubist <- function( 398 | formula, 399 | data, 400 | prefix = "cubist", 401 | verbose = FALSE, 402 | tune = TRUE, 403 | colors = TRUE, 404 | clipboard = FALSE 405 | ) { 406 | check_clipboard(clipboard) 407 | colors <- check_color(colors, clipboard) 408 | pth <- output_loc(clipboard) 409 | on.exit(unlink(pth)) 410 | 411 | rec_cl <- initial_recipe_call(match.call()) 412 | rec_syntax <- 413 | paste0(prefix, "_recipe") |> 414 | assign_value(!!rec_cl) 415 | 416 | rec <- recipes::recipe(formula, data) 417 | if (model_mode(rec) != "regression") { 418 | rlang::abort("Cubist models are only for regression") 419 | } 420 | rec_syntax <- 421 | rec_syntax |> 422 | factor_check(rec, add = verbose, colors = colors) 423 | 424 | rec_syntax <- pipe_value(rec_syntax, step_zv(all_predictors())) 425 | 426 | if (tune) { 427 | prm <- rlang::exprs(committees = tune(), neighbors = tune()) 428 | } else { 429 | prm <- NULL 430 | } 431 | 432 | mod_syntax <- 433 | paste0(prefix, "_spec") |> 434 | assign_value(!!rlang::call2("cubist_rules", !!!prm)) |> 435 | pipe_value(set_engine("Cubist")) 436 | 437 | route("library(rules)", path = pth, sep = "") 438 | route(rec_syntax, path = pth) 439 | route(mod_syntax, path = pth) 440 | route(template_workflow(prefix), path = pth) 441 | if (tune) { 442 | cubist_grid <- rlang::expr( 443 | cubist_grid <- 444 | tidyr::crossing( 445 | committees = c(1:9, (1:5) * 10), 446 | neighbors = c(0, 3, 6, 9) 447 | ) 448 | ) 449 | cubist_grid[[2]] <- rlang::sym(paste0(prefix, "_grid")) 450 | route(rlang::expr_text(cubist_grid, width = expr_width), path = pth) 451 | route(template_tune_with_grid(prefix, colors = colors), path = pth) 452 | } 453 | clipboard_output(pth) 454 | invisible(NULL) 455 | } 456 | 457 | #' @export 458 | #' @rdname templates 459 | use_kernlab_svm_rbf <- function( 460 | formula, 461 | data, 462 | prefix = "kernlab", 463 | verbose = FALSE, 464 | tune = TRUE, 465 | colors = TRUE, 466 | clipboard = FALSE 467 | ) { 468 | check_clipboard(clipboard) 469 | colors <- check_color(colors, clipboard) 470 | pth <- output_loc(clipboard) 471 | on.exit(unlink(pth)) 472 | 473 | rec_cl <- initial_recipe_call(match.call()) 474 | rec_syntax <- 475 | paste0(prefix, "_recipe") |> 476 | assign_value(!!rec_cl) 477 | 478 | rec <- recipes::recipe(formula, data) 479 | 480 | rec_syntax <- 481 | rec_syntax |> 482 | add_comment(paste(dot_msg, zv_msg), add = verbose, colors = colors) |> 483 | add_steps_normalization() 484 | 485 | mod_mode <- model_mode(rec) 486 | 487 | if (tune) { 488 | 
prm <- rlang::exprs(cost = tune(), rbf_sigma = tune()) 489 | } else { 490 | prm <- NULL 491 | } 492 | 493 | mod_syntax <- 494 | paste0(prefix, "_spec") |> 495 | assign_value(!!rlang::call2("svm_rbf", !!!prm)) |> 496 | pipe_value(set_mode(!!model_mode(rec))) 497 | 498 | route(rec_syntax, path = pth) 499 | route(mod_syntax, path = pth) 500 | route(template_workflow(prefix), path = pth) 501 | 502 | if (tune) { 503 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 504 | } 505 | clipboard_output(pth) 506 | invisible(NULL) 507 | } 508 | 509 | #' @export 510 | #' @rdname templates 511 | use_kernlab_svm_poly <- function( 512 | formula, 513 | data, 514 | prefix = "kernlab", 515 | verbose = FALSE, 516 | tune = TRUE, 517 | colors = TRUE, 518 | clipboard = FALSE 519 | ) { 520 | check_clipboard(clipboard) 521 | colors <- check_color(colors, clipboard) 522 | pth <- output_loc(clipboard) 523 | on.exit(unlink(pth)) 524 | 525 | rec_cl <- initial_recipe_call(match.call()) 526 | rec_syntax <- 527 | paste0(prefix, "_recipe") |> 528 | assign_value(!!rec_cl) 529 | 530 | rec <- recipes::recipe(formula, data) 531 | 532 | rec_syntax <- 533 | rec_syntax |> 534 | add_comment(paste(dot_msg, zv_msg), add = verbose, colors = colors) |> 535 | add_steps_normalization() 536 | 537 | mod_mode <- model_mode(rec) 538 | 539 | if (tune) { 540 | prm <- rlang::exprs(cost = tune(), degree = tune(), scale_factor = tune()) 541 | } else { 542 | prm <- NULL 543 | } 544 | 545 | mod_syntax <- 546 | paste0(prefix, "_spec") |> 547 | assign_value(!!rlang::call2("svm_poly", !!!prm)) |> 548 | pipe_value(set_mode(!!model_mode(rec))) 549 | 550 | route(rec_syntax, path = pth) 551 | route(mod_syntax, path = pth) 552 | route(template_workflow(prefix), path = pth) 553 | 554 | if (tune) { 555 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 556 | } 557 | clipboard_output(pth) 558 | invisible(NULL) 559 | } 560 | 561 | #' @export 562 | #' @rdname templates 563 | use_C5.0 <- function( 564 | formula, 565 | data, 566 | prefix = "C50", 567 | verbose = FALSE, 568 | tune = TRUE, 569 | colors = TRUE, 570 | clipboard = FALSE 571 | ) { 572 | check_clipboard(clipboard) 573 | colors <- check_color(colors, clipboard) 574 | pth <- output_loc(clipboard) 575 | on.exit(unlink(pth)) 576 | 577 | rec_cl <- initial_recipe_call(match.call()) 578 | rec_syntax <- 579 | paste0(prefix, "_recipe") |> 580 | assign_value(!!rec_cl) 581 | 582 | rec <- recipes::recipe(formula, data) 583 | if (model_mode(rec) != "classification") { 584 | rlang::abort("C5.0 models are only for classification.") 585 | } 586 | rec_syntax <- 587 | rec_syntax |> 588 | factor_check(rec, add = verbose, colors = colors) 589 | 590 | if (tune) { 591 | prm <- rlang::exprs(trees = tune(), min_n = tune()) 592 | } else { 593 | prm <- NULL 594 | } 595 | 596 | mod_syntax <- 597 | paste0(prefix, "_spec") |> 598 | assign_value(!!rlang::call2("boost_tree", !!!prm)) |> 599 | pipe_value(set_mode("classification")) |> 600 | pipe_value(set_engine("C5.0")) 601 | 602 | route(rec_syntax, path = pth) 603 | route(mod_syntax, path = pth) 604 | route(template_workflow(prefix), path = pth) 605 | if (tune) { 606 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 607 | } 608 | clipboard_output(pth) 609 | invisible(NULL) 610 | } 611 | 612 | #' @export 613 | #' @rdname templates 614 | use_nnet <- function( 615 | formula, 616 | data, 617 | prefix = "nnet", 618 | verbose = FALSE, 619 | tune = TRUE, 620 | colors = TRUE, 621 | clipboard = FALSE 622 | ) { 
623 | check_clipboard(clipboard) 624 | colors <- check_color(colors, clipboard) 625 | pth <- output_loc(clipboard) 626 | on.exit(unlink(pth)) 627 | 628 | rec_cl <- initial_recipe_call(match.call()) 629 | rec_syntax <- 630 | paste0(prefix, "_recipe") |> 631 | assign_value(!!rec_cl) 632 | 633 | rec <- recipes::recipe(formula, data) 634 | 635 | if (has_factor_pred(rec)) { 636 | rec_syntax <- 637 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 638 | } 639 | 640 | rec_syntax <- 641 | rec_syntax |> 642 | factor_check(rec, add = verbose, colors = colors) |> 643 | add_steps_normalization() 644 | 645 | if (tune) { 646 | prm <- rlang::exprs( 647 | hidden_units = tune(), 648 | penalty = tune(), 649 | epochs = tune() 650 | ) 651 | } else { 652 | prm <- NULL 653 | } 654 | 655 | mod_syntax <- 656 | paste0(prefix, "_spec") |> 657 | assign_value(!!rlang::call2("mlp", !!!prm)) |> 658 | pipe_value(set_mode(!!model_mode(rec))) 659 | 660 | route(rec_syntax, path = pth) 661 | route(mod_syntax, path = pth) 662 | route(template_workflow(prefix), path = pth) 663 | 664 | if (tune) { 665 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 666 | } 667 | clipboard_output(pth) 668 | invisible(NULL) 669 | } 670 | 671 | #' @export 672 | #' @rdname templates 673 | use_rpart <- function( 674 | formula, 675 | data, 676 | prefix = "rpart", 677 | verbose = FALSE, 678 | tune = TRUE, 679 | colors = TRUE, 680 | clipboard = FALSE 681 | ) { 682 | check_clipboard(clipboard) 683 | colors <- check_color(colors, clipboard) 684 | pth <- output_loc(clipboard) 685 | on.exit(unlink(pth)) 686 | 687 | rec_cl <- initial_recipe_call(match.call()) 688 | rec_syntax <- 689 | paste0(prefix, "_recipe") |> 690 | assign_value(!!rec_cl) 691 | 692 | rec <- recipe(formula, data) 693 | 694 | rec_syntax <- 695 | rec_syntax |> 696 | factor_check(rec, add = verbose, colors = colors) 697 | 698 | if (tune) { 699 | prm <- 700 | rlang::exprs( 701 | tree_depth = tune(), 702 | min_n = tune(), 703 | cost_complexity = tune() 704 | ) 705 | } else { 706 | prm <- NULL 707 | } 708 | 709 | mod_syntax <- 710 | paste0(prefix, "_spec") |> 711 | assign_value(!!rlang::call2("decision_tree", !!!prm)) |> 712 | pipe_value(set_mode(!!model_mode(rec))) |> 713 | pipe_value(set_engine("rpart")) 714 | 715 | route(rec_syntax, path = pth) 716 | route(mod_syntax, path = pth) 717 | route(template_workflow(prefix), path = pth) 718 | if (tune) { 719 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 720 | } 721 | clipboard_output(pth) 722 | invisible(NULL) 723 | } 724 | 725 | #' @export 726 | #' @rdname templates 727 | use_bag_tree_rpart <- function( 728 | formula, 729 | data, 730 | prefix = "rpart", 731 | verbose = FALSE, 732 | tune = TRUE, 733 | colors = TRUE, 734 | clipboard = FALSE 735 | ) { 736 | check_clipboard(clipboard) 737 | colors <- check_color(colors, clipboard) 738 | pth <- output_loc(clipboard) 739 | on.exit(unlink(pth)) 740 | 741 | rec_cl <- initial_recipe_call(match.call()) 742 | rec_syntax <- 743 | paste0(prefix, "_recipe") |> 744 | assign_value(!!rec_cl) 745 | 746 | rec <- recipe(formula, data) 747 | 748 | rec_syntax <- 749 | rec_syntax |> 750 | factor_check(rec, add = verbose, colors = colors) 751 | 752 | if (tune) { 753 | prm <- 754 | rlang::exprs( 755 | tree_depth = tune(), 756 | min_n = tune(), 757 | cost_complexity = tune() 758 | ) 759 | } else { 760 | prm <- NULL 761 | } 762 | 763 | mod_syntax <- 764 | paste0(prefix, "_spec") |> 765 | assign_value(!!rlang::call2("bag_tree", !!!prm)) |> 766 | 
pipe_value(set_mode(!!model_mode(rec))) |> 767 | pipe_value(set_engine("rpart")) 768 | 769 | route("library(baguette)", path = pth, sep = "") 770 | route(rec_syntax, path = pth) 771 | route(mod_syntax, path = pth) 772 | route(template_workflow(prefix), path = pth) 773 | if (tune) { 774 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 775 | } 776 | clipboard_output(pth) 777 | invisible(NULL) 778 | } 779 | 780 | #' @export 781 | #' @rdname templates 782 | use_mgcv <- function( 783 | formula, 784 | data, 785 | prefix = "mgcv", 786 | verbose = FALSE, 787 | tune = TRUE, 788 | colors = TRUE, 789 | clipboard = FALSE 790 | ) { 791 | check_clipboard(clipboard) 792 | colors <- check_color(colors, clipboard) 793 | pth <- output_loc(clipboard) 794 | on.exit(unlink(pth)) 795 | 796 | rec_cl <- initial_recipe_call(match.call()) 797 | rec_syntax <- 798 | paste0(prefix, "_recipe") |> 799 | assign_value(!!rec_cl) 800 | 801 | rec <- recipe(formula, data) 802 | 803 | rec_syntax <- 804 | rec_syntax |> 805 | factor_check(rec, add = verbose, colors = colors) 806 | 807 | if (tune) { 808 | prm <- rlang::exprs( 809 | select_features = tune(), 810 | adjust_deg_free = tune() 811 | ) 812 | } else { 813 | prm <- NULL 814 | } 815 | 816 | mod_syntax <- 817 | paste0(prefix, "_spec") |> 818 | assign_value(!!rlang::call2("gen_additive_mod", !!!prm)) |> 819 | pipe_value(set_mode(!!model_mode(rec))) |> 820 | pipe_value(set_engine("mgcv")) 821 | 822 | spec_expr <- rlang::call2( 823 | "add_model", 824 | sym(paste0(prefix, "_spec")), 825 | formula = expr(stop("add your gam formula")) 826 | ) 827 | 828 | wf_syntax <- paste0(prefix, "_workflow") |> 829 | assign_value(workflow()) |> 830 | pipe_value(add_recipe(!!rlang::sym(paste0(prefix, "_recipe")))) |> 831 | pipe_value(!!spec_expr) 832 | 833 | route(rec_syntax, path = pth) 834 | route(mod_syntax, path = pth) 835 | route(wf_syntax, path = pth) 836 | 837 | if (tune) { 838 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 839 | } 840 | 841 | clipboard_output(pth) 842 | invisible(NULL) 843 | } 844 | 845 | #' @export 846 | #' @rdname templates 847 | use_dbarts <- function( 848 | formula, 849 | data, 850 | prefix = "dbarts", 851 | verbose = FALSE, 852 | tune = TRUE, 853 | colors = TRUE, 854 | clipboard = FALSE 855 | ) { 856 | check_clipboard(clipboard) 857 | colors <- check_color(colors, clipboard) 858 | pth <- output_loc(clipboard) 859 | on.exit(unlink(pth)) 860 | 861 | rec_cl <- initial_recipe_call(match.call()) 862 | rec_syntax <- 863 | paste0(prefix, "_recipe") |> 864 | assign_value(!!rec_cl) 865 | 866 | rec <- recipe(formula, data) 867 | 868 | rec_syntax <- 869 | rec_syntax |> 870 | factor_check(rec, add = verbose, colors = colors) 871 | 872 | if (tune) { 873 | prm <- 874 | rlang::exprs( 875 | trees = tune(), 876 | prior_terminal_node_coef = tune(), 877 | prior_terminal_node_expo = tune() 878 | ) 879 | } else { 880 | prm <- NULL 881 | } 882 | 883 | mod_syntax <- 884 | paste0(prefix, "_spec") |> 885 | assign_value(!!rlang::call2("bart", !!!prm)) |> 886 | pipe_value(set_mode(!!model_mode(rec))) |> 887 | pipe_value(set_engine("dbarts")) 888 | 889 | route(rec_syntax, path = pth) 890 | route(mod_syntax, path = pth) 891 | route(template_workflow(prefix), path = pth) 892 | if (tune) { 893 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 894 | } 895 | clipboard_output(pth) 896 | invisible(NULL) 897 | } 898 | 899 | #' @export 900 | #' @rdname templates 901 | use_mixOmics <- function( 902 | formula, 903 | 
data, 904 | prefix = "mixOmics", 905 | verbose = FALSE, 906 | tune = TRUE, 907 | colors = TRUE, 908 | clipboard = FALSE 909 | ) { 910 | check_clipboard(clipboard) 911 | colors <- check_color(colors, clipboard) 912 | pth <- output_loc(clipboard) 913 | on.exit(unlink(pth)) 914 | 915 | rec_cl <- initial_recipe_call(match.call()) 916 | rec_syntax <- 917 | paste0(prefix, "_recipe") |> 918 | assign_value(!!rec_cl) 919 | 920 | rec <- recipe(formula, data) 921 | 922 | rec_syntax <- 923 | rec_syntax |> 924 | factor_check(rec, add = verbose, colors = colors) 925 | 926 | if (has_factor_pred(rec)) { 927 | rec_syntax <- 928 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 929 | } 930 | 931 | rec_syntax <- 932 | rec_syntax |> 933 | add_steps_normalization() 934 | 935 | if (tune) { 936 | prm <- 937 | rlang::exprs( 938 | predictor_prop = tune(), 939 | num_comp = tune() 940 | ) 941 | } else { 942 | prm <- NULL 943 | } 944 | mod_syntax <- 945 | paste0(prefix, "_spec") |> 946 | assign_value(!!rlang::call2("pls", !!!prm)) |> 947 | pipe_value(set_mode(!!model_mode(rec))) |> 948 | pipe_value(set_engine("mixOmics")) 949 | 950 | route("library(plsmod)", path = pth, sep = "") 951 | route(rec_syntax, path = pth) 952 | route(mod_syntax, path = pth) 953 | route(template_workflow(prefix), path = pth) 954 | if (tune) { 955 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 956 | } 957 | clipboard_output(pth) 958 | invisible(NULL) 959 | } 960 | 961 | #' @export 962 | #' @rdname templates 963 | use_xrf <- function( 964 | formula, 965 | data, 966 | prefix = "xrf", 967 | verbose = FALSE, 968 | tune = TRUE, 969 | colors = TRUE, 970 | clipboard = FALSE 971 | ) { 972 | check_clipboard(clipboard) 973 | colors <- check_color(colors, clipboard) 974 | pth <- output_loc(clipboard) 975 | on.exit(unlink(pth)) 976 | 977 | rec_cl <- initial_recipe_call(match.call()) 978 | rec_syntax <- 979 | paste0(prefix, "_recipe") |> 980 | assign_value(!!rec_cl) 981 | 982 | rec <- recipe(formula, data) 983 | 984 | rec_syntax <- 985 | rec_syntax |> 986 | factor_check(rec, add = verbose, colors = colors) 987 | 988 | if (has_factor_pred(rec)) { 989 | rec_syntax <- 990 | add_steps_dummy_vars(rec_syntax, add = verbose, colors = colors) 991 | } 992 | 993 | rec_syntax <- 994 | rec_syntax |> 995 | add_steps_normalization() 996 | 997 | if (tune) { 998 | prm <- 999 | rlang::exprs( 1000 | mtry = tune(), 1001 | trees = tune(), 1002 | min_n = tune(), 1003 | tree_depth = tune(), 1004 | learn_rate = tune(), 1005 | loss_reduction = tune(), 1006 | sample_size = tune(), 1007 | penalty = tune() 1008 | ) 1009 | } else { 1010 | prm <- NULL 1011 | } 1012 | 1013 | mod_syntax <- 1014 | paste0(prefix, "_spec") |> 1015 | assign_value(!!rlang::call2("rule_fit", !!!prm)) |> 1016 | pipe_value(set_mode(!!model_mode(rec))) |> 1017 | pipe_value(set_engine("xrf")) 1018 | 1019 | route("library(rules)", path = pth, sep = "") 1020 | route(rec_syntax, path = pth) 1021 | route(mod_syntax, path = pth) 1022 | route(template_workflow(prefix), path = pth) 1023 | if (tune) { 1024 | route(template_tune_no_grid(prefix, colors = colors), path = pth, sep = "") 1025 | } 1026 | clipboard_output(pth) 1027 | invisible(NULL) 1028 | } 1029 | -------------------------------------------------------------------------------- /R/usemodels-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | #' @import rlang 5 | #' @import tune 6 | #' @import cli 7 | ## usethis namespace: 
start 8 | #' @importFrom dplyr one_of 9 | #' @importFrom dplyr pull 10 | #' @importFrom dplyr select 11 | #' @importFrom recipes all_predictors 12 | #' @importFrom recipes recipe 13 | ## usethis namespace: end 14 | NULL 15 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r} 8 | #| include: false 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "", 12 | prompt = TRUE, 13 | fig.path = "man/figures/README-", 14 | out.width = "100%" 15 | ) 16 | ``` 17 | 18 | # usemodels 19 | 20 | 21 | [![R-CMD-check](https://github.com/tidymodels/usemodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/usemodels/actions/workflows/R-CMD-check.yaml) 22 | [![Coverage status](https://codecov.io/gh/tidymodels/usemodels/branch/main/graph/badge.svg)](https://app.codecov.io/github/tidymodels/usemodels?branch=main) 23 | [![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) 24 | 25 | 26 | The usemodels package is a helpful way of quickly creating code snippets to fit models using the tidymodels framework. 27 | 28 | Given a simple formula and a data set, the `use_*` functions can create code that is appropriate for the data (given the model). 29 | 30 | For example, using the palmerpenguins data with a `glmnet` model: 31 | 32 | ```{r} 33 | #| label: glmnet 34 | #| message: false 35 | library(usemodels) 36 | library(palmerpenguins) 37 | data(penguins) 38 | use_glmnet(body_mass_g ~ ., data = penguins) 39 | ``` 40 | 41 | The recipe steps that are used (if any) depend on the type of data as well as the model. In this case, the first two steps handle the fact that `species` is a factor-encoded predictor (and `glmnet` requires all numeric predictors). The last two steps are added because, for this model, the predictors should be on the same scale to be properly regularized. 42 | 43 | The package includes these templates: 44 | 45 | 46 | ```{r} 47 | #| label: use-list 48 | ls("package:usemodels", pattern = "use_") 49 | ``` 50 | 51 | You can also copy code to the clipboard using the option `clipboard = TRUE`; a short sketch of this is shown at the end of this file. 52 | 53 | ## Installation 54 | 55 | You can install usemodels with: 56 | 57 | ``` r 58 | devtools::install_github("tidymodels/usemodels") 59 | ``` 60 | 61 | 62 | ## Contributing 63 | 64 | This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 65 | 66 | - For questions and discussions about tidymodels packages, modeling, and machine learning, please [post on Posit Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question). 67 | 68 | - If you think you have encountered a bug, please [submit an issue](https://github.com/tidymodels/usemodels/issues). 69 | 70 | - Either way, learn how to create and share a [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html) (a minimal, reproducible example), to clearly communicate about your code. 71 | 72 | - Check out further details on [contributing guidelines for tidymodels packages](https://www.tidymodels.org/contribute/) and [how to get help](https://www.tidymodels.org/help/).
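
As a rough sketch of the `clipboard = TRUE` option mentioned above: this assumes the clipr package can write to your system clipboard (in non-interactive sessions the `CLIPR_ALLOW` environment variable needs to be set), and the call otherwise mirrors the `glmnet` example from earlier in this README.

``` r
library(usemodels)
library(palmerpenguins)
data(penguins)

# Copy the generated template to the clipboard instead of printing it
use_glmnet(body_mass_g ~ ., data = penguins, clipboard = TRUE)
```

Paste the result into your analysis script and replace the `stop()` placeholders (for example, the resampling object) with your own objects.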
73 | 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # usemodels 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/tidymodels/usemodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/usemodels/actions/workflows/R-CMD-check.yaml) 9 | [![Coverage 10 | status](https://codecov.io/gh/tidymodels/usemodels/branch/main/graph/badge.svg)](https://app.codecov.io/github/tidymodels/usemodels?branch=main) 11 | [![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) 12 | 13 | 14 | The usemodels package is a helpful way of quickly creating code snippets 15 | to fit models using the tidymodels framework. 16 | 17 | Given a simple formula and a data set, the `use_*` functions can create 18 | code that appropriate for the data (given the model). 19 | 20 | For example, using the palmerpenguins data with a `glmnet` model: 21 | 22 | ``` r 23 | > library(usemodels) 24 | > library(palmerpenguins) 25 | > data(penguins) 26 | > use_glmnet(body_mass_g ~ ., data = penguins) 27 | glmnet_recipe <- 28 | recipe(formula = body_mass_g ~ ., data = penguins) |> 29 | step_novel(all_nominal_predictors()) |> 30 | step_dummy(all_nominal_predictors()) |> 31 | step_zv(all_predictors()) |> 32 | step_normalize(all_numeric_predictors()) 33 | 34 | glmnet_spec <- 35 | linear_reg(penalty = tune(), mixture = tune()) |> 36 | set_mode("regression") |> 37 | set_engine("glmnet") 38 | 39 | glmnet_workflow <- 40 | workflow() |> 41 | add_recipe(glmnet_recipe) |> 42 | add_model(glmnet_spec) 43 | 44 | glmnet_grid <- tidyr::crossing(penalty = 10^seq(-6, -1, length.out = 20), mixture = c(0.05, 45 | 0.2, 0.4, 0.6, 0.8, 1)) 46 | 47 | glmnet_tune <- 48 | tune_grid(glmnet_workflow, resamples = stop("add your rsample object"), grid = glmnet_grid) 49 | ``` 50 | 51 | The recipe steps that are used (if any) depend on the type of data as 52 | well as the model. In this case, the first two steps handle the fact 53 | that `Species` is a factor-encoded predictor (and `glmnet` requires all 54 | numeric predictors). The last two steps are added because, for this 55 | model, the predictors should be on the same scale to be properly 56 | regularized. 57 | 58 | The package includes these templates: 59 | 60 | ``` r 61 | > ls("package:usemodels", pattern = "use_") 62 | [1] "use_bag_tree_rpart" "use_C5.0" "use_cubist" 63 | [4] "use_dbarts" "use_earth" "use_glmnet" 64 | [7] "use_kernlab_svm_poly" "use_kernlab_svm_rbf" "use_kknn" 65 | [10] "use_mgcv" "use_mixOmics" "use_nnet" 66 | [13] "use_ranger" "use_rpart" "use_xgboost" 67 | [16] "use_xrf" 68 | ``` 69 | 70 | You can also copy code to the clipboard using the option 71 | `clipboard = TRUE`. 72 | 73 | ## Installation 74 | 75 | You can install usemodels with: 76 | 77 | ``` r 78 | devtools::install_github("tidymodels/usemodels") 79 | ``` 80 | 81 | ## Contributing 82 | 83 | This project is released with a [Contributor Code of 84 | Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). 85 | By contributing to this project, you agree to abide by its terms. 86 | 87 | - For questions and discussions about tidymodels packages, modeling, and 88 | machine learning, please [post on Posit 89 | Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question). 
90 | 91 | - If you think you have encountered a bug, please [submit an 92 | issue](https://github.com/tidymodels/usemodels/issues). 93 | 94 | - Either way, learn how to create and share a 95 | [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html) 96 | (a minimal, reproducible example), to clearly communicate about your 97 | code. 98 | 99 | - Check out further details on [contributing guidelines for tidymodels 100 | packages](https://www.tidymodels.org/contribute/) and [how to get 101 | help](https://www.tidymodels.org/help/). 102 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://usemodels.tidymodels.org 2 | 3 | template: 4 | package: tidytemplate 5 | bootstrap: 5 6 | bslib: 7 | danger: "#CA225E" 8 | primary: "#CA225E" 9 | includes: 10 | in_header: | 11 | 12 | 13 | development: 14 | mode: auto 15 | -------------------------------------------------------------------------------- /air.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/usemodels/1ed59ae83a0d31fe3251d283fb24499d180c814d/air.toml -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local R installation, R 4.0.0 3 | * ubuntu 16.04 (on travis-ci), R 4.0.0 4 | * win-builder (devel) 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 1 note 9 | 10 | * This is a new release. 
11 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | CMD 2 | ORCID 3 | PBC 4 | RStudio 5 | Tidymodels 6 | funder 7 | lifecycle 8 | magrittr 9 | palmerpenguins 10 | reprex 11 | tidymodels 12 | -------------------------------------------------------------------------------- /man/templates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use.R 3 | \name{use_glmnet} 4 | \alias{use_glmnet} 5 | \alias{use_xgboost} 6 | \alias{use_kknn} 7 | \alias{use_ranger} 8 | \alias{use_earth} 9 | \alias{use_cubist} 10 | \alias{use_kernlab_svm_rbf} 11 | \alias{use_kernlab_svm_poly} 12 | \alias{use_C5.0} 13 | \alias{use_nnet} 14 | \alias{use_rpart} 15 | \alias{use_bag_tree_rpart} 16 | \alias{use_mgcv} 17 | \alias{use_dbarts} 18 | \alias{use_mixOmics} 19 | \alias{use_xrf} 20 | \title{Functions to create boilerplate code for specific models} 21 | \usage{ 22 | use_glmnet( 23 | formula, 24 | data, 25 | prefix = "glmnet", 26 | verbose = FALSE, 27 | tune = TRUE, 28 | colors = TRUE, 29 | clipboard = FALSE 30 | ) 31 | 32 | use_xgboost( 33 | formula, 34 | data, 35 | prefix = "xgboost", 36 | verbose = FALSE, 37 | tune = TRUE, 38 | colors = TRUE, 39 | clipboard = FALSE 40 | ) 41 | 42 | use_kknn( 43 | formula, 44 | data, 45 | prefix = "kknn", 46 | verbose = FALSE, 47 | tune = TRUE, 48 | colors = TRUE, 49 | clipboard = FALSE 50 | ) 51 | 52 | use_ranger( 53 | formula, 54 | data, 55 | prefix = "ranger", 56 | verbose = FALSE, 57 | tune = TRUE, 58 | colors = TRUE, 59 | clipboard = FALSE 60 | ) 61 | 62 | use_earth( 63 | formula, 64 | data, 65 | prefix = "earth", 66 | verbose = FALSE, 67 | tune = TRUE, 68 | colors = TRUE, 69 | clipboard = FALSE 70 | ) 71 | 72 | use_cubist( 73 | formula, 74 | data, 75 | prefix = "cubist", 76 | verbose = FALSE, 77 | tune = TRUE, 78 | colors = TRUE, 79 | clipboard = FALSE 80 | ) 81 | 82 | use_kernlab_svm_rbf( 83 | formula, 84 | data, 85 | prefix = "kernlab", 86 | verbose = FALSE, 87 | tune = TRUE, 88 | colors = TRUE, 89 | clipboard = FALSE 90 | ) 91 | 92 | use_kernlab_svm_poly( 93 | formula, 94 | data, 95 | prefix = "kernlab", 96 | verbose = FALSE, 97 | tune = TRUE, 98 | colors = TRUE, 99 | clipboard = FALSE 100 | ) 101 | 102 | use_C5.0( 103 | formula, 104 | data, 105 | prefix = "C50", 106 | verbose = FALSE, 107 | tune = TRUE, 108 | colors = TRUE, 109 | clipboard = FALSE 110 | ) 111 | 112 | use_nnet( 113 | formula, 114 | data, 115 | prefix = "nnet", 116 | verbose = FALSE, 117 | tune = TRUE, 118 | colors = TRUE, 119 | clipboard = FALSE 120 | ) 121 | 122 | use_rpart( 123 | formula, 124 | data, 125 | prefix = "rpart", 126 | verbose = FALSE, 127 | tune = TRUE, 128 | colors = TRUE, 129 | clipboard = FALSE 130 | ) 131 | 132 | use_bag_tree_rpart( 133 | formula, 134 | data, 135 | prefix = "rpart", 136 | verbose = FALSE, 137 | tune = TRUE, 138 | colors = TRUE, 139 | clipboard = FALSE 140 | ) 141 | 142 | use_mgcv( 143 | formula, 144 | data, 145 | prefix = "mgcv", 146 | verbose = FALSE, 147 | tune = TRUE, 148 | colors = TRUE, 149 | clipboard = FALSE 150 | ) 151 | 152 | use_dbarts( 153 | formula, 154 | data, 155 | prefix = "dbarts", 156 | verbose = FALSE, 157 | tune = TRUE, 158 | colors = TRUE, 159 | clipboard = FALSE 160 | ) 161 | 162 | use_mixOmics( 163 | formula, 164 | data, 165 | prefix = "mixOmics", 166 | verbose = FALSE, 167 | tune = TRUE, 168 | colors = 
TRUE, 169 | clipboard = FALSE 170 | ) 171 | 172 | use_xrf( 173 | formula, 174 | data, 175 | prefix = "xrf", 176 | verbose = FALSE, 177 | tune = TRUE, 178 | colors = TRUE, 179 | clipboard = FALSE 180 | ) 181 | } 182 | \arguments{ 183 | \item{formula}{A simple model formula with no in-line functions. This will 184 | be used to template the recipe object as well as to determine which outcome 185 | and predictor columns will be used.} 186 | 187 | \item{data}{A data frame with the columns used in the analysis.} 188 | 189 | \item{prefix}{A single character string to use as a prefix for the resulting 190 | objects.} 191 | 192 | \item{verbose}{A single logical that determines whether comments are added to 193 | the printed code explaining why certain lines are used.} 194 | 195 | \item{tune}{A single logical that controls if code for model tuning should be 196 | printed.} 197 | 198 | \item{colors}{A single logical for coloring warnings and code snippets that 199 | require the user's attention (ignored when \code{colors = FALSE})} 200 | 201 | \item{clipboard}{A single logical for whether the code output should be 202 | sent to the clipboard or printed in the console.} 203 | } 204 | \value{ 205 | Invisible \code{NULL} but code is printed to the console. 206 | } 207 | \description{ 208 | These functions make suggestions for code when using a few common models. 209 | They print out code to the console that could be considered minimal syntax 210 | for their respective techniques. Each creates a prototype recipe and workflow 211 | object that can be edited or updated as the data require. 212 | } 213 | \details{ 214 | Based on the columns in \code{data}, certain recipe steps are printed. For example, if 215 | a model requires that qualitative predictors be converted to numeric (say, 216 | using dummy variables) then an additional \code{step_dummy()} is added. Otherwise 217 | that recipe step is not included in the output. 218 | 219 | The syntax is opinionated and should not be considered the exact answer for 220 | every data analysis. It has reasonable defaults. 221 | } 222 | \examples{ 223 | if (rlang::is_installed("modeldata")) { 224 | library(modeldata) 225 | data(ad_data) 226 | use_glmnet(Class ~ ., data = ad_data) 227 | 228 | data(Sacramento) 229 | use_glmnet(price ~ ., data = Sacramento, verbose = TRUE, prefix = "sac_homes") 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /man/usemodels-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/usemodels-package.R 3 | \docType{package} 4 | \name{usemodels-package} 5 | \alias{usemodels} 6 | \alias{usemodels-package} 7 | \title{usemodels: Boilerplate Code for 'Tidymodels' Analyses} 8 | \description{ 9 | Code snippets to fit models using the tidymodels framework can be easily created for a given data set. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://usemodels.tidymodels.org/} 15 | \item \url{https://github.com/tidymodels/usemodels} 16 | \item Report bugs at \url{https://github.com/tidymodels/usemodels/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Max Kuhn \email{max@posit.co} (\href{https://orcid.org/0000-0003-2402-136X}{ORCID}) 22 | 23 | Other contributors: 24 | \itemize{ 25 | \item Posit Software, PBC (03wc8by49) [copyright holder, funder] 26 | } 27 | 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) { 2 | spelling::spell_check_test( 3 | vignettes = TRUE, 4 | error = FALSE, 5 | skip_on_cran = TRUE 6 | ) 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(usemodels) 3 | 4 | 5 | if (identical(Sys.getenv("NOT_CRAN"), "true")) { 6 | # emulates `testthat:::on_cran()` 7 | if (utils::packageVersion("testthat") >= "2.99.0.9000") { 8 | test_check("usemodels") 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/basics.md: -------------------------------------------------------------------------------- 1 | # wrong model type 2 | 3 | Code 4 | use_cubist(island ~ ., data = penguins) 5 | Condition 6 | Error in `use_cubist()`: 7 | ! Cubist models are only for regression 8 | 9 | --- 10 | 11 | Code 12 | use_C5.0(bill_depth_mm ~ ., data = penguins) 13 | Condition 14 | Error in `use_C5.0()`: 15 | ! C5.0 models are only for classification. 16 | 17 | # no access to clipboard 18 | 19 | Code 20 | use_kknn(mpg ~ ., data = mtcars, clipboard = TRUE) 21 | Message 22 | CLIPR_ALLOW has not been set, so clipr will not run interactively 23 | Condition 24 | Error in `check_clipboard()`: 25 | ! Please use `clipboard = FALSE` 26 | 27 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/clipboard.md: -------------------------------------------------------------------------------- 1 | # all model templates with clipboard 2 | 3 | Code 4 | dummy_clip_template(model, prefix, verbose, tune) 5 | Message 6 | v code is on the clipboard. 
7 | Output 8 | [1] "library(baguette)" 9 | [2] "" 10 | [3] "test_config_1_dummies_recipe <- " 11 | [4] " recipe(formula = body_mass_g ~ ., data = penguins) " 12 | [5] "" 13 | [6] "test_config_1_dummies_spec <- " 14 | [7] " bag_tree() |> " 15 | [8] " set_mode(\"regression\") |> " 16 | [9] " set_engine(\"rpart\") " 17 | [10] "" 18 | [11] "test_config_1_dummies_workflow <- " 19 | [12] " workflow() |> " 20 | [13] " add_recipe(test_config_1_dummies_recipe) |> " 21 | [14] " add_model(test_config_1_dummies_spec) " 22 | 23 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/templates.md: -------------------------------------------------------------------------------- 1 | # all model templates 2 | 3 | Code 4 | dummy_template(model, prefix, verbose, tune) 5 | Output 6 | library(baguette) 7 | 8 | test_config_1_dummies_recipe <- 9 | recipe(formula = body_mass_g ~ ., data = penguins) 10 | 11 | test_config_1_dummies_spec <- 12 | bag_tree() |> 13 | set_mode("regression") |> 14 | set_engine("rpart") 15 | 16 | test_config_1_dummies_workflow <- 17 | workflow() |> 18 | add_recipe(test_config_1_dummies_recipe) |> 19 | add_model(test_config_1_dummies_spec) 20 | 21 | 22 | --- 23 | 24 | Code 25 | no_dummy_template(model, prefix, verbose, tune) 26 | Output 27 | library(baguette) 28 | 29 | test_config_1_no_dummies_recipe <- 30 | recipe(formula = species ~ ., data = penguins) 31 | 32 | test_config_1_no_dummies_spec <- 33 | bag_tree() |> 34 | set_mode("classification") |> 35 | set_engine("rpart") 36 | 37 | test_config_1_no_dummies_workflow <- 38 | workflow() |> 39 | add_recipe(test_config_1_no_dummies_recipe) |> 40 | add_model(test_config_1_no_dummies_spec) 41 | 42 | 43 | --- 44 | 45 | Code 46 | no_dummy_template(model, prefix, verbose, tune) 47 | Output 48 | test_config_2_no_dummies_recipe <- 49 | recipe(formula = species ~ ., data = penguins) 50 | 51 | test_config_2_no_dummies_spec <- 52 | boost_tree() |> 53 | set_mode("classification") |> 54 | set_engine("C5.0") 55 | 56 | test_config_2_no_dummies_workflow <- 57 | workflow() |> 58 | add_recipe(test_config_2_no_dummies_recipe) |> 59 | add_model(test_config_2_no_dummies_spec) 60 | 61 | 62 | --- 63 | 64 | Code 65 | dummy_template(model, prefix, verbose, tune) 66 | Output 67 | library(rules) 68 | 69 | test_config_3_dummies_recipe <- 70 | recipe(formula = body_mass_g ~ ., data = penguins) |> 71 | step_zv(all_predictors()) 72 | 73 | test_config_3_dummies_spec <- 74 | cubist_rules() |> 75 | set_engine("Cubist") 76 | 77 | test_config_3_dummies_workflow <- 78 | workflow() |> 79 | add_recipe(test_config_3_dummies_recipe) |> 80 | add_model(test_config_3_dummies_spec) 81 | 82 | 83 | --- 84 | 85 | Code 86 | dummy_template(model, prefix, verbose, tune) 87 | Output 88 | test_config_4_dummies_recipe <- 89 | recipe(formula = body_mass_g ~ ., data = penguins) 90 | 91 | test_config_4_dummies_spec <- 92 | bart() |> 93 | set_mode("regression") |> 94 | set_engine("dbarts") 95 | 96 | test_config_4_dummies_workflow <- 97 | workflow() |> 98 | add_recipe(test_config_4_dummies_recipe) |> 99 | add_model(test_config_4_dummies_spec) 100 | 101 | 102 | --- 103 | 104 | Code 105 | no_dummy_template(model, prefix, verbose, tune) 106 | Output 107 | test_config_4_no_dummies_recipe <- 108 | recipe(formula = species ~ ., data = penguins) 109 | 110 | test_config_4_no_dummies_spec <- 111 | bart() |> 112 | set_mode("classification") |> 113 | set_engine("dbarts") 114 | 115 | test_config_4_no_dummies_workflow <- 116 | workflow() |> 117 | 
add_recipe(test_config_4_no_dummies_recipe) |> 118 | add_model(test_config_4_no_dummies_spec) 119 | 120 | 121 | --- 122 | 123 | Code 124 | dummy_template(model, prefix, verbose, tune) 125 | Output 126 | test_config_5_dummies_recipe <- 127 | recipe(formula = body_mass_g ~ ., data = penguins) |> 128 | step_novel(all_nominal_predictors()) |> 129 | ## This model requires the predictors to be numeric. The most common 130 | ## method to convert qualitative predictors to numeric is to create 131 | ## binary indicator variables (aka dummy variables) from these 132 | ## predictors. 133 | step_dummy(all_nominal_predictors()) |> 134 | step_zv(all_predictors()) 135 | 136 | test_config_5_dummies_spec <- 137 | mars() |> 138 | set_mode("regression") |> 139 | set_engine("earth") 140 | 141 | test_config_5_dummies_workflow <- 142 | workflow() |> 143 | add_recipe(test_config_5_dummies_recipe) |> 144 | add_model(test_config_5_dummies_spec) 145 | 146 | 147 | --- 148 | 149 | Code 150 | no_dummy_template(model, prefix, verbose, tune) 151 | Output 152 | test_config_5_no_dummies_recipe <- 153 | recipe(formula = species ~ ., data = penguins) |> 154 | step_novel(all_nominal_predictors()) |> 155 | ## This model requires the predictors to be numeric. The most common 156 | ## method to convert qualitative predictors to numeric is to create 157 | ## binary indicator variables (aka dummy variables) from these 158 | ## predictors. 159 | step_dummy(all_nominal_predictors()) |> 160 | step_zv(all_predictors()) 161 | 162 | test_config_5_no_dummies_spec <- 163 | mars() |> 164 | set_mode("classification") |> 165 | set_engine("earth") 166 | 167 | test_config_5_no_dummies_workflow <- 168 | workflow() |> 169 | add_recipe(test_config_5_no_dummies_recipe) |> 170 | add_model(test_config_5_no_dummies_spec) 171 | 172 | 173 | --- 174 | 175 | Code 176 | dummy_template(model, prefix, verbose, tune) 177 | Output 178 | test_config_6_dummies_recipe <- 179 | recipe(formula = body_mass_g ~ ., data = penguins) |> 180 | step_novel(all_nominal_predictors()) |> 181 | ## This model requires the predictors to be numeric. The most common 182 | ## method to convert qualitative predictors to numeric is to create 183 | ## binary indicator variables (aka dummy variables) from these 184 | ## predictors. 185 | step_dummy(all_nominal_predictors()) |> 186 | ## Regularization methods sum up functions of the model slope 187 | ## coefficients. Because of this, the predictor variables should be on 188 | ## the same scale. Before centering and scaling the numeric predictors, 189 | ## any predictors with a single unique value are filtered out. 190 | step_zv(all_predictors()) |> 191 | step_normalize(all_numeric_predictors()) 192 | 193 | test_config_6_dummies_spec <- 194 | linear_reg() |> 195 | set_mode("regression") |> 196 | set_engine("glmnet") 197 | 198 | test_config_6_dummies_workflow <- 199 | workflow() |> 200 | add_recipe(test_config_6_dummies_recipe) |> 201 | add_model(test_config_6_dummies_spec) 202 | 203 | 204 | --- 205 | 206 | Code 207 | no_dummy_template(model, prefix, verbose, tune) 208 | Output 209 | test_config_6_no_dummies_recipe <- 210 | recipe(formula = species ~ ., data = penguins) |> 211 | step_novel(all_nominal_predictors()) |> 212 | ## This model requires the predictors to be numeric. The most common 213 | ## method to convert qualitative predictors to numeric is to create 214 | ## binary indicator variables (aka dummy variables) from these 215 | ## predictors. 
216 | step_dummy(all_nominal_predictors()) |> 217 | ## Regularization methods sum up functions of the model slope 218 | ## coefficients. Because of this, the predictor variables should be on 219 | ## the same scale. Before centering and scaling the numeric predictors, 220 | ## any predictors with a single unique value are filtered out. 221 | step_zv(all_predictors()) |> 222 | step_normalize(all_numeric_predictors()) 223 | 224 | test_config_6_no_dummies_spec <- 225 | multinom_reg() |> 226 | set_mode("classification") |> 227 | set_engine("glmnet") 228 | 229 | test_config_6_no_dummies_workflow <- 230 | workflow() |> 231 | add_recipe(test_config_6_no_dummies_recipe) |> 232 | add_model(test_config_6_no_dummies_spec) 233 | 234 | 235 | --- 236 | 237 | Code 238 | dummy_template(model, prefix, verbose, tune) 239 | Output 240 | test_config_7_dummies_recipe <- 241 | recipe(formula = body_mass_g ~ ., data = penguins) |> 242 | ## Since dot product calculations are used, the predictor variables 243 | ## should be on the same scale. Before centering and scaling the numeric 244 | ## predictors, any predictors with a single unique value are filtered 245 | ## out. 246 | step_zv(all_predictors()) |> 247 | step_normalize(all_numeric_predictors()) 248 | 249 | test_config_7_dummies_spec <- 250 | svm_poly() |> 251 | set_mode("regression") 252 | 253 | test_config_7_dummies_workflow <- 254 | workflow() |> 255 | add_recipe(test_config_7_dummies_recipe) |> 256 | add_model(test_config_7_dummies_spec) 257 | 258 | 259 | --- 260 | 261 | Code 262 | no_dummy_template(model, prefix, verbose, tune) 263 | Output 264 | test_config_7_no_dummies_recipe <- 265 | recipe(formula = species ~ ., data = penguins) |> 266 | ## Since dot product calculations are used, the predictor variables 267 | ## should be on the same scale. Before centering and scaling the numeric 268 | ## predictors, any predictors with a single unique value are filtered 269 | ## out. 270 | step_zv(all_predictors()) |> 271 | step_normalize(all_numeric_predictors()) 272 | 273 | test_config_7_no_dummies_spec <- 274 | svm_poly() |> 275 | set_mode("classification") 276 | 277 | test_config_7_no_dummies_workflow <- 278 | workflow() |> 279 | add_recipe(test_config_7_no_dummies_recipe) |> 280 | add_model(test_config_7_no_dummies_spec) 281 | 282 | 283 | --- 284 | 285 | Code 286 | dummy_template(model, prefix, verbose, tune) 287 | Output 288 | test_config_8_dummies_recipe <- 289 | recipe(formula = body_mass_g ~ ., data = penguins) |> 290 | ## Since dot product calculations are used, the predictor variables 291 | ## should be on the same scale. Before centering and scaling the numeric 292 | ## predictors, any predictors with a single unique value are filtered 293 | ## out. 294 | step_zv(all_predictors()) |> 295 | step_normalize(all_numeric_predictors()) 296 | 297 | test_config_8_dummies_spec <- 298 | svm_rbf() |> 299 | set_mode("regression") 300 | 301 | test_config_8_dummies_workflow <- 302 | workflow() |> 303 | add_recipe(test_config_8_dummies_recipe) |> 304 | add_model(test_config_8_dummies_spec) 305 | 306 | 307 | --- 308 | 309 | Code 310 | no_dummy_template(model, prefix, verbose, tune) 311 | Output 312 | test_config_8_no_dummies_recipe <- 313 | recipe(formula = species ~ ., data = penguins) |> 314 | ## Since dot product calculations are used, the predictor variables 315 | ## should be on the same scale. Before centering and scaling the numeric 316 | ## predictors, any predictors with a single unique value are filtered 317 | ## out. 
318 | step_zv(all_predictors()) |> 319 | step_normalize(all_numeric_predictors()) 320 | 321 | test_config_8_no_dummies_spec <- 322 | svm_rbf() |> 323 | set_mode("classification") 324 | 325 | test_config_8_no_dummies_workflow <- 326 | workflow() |> 327 | add_recipe(test_config_8_no_dummies_recipe) |> 328 | add_model(test_config_8_no_dummies_spec) 329 | 330 | 331 | --- 332 | 333 | Code 334 | dummy_template(model, prefix, verbose, tune) 335 | Output 336 | test_config_9_dummies_recipe <- 337 | recipe(formula = body_mass_g ~ ., data = penguins) |> 338 | step_novel(all_nominal_predictors()) |> 339 | ## This model requires the predictors to be numeric. The most common 340 | ## method to convert qualitative predictors to numeric is to create 341 | ## binary indicator variables (aka dummy variables) from these 342 | ## predictors. 343 | step_dummy(all_nominal_predictors()) |> 344 | ## Since distance calculations are used, the predictor variables should 345 | ## be on the same scale. Before centering and scaling the numeric 346 | ## predictors, any predictors with a single unique value are filtered 347 | ## out. 348 | step_zv(all_predictors()) |> 349 | step_normalize(all_numeric_predictors()) 350 | 351 | test_config_9_dummies_spec <- 352 | nearest_neighbor() |> 353 | set_mode("regression") |> 354 | set_engine("kknn") 355 | 356 | test_config_9_dummies_workflow <- 357 | workflow() |> 358 | add_recipe(test_config_9_dummies_recipe) |> 359 | add_model(test_config_9_dummies_spec) 360 | 361 | 362 | --- 363 | 364 | Code 365 | no_dummy_template(model, prefix, verbose, tune) 366 | Output 367 | test_config_9_no_dummies_recipe <- 368 | recipe(formula = species ~ ., data = penguins) |> 369 | step_novel(all_nominal_predictors()) |> 370 | ## This model requires the predictors to be numeric. The most common 371 | ## method to convert qualitative predictors to numeric is to create 372 | ## binary indicator variables (aka dummy variables) from these 373 | ## predictors. 374 | step_dummy(all_nominal_predictors()) |> 375 | ## Since distance calculations are used, the predictor variables should 376 | ## be on the same scale. Before centering and scaling the numeric 377 | ## predictors, any predictors with a single unique value are filtered 378 | ## out. 
379 | step_zv(all_predictors()) |> 380 | step_normalize(all_numeric_predictors()) 381 | 382 | test_config_9_no_dummies_spec <- 383 | nearest_neighbor() |> 384 | set_mode("classification") |> 385 | set_engine("kknn") 386 | 387 | test_config_9_no_dummies_workflow <- 388 | workflow() |> 389 | add_recipe(test_config_9_no_dummies_recipe) |> 390 | add_model(test_config_9_no_dummies_spec) 391 | 392 | 393 | --- 394 | 395 | Code 396 | dummy_template(model, prefix, verbose, tune) 397 | Output 398 | test_config_10_dummies_recipe <- 399 | recipe(formula = body_mass_g ~ ., data = penguins) 400 | 401 | test_config_10_dummies_spec <- 402 | gen_additive_mod() |> 403 | set_mode("regression") |> 404 | set_engine("mgcv") 405 | 406 | test_config_10_dummies_workflow <- 407 | workflow() |> 408 | add_recipe(test_config_10_dummies_recipe) |> 409 | add_model(test_config_10_dummies_spec, formula = stop("add your gam formula")) 410 | 411 | 412 | --- 413 | 414 | Code 415 | no_dummy_template(model, prefix, verbose, tune) 416 | Output 417 | test_config_10_no_dummies_recipe <- 418 | recipe(formula = species ~ ., data = penguins) 419 | 420 | test_config_10_no_dummies_spec <- 421 | gen_additive_mod() |> 422 | set_mode("classification") |> 423 | set_engine("mgcv") 424 | 425 | test_config_10_no_dummies_workflow <- 426 | workflow() |> 427 | add_recipe(test_config_10_no_dummies_recipe) |> 428 | add_model(test_config_10_no_dummies_spec, formula = stop("add your gam formula")) 429 | 430 | 431 | --- 432 | 433 | Code 434 | dummy_template(model, prefix, verbose, tune) 435 | Output 436 | library(plsmod) 437 | 438 | test_config_11_dummies_recipe <- 439 | recipe(formula = body_mass_g ~ ., data = penguins) |> 440 | step_novel(all_nominal_predictors()) |> 441 | ## This model requires the predictors to be numeric. The most common 442 | ## method to convert qualitative predictors to numeric is to create 443 | ## binary indicator variables (aka dummy variables) from these 444 | ## predictors. 445 | step_dummy(all_nominal_predictors()) |> 446 | step_zv(all_predictors()) |> 447 | step_normalize(all_numeric_predictors()) 448 | 449 | test_config_11_dummies_spec <- 450 | pls() |> 451 | set_mode("regression") |> 452 | set_engine("mixOmics") 453 | 454 | test_config_11_dummies_workflow <- 455 | workflow() |> 456 | add_recipe(test_config_11_dummies_recipe) |> 457 | add_model(test_config_11_dummies_spec) 458 | 459 | 460 | --- 461 | 462 | Code 463 | no_dummy_template(model, prefix, verbose, tune) 464 | Output 465 | library(plsmod) 466 | 467 | test_config_11_no_dummies_recipe <- 468 | recipe(formula = species ~ ., data = penguins) |> 469 | step_novel(all_nominal_predictors()) |> 470 | ## This model requires the predictors to be numeric. The most common 471 | ## method to convert qualitative predictors to numeric is to create 472 | ## binary indicator variables (aka dummy variables) from these 473 | ## predictors. 
474 | step_dummy(all_nominal_predictors()) |> 475 | step_zv(all_predictors()) |> 476 | step_normalize(all_numeric_predictors()) 477 | 478 | test_config_11_no_dummies_spec <- 479 | pls() |> 480 | set_mode("classification") |> 481 | set_engine("mixOmics") 482 | 483 | test_config_11_no_dummies_workflow <- 484 | workflow() |> 485 | add_recipe(test_config_11_no_dummies_recipe) |> 486 | add_model(test_config_11_no_dummies_spec) 487 | 488 | 489 | --- 490 | 491 | Code 492 | dummy_template(model, prefix, verbose, tune) 493 | Output 494 | test_config_12_dummies_recipe <- 495 | recipe(formula = body_mass_g ~ ., data = penguins) |> 496 | step_novel(all_nominal_predictors()) |> 497 | ## This model requires the predictors to be numeric. The most common 498 | ## method to convert qualitative predictors to numeric is to create 499 | ## binary indicator variables (aka dummy variables) from these 500 | ## predictors. 501 | step_dummy(all_nominal_predictors()) |> 502 | step_zv(all_predictors()) |> 503 | step_normalize(all_numeric_predictors()) 504 | 505 | test_config_12_dummies_spec <- 506 | mlp() |> 507 | set_mode("regression") 508 | 509 | test_config_12_dummies_workflow <- 510 | workflow() |> 511 | add_recipe(test_config_12_dummies_recipe) |> 512 | add_model(test_config_12_dummies_spec) 513 | 514 | 515 | --- 516 | 517 | Code 518 | no_dummy_template(model, prefix, verbose, tune) 519 | Output 520 | test_config_12_no_dummies_recipe <- 521 | recipe(formula = species ~ ., data = penguins) |> 522 | step_novel(all_nominal_predictors()) |> 523 | ## This model requires the predictors to be numeric. The most common 524 | ## method to convert qualitative predictors to numeric is to create 525 | ## binary indicator variables (aka dummy variables) from these 526 | ## predictors. 
527 | step_dummy(all_nominal_predictors()) |> 528 | step_zv(all_predictors()) |> 529 | step_normalize(all_numeric_predictors()) 530 | 531 | test_config_12_no_dummies_spec <- 532 | mlp() |> 533 | set_mode("classification") 534 | 535 | test_config_12_no_dummies_workflow <- 536 | workflow() |> 537 | add_recipe(test_config_12_no_dummies_recipe) |> 538 | add_model(test_config_12_no_dummies_spec) 539 | 540 | 541 | --- 542 | 543 | Code 544 | dummy_template(model, prefix, verbose, tune) 545 | Output 546 | test_config_13_dummies_recipe <- 547 | recipe(formula = body_mass_g ~ ., data = penguins) 548 | 549 | test_config_13_dummies_spec <- 550 | rand_forest(trees = 1000) |> 551 | set_mode("regression") |> 552 | set_engine("ranger") 553 | 554 | test_config_13_dummies_workflow <- 555 | workflow() |> 556 | add_recipe(test_config_13_dummies_recipe) |> 557 | add_model(test_config_13_dummies_spec) 558 | 559 | 560 | --- 561 | 562 | Code 563 | no_dummy_template(model, prefix, verbose, tune) 564 | Output 565 | test_config_13_no_dummies_recipe <- 566 | recipe(formula = species ~ ., data = penguins) 567 | 568 | test_config_13_no_dummies_spec <- 569 | rand_forest(trees = 1000) |> 570 | set_mode("classification") |> 571 | set_engine("ranger") 572 | 573 | test_config_13_no_dummies_workflow <- 574 | workflow() |> 575 | add_recipe(test_config_13_no_dummies_recipe) |> 576 | add_model(test_config_13_no_dummies_spec) 577 | 578 | 579 | --- 580 | 581 | Code 582 | dummy_template(model, prefix, verbose, tune) 583 | Output 584 | test_config_14_dummies_recipe <- 585 | recipe(formula = body_mass_g ~ ., data = penguins) 586 | 587 | test_config_14_dummies_spec <- 588 | decision_tree() |> 589 | set_mode("regression") |> 590 | set_engine("rpart") 591 | 592 | test_config_14_dummies_workflow <- 593 | workflow() |> 594 | add_recipe(test_config_14_dummies_recipe) |> 595 | add_model(test_config_14_dummies_spec) 596 | 597 | 598 | --- 599 | 600 | Code 601 | no_dummy_template(model, prefix, verbose, tune) 602 | Output 603 | test_config_14_no_dummies_recipe <- 604 | recipe(formula = species ~ ., data = penguins) 605 | 606 | test_config_14_no_dummies_spec <- 607 | decision_tree() |> 608 | set_mode("classification") |> 609 | set_engine("rpart") 610 | 611 | test_config_14_no_dummies_workflow <- 612 | workflow() |> 613 | add_recipe(test_config_14_no_dummies_recipe) |> 614 | add_model(test_config_14_no_dummies_spec) 615 | 616 | 617 | --- 618 | 619 | Code 620 | dummy_template(model, prefix, verbose, tune) 621 | Output 622 | test_config_15_dummies_recipe <- 623 | recipe(formula = body_mass_g ~ ., data = penguins) |> 624 | step_novel(all_nominal_predictors()) |> 625 | ## This model requires the predictors to be numeric. The most common 626 | ## method to convert qualitative predictors to numeric is to create 627 | ## binary indicator variables (aka dummy variables) from these 628 | ## predictors. However, for this model, binary indicator variables can be 629 | ## made for each of the levels of the factors (known as 'one-hot 630 | ## encoding'). 
631 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 632 | step_zv(all_predictors()) 633 | 634 | test_config_15_dummies_spec <- 635 | boost_tree() |> 636 | set_mode("regression") |> 637 | set_engine("xgboost") 638 | 639 | test_config_15_dummies_workflow <- 640 | workflow() |> 641 | add_recipe(test_config_15_dummies_recipe) |> 642 | add_model(test_config_15_dummies_spec) 643 | 644 | 645 | --- 646 | 647 | Code 648 | no_dummy_template(model, prefix, verbose, tune) 649 | Output 650 | test_config_15_no_dummies_recipe <- 651 | recipe(formula = species ~ ., data = penguins) |> 652 | step_novel(all_nominal_predictors()) |> 653 | ## This model requires the predictors to be numeric. The most common 654 | ## method to convert qualitative predictors to numeric is to create 655 | ## binary indicator variables (aka dummy variables) from these 656 | ## predictors. However, for this model, binary indicator variables can be 657 | ## made for each of the levels of the factors (known as 'one-hot 658 | ## encoding'). 659 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 660 | step_zv(all_predictors()) 661 | 662 | test_config_15_no_dummies_spec <- 663 | boost_tree() |> 664 | set_mode("classification") |> 665 | set_engine("xgboost") 666 | 667 | test_config_15_no_dummies_workflow <- 668 | workflow() |> 669 | add_recipe(test_config_15_no_dummies_recipe) |> 670 | add_model(test_config_15_no_dummies_spec) 671 | 672 | 673 | --- 674 | 675 | Code 676 | dummy_template(model, prefix, verbose, tune) 677 | Output 678 | library(rules) 679 | 680 | test_config_16_dummies_recipe <- 681 | recipe(formula = body_mass_g ~ ., data = penguins) |> 682 | step_novel(all_nominal_predictors()) |> 683 | ## This model requires the predictors to be numeric. The most common 684 | ## method to convert qualitative predictors to numeric is to create 685 | ## binary indicator variables (aka dummy variables) from these 686 | ## predictors. 687 | step_dummy(all_nominal_predictors()) |> 688 | step_zv(all_predictors()) |> 689 | step_normalize(all_numeric_predictors()) 690 | 691 | test_config_16_dummies_spec <- 692 | rule_fit() |> 693 | set_mode("regression") |> 694 | set_engine("xrf") 695 | 696 | test_config_16_dummies_workflow <- 697 | workflow() |> 698 | add_recipe(test_config_16_dummies_recipe) |> 699 | add_model(test_config_16_dummies_spec) 700 | 701 | 702 | --- 703 | 704 | Code 705 | no_dummy_template(model, prefix, verbose, tune) 706 | Output 707 | library(rules) 708 | 709 | test_config_16_no_dummies_recipe <- 710 | recipe(formula = species ~ ., data = penguins) |> 711 | step_novel(all_nominal_predictors()) |> 712 | ## This model requires the predictors to be numeric. The most common 713 | ## method to convert qualitative predictors to numeric is to create 714 | ## binary indicator variables (aka dummy variables) from these 715 | ## predictors. 
716 | step_dummy(all_nominal_predictors()) |> 717 | step_zv(all_predictors()) |> 718 | step_normalize(all_numeric_predictors()) 719 | 720 | test_config_16_no_dummies_spec <- 721 | rule_fit() |> 722 | set_mode("classification") |> 723 | set_engine("xrf") 724 | 725 | test_config_16_no_dummies_workflow <- 726 | workflow() |> 727 | add_recipe(test_config_16_no_dummies_recipe) |> 728 | add_model(test_config_16_no_dummies_spec) 729 | 730 | 731 | --- 732 | 733 | Code 734 | dummy_template(model, prefix, verbose, tune) 735 | Output 736 | library(baguette) 737 | 738 | test_config_17_dummies_recipe <- 739 | recipe(formula = body_mass_g ~ ., data = penguins) 740 | 741 | test_config_17_dummies_spec <- 742 | bag_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 743 | set_mode("regression") |> 744 | set_engine("rpart") 745 | 746 | test_config_17_dummies_workflow <- 747 | workflow() |> 748 | add_recipe(test_config_17_dummies_recipe) |> 749 | add_model(test_config_17_dummies_spec) 750 | 751 | set.seed(27246) 752 | test_config_17_dummies_tune <- 753 | tune_grid(test_config_17_dummies_workflow, resamples = stop("add your rsample object"), 754 | grid = stop("add number of candidate points")) 755 | 756 | 757 | --- 758 | 759 | Code 760 | no_dummy_template(model, prefix, verbose, tune) 761 | Output 762 | library(baguette) 763 | 764 | test_config_17_no_dummies_recipe <- 765 | recipe(formula = species ~ ., data = penguins) 766 | 767 | test_config_17_no_dummies_spec <- 768 | bag_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 769 | set_mode("classification") |> 770 | set_engine("rpart") 771 | 772 | test_config_17_no_dummies_workflow <- 773 | workflow() |> 774 | add_recipe(test_config_17_no_dummies_recipe) |> 775 | add_model(test_config_17_no_dummies_spec) 776 | 777 | set.seed(27246) 778 | test_config_17_no_dummies_tune <- 779 | tune_grid(test_config_17_no_dummies_workflow, resamples = stop("add your rsample object"), 780 | grid = stop("add number of candidate points")) 781 | 782 | 783 | --- 784 | 785 | Code 786 | no_dummy_template(model, prefix, verbose, tune) 787 | Output 788 | test_config_18_no_dummies_recipe <- 789 | recipe(formula = species ~ ., data = penguins) 790 | 791 | test_config_18_no_dummies_spec <- 792 | boost_tree(trees = tune(), min_n = tune()) |> 793 | set_mode("classification") |> 794 | set_engine("C5.0") 795 | 796 | test_config_18_no_dummies_workflow <- 797 | workflow() |> 798 | add_recipe(test_config_18_no_dummies_recipe) |> 799 | add_model(test_config_18_no_dummies_spec) 800 | 801 | set.seed(27246) 802 | test_config_18_no_dummies_tune <- 803 | tune_grid(test_config_18_no_dummies_workflow, resamples = stop("add your rsample object"), 804 | grid = stop("add number of candidate points")) 805 | 806 | 807 | --- 808 | 809 | Code 810 | dummy_template(model, prefix, verbose, tune) 811 | Output 812 | library(rules) 813 | 814 | test_config_19_dummies_recipe <- 815 | recipe(formula = body_mass_g ~ ., data = penguins) |> 816 | step_zv(all_predictors()) 817 | 818 | test_config_19_dummies_spec <- 819 | cubist_rules(committees = tune(), neighbors = tune()) |> 820 | set_engine("Cubist") 821 | 822 | test_config_19_dummies_workflow <- 823 | workflow() |> 824 | add_recipe(test_config_19_dummies_recipe) |> 825 | add_model(test_config_19_dummies_spec) 826 | 827 | test_config_19_dummies_grid <- tidyr::crossing(committees = c(1:9, (1:5) * 828 | 10), neighbors = c(0, 3, 6, 9)) 829 | 830 | test_config_19_dummies_tune <- 831 | tune_grid(test_config_19_dummies_workflow, 
resamples = stop("add your rsample object"), 832 | grid = test_config_19_dummies_grid) 833 | 834 | 835 | --- 836 | 837 | Code 838 | dummy_template(model, prefix, verbose, tune) 839 | Output 840 | test_config_20_dummies_recipe <- 841 | recipe(formula = body_mass_g ~ ., data = penguins) 842 | 843 | test_config_20_dummies_spec <- 844 | bart(trees = tune(), prior_terminal_node_coef = tune(), prior_terminal_node_expo = tune()) |> 845 | set_mode("regression") |> 846 | set_engine("dbarts") 847 | 848 | test_config_20_dummies_workflow <- 849 | workflow() |> 850 | add_recipe(test_config_20_dummies_recipe) |> 851 | add_model(test_config_20_dummies_spec) 852 | 853 | set.seed(27246) 854 | test_config_20_dummies_tune <- 855 | tune_grid(test_config_20_dummies_workflow, resamples = stop("add your rsample object"), 856 | grid = stop("add number of candidate points")) 857 | 858 | 859 | --- 860 | 861 | Code 862 | no_dummy_template(model, prefix, verbose, tune) 863 | Output 864 | test_config_20_no_dummies_recipe <- 865 | recipe(formula = species ~ ., data = penguins) 866 | 867 | test_config_20_no_dummies_spec <- 868 | bart(trees = tune(), prior_terminal_node_coef = tune(), prior_terminal_node_expo = tune()) |> 869 | set_mode("classification") |> 870 | set_engine("dbarts") 871 | 872 | test_config_20_no_dummies_workflow <- 873 | workflow() |> 874 | add_recipe(test_config_20_no_dummies_recipe) |> 875 | add_model(test_config_20_no_dummies_spec) 876 | 877 | set.seed(27246) 878 | test_config_20_no_dummies_tune <- 879 | tune_grid(test_config_20_no_dummies_workflow, resamples = stop("add your rsample object"), 880 | grid = stop("add number of candidate points")) 881 | 882 | 883 | --- 884 | 885 | Code 886 | dummy_template(model, prefix, verbose, tune) 887 | Output 888 | test_config_21_dummies_recipe <- 889 | recipe(formula = body_mass_g ~ ., data = penguins) |> 890 | step_novel(all_nominal_predictors()) |> 891 | ## This model requires the predictors to be numeric. The most common 892 | ## method to convert qualitative predictors to numeric is to create 893 | ## binary indicator variables (aka dummy variables) from these 894 | ## predictors. 895 | step_dummy(all_nominal_predictors()) |> 896 | step_zv(all_predictors()) 897 | 898 | test_config_21_dummies_spec <- 899 | mars(num_terms = tune(), prod_degree = tune(), prune_method = "none") |> 900 | set_mode("regression") |> 901 | set_engine("earth") 902 | 903 | test_config_21_dummies_workflow <- 904 | workflow() |> 905 | add_recipe(test_config_21_dummies_recipe) |> 906 | add_model(test_config_21_dummies_spec) 907 | 908 | ## MARS models can make predictions on many _sub_models_, meaning that we 909 | ## can evaluate many values of `num_terms` without much computational 910 | ## cost. A regular grid is used to exploit this property. The first term 911 | ## is only the intercept, so the grid is a sequence of even numbered 912 | ## values. 913 | test_config_21_dummies_grid <- tidyr::crossing(num_terms = 2 * (1:6), prod_degree = 1:2) 914 | 915 | test_config_21_dummies_tune <- 916 | tune_grid(test_config_21_dummies_workflow, resamples = stop("add your rsample object"), 917 | grid = test_config_21_dummies_grid) 918 | 919 | 920 | --- 921 | 922 | Code 923 | no_dummy_template(model, prefix, verbose, tune) 924 | Output 925 | test_config_21_no_dummies_recipe <- 926 | recipe(formula = species ~ ., data = penguins) |> 927 | step_novel(all_nominal_predictors()) |> 928 | ## This model requires the predictors to be numeric. 
The most common 929 | ## method to convert qualitative predictors to numeric is to create 930 | ## binary indicator variables (aka dummy variables) from these 931 | ## predictors. 932 | step_dummy(all_nominal_predictors()) |> 933 | step_zv(all_predictors()) 934 | 935 | test_config_21_no_dummies_spec <- 936 | mars(num_terms = tune(), prod_degree = tune(), prune_method = "none") |> 937 | set_mode("classification") |> 938 | set_engine("earth") 939 | 940 | test_config_21_no_dummies_workflow <- 941 | workflow() |> 942 | add_recipe(test_config_21_no_dummies_recipe) |> 943 | add_model(test_config_21_no_dummies_spec) 944 | 945 | ## MARS models can make predictions on many _sub_models_, meaning that we 946 | ## can evaluate many values of `num_terms` without much computational 947 | ## cost. A regular grid is used to exploit this property. The first term 948 | ## is only the intercept, so the grid is a sequence of even numbered 949 | ## values. 950 | test_config_21_no_dummies_grid <- tidyr::crossing(num_terms = 2 * (1:6), prod_degree = 1:2) 951 | 952 | test_config_21_no_dummies_tune <- 953 | tune_grid(test_config_21_no_dummies_workflow, resamples = stop("add your rsample object"), 954 | grid = test_config_21_no_dummies_grid) 955 | 956 | 957 | --- 958 | 959 | Code 960 | dummy_template(model, prefix, verbose, tune) 961 | Output 962 | test_config_22_dummies_recipe <- 963 | recipe(formula = body_mass_g ~ ., data = penguins) |> 964 | step_novel(all_nominal_predictors()) |> 965 | ## This model requires the predictors to be numeric. The most common 966 | ## method to convert qualitative predictors to numeric is to create 967 | ## binary indicator variables (aka dummy variables) from these 968 | ## predictors. 969 | step_dummy(all_nominal_predictors()) |> 970 | ## Regularization methods sum up functions of the model slope 971 | ## coefficients. Because of this, the predictor variables should be on 972 | ## the same scale. Before centering and scaling the numeric predictors, 973 | ## any predictors with a single unique value are filtered out. 974 | step_zv(all_predictors()) |> 975 | step_normalize(all_numeric_predictors()) 976 | 977 | test_config_22_dummies_spec <- 978 | linear_reg(penalty = tune(), mixture = tune()) |> 979 | set_mode("regression") |> 980 | set_engine("glmnet") 981 | 982 | test_config_22_dummies_workflow <- 983 | workflow() |> 984 | add_recipe(test_config_22_dummies_recipe) |> 985 | add_model(test_config_22_dummies_spec) 986 | 987 | test_config_22_dummies_grid <- tidyr::crossing(penalty = 10^seq(-6, -1, length.out = 20), 988 | mixture = c(0.05, 0.2, 0.4, 0.6, 0.8, 1)) 989 | 990 | test_config_22_dummies_tune <- 991 | tune_grid(test_config_22_dummies_workflow, resamples = stop("add your rsample object"), 992 | grid = test_config_22_dummies_grid) 993 | 994 | 995 | --- 996 | 997 | Code 998 | no_dummy_template(model, prefix, verbose, tune) 999 | Output 1000 | test_config_22_no_dummies_recipe <- 1001 | recipe(formula = species ~ ., data = penguins) |> 1002 | step_novel(all_nominal_predictors()) |> 1003 | ## This model requires the predictors to be numeric. The most common 1004 | ## method to convert qualitative predictors to numeric is to create 1005 | ## binary indicator variables (aka dummy variables) from these 1006 | ## predictors. 1007 | step_dummy(all_nominal_predictors()) |> 1008 | ## Regularization methods sum up functions of the model slope 1009 | ## coefficients. Because of this, the predictor variables should be on 1010 | ## the same scale. 
Before centering and scaling the numeric predictors, 1011 | ## any predictors with a single unique value are filtered out. 1012 | step_zv(all_predictors()) |> 1013 | step_normalize(all_numeric_predictors()) 1014 | 1015 | test_config_22_no_dummies_spec <- 1016 | multinom_reg(penalty = tune(), mixture = tune()) |> 1017 | set_mode("classification") |> 1018 | set_engine("glmnet") 1019 | 1020 | test_config_22_no_dummies_workflow <- 1021 | workflow() |> 1022 | add_recipe(test_config_22_no_dummies_recipe) |> 1023 | add_model(test_config_22_no_dummies_spec) 1024 | 1025 | test_config_22_no_dummies_grid <- tidyr::crossing(penalty = 10^seq(-6, -1, 1026 | length.out = 20), mixture = c(0.05, 0.2, 0.4, 0.6, 0.8, 1)) 1027 | 1028 | test_config_22_no_dummies_tune <- 1029 | tune_grid(test_config_22_no_dummies_workflow, resamples = stop("add your rsample object"), 1030 | grid = test_config_22_no_dummies_grid) 1031 | 1032 | 1033 | --- 1034 | 1035 | Code 1036 | dummy_template(model, prefix, verbose, tune) 1037 | Output 1038 | test_config_23_dummies_recipe <- 1039 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1040 | ## Since dot product calculations are used, the predictor variables 1041 | ## should be on the same scale. Before centering and scaling the numeric 1042 | ## predictors, any predictors with a single unique value are filtered 1043 | ## out. 1044 | step_zv(all_predictors()) |> 1045 | step_normalize(all_numeric_predictors()) 1046 | 1047 | test_config_23_dummies_spec <- 1048 | svm_poly(cost = tune(), degree = tune(), scale_factor = tune()) |> 1049 | set_mode("regression") 1050 | 1051 | test_config_23_dummies_workflow <- 1052 | workflow() |> 1053 | add_recipe(test_config_23_dummies_recipe) |> 1054 | add_model(test_config_23_dummies_spec) 1055 | 1056 | set.seed(27246) 1057 | test_config_23_dummies_tune <- 1058 | tune_grid(test_config_23_dummies_workflow, resamples = stop("add your rsample object"), 1059 | grid = stop("add number of candidate points")) 1060 | 1061 | 1062 | --- 1063 | 1064 | Code 1065 | no_dummy_template(model, prefix, verbose, tune) 1066 | Output 1067 | test_config_23_no_dummies_recipe <- 1068 | recipe(formula = species ~ ., data = penguins) |> 1069 | ## Since dot product calculations are used, the predictor variables 1070 | ## should be on the same scale. Before centering and scaling the numeric 1071 | ## predictors, any predictors with a single unique value are filtered 1072 | ## out. 1073 | step_zv(all_predictors()) |> 1074 | step_normalize(all_numeric_predictors()) 1075 | 1076 | test_config_23_no_dummies_spec <- 1077 | svm_poly(cost = tune(), degree = tune(), scale_factor = tune()) |> 1078 | set_mode("classification") 1079 | 1080 | test_config_23_no_dummies_workflow <- 1081 | workflow() |> 1082 | add_recipe(test_config_23_no_dummies_recipe) |> 1083 | add_model(test_config_23_no_dummies_spec) 1084 | 1085 | set.seed(27246) 1086 | test_config_23_no_dummies_tune <- 1087 | tune_grid(test_config_23_no_dummies_workflow, resamples = stop("add your rsample object"), 1088 | grid = stop("add number of candidate points")) 1089 | 1090 | 1091 | --- 1092 | 1093 | Code 1094 | dummy_template(model, prefix, verbose, tune) 1095 | Output 1096 | test_config_24_dummies_recipe <- 1097 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1098 | ## Since dot product calculations are used, the predictor variables 1099 | ## should be on the same scale. Before centering and scaling the numeric 1100 | ## predictors, any predictors with a single unique value are filtered 1101 | ## out. 
1102 | step_zv(all_predictors()) |> 1103 | step_normalize(all_numeric_predictors()) 1104 | 1105 | test_config_24_dummies_spec <- 1106 | svm_rbf(cost = tune(), rbf_sigma = tune()) |> 1107 | set_mode("regression") 1108 | 1109 | test_config_24_dummies_workflow <- 1110 | workflow() |> 1111 | add_recipe(test_config_24_dummies_recipe) |> 1112 | add_model(test_config_24_dummies_spec) 1113 | 1114 | set.seed(27246) 1115 | test_config_24_dummies_tune <- 1116 | tune_grid(test_config_24_dummies_workflow, resamples = stop("add your rsample object"), 1117 | grid = stop("add number of candidate points")) 1118 | 1119 | 1120 | --- 1121 | 1122 | Code 1123 | no_dummy_template(model, prefix, verbose, tune) 1124 | Output 1125 | test_config_24_no_dummies_recipe <- 1126 | recipe(formula = species ~ ., data = penguins) |> 1127 | ## Since dot product calculations are used, the predictor variables 1128 | ## should be on the same scale. Before centering and scaling the numeric 1129 | ## predictors, any predictors with a single unique value are filtered 1130 | ## out. 1131 | step_zv(all_predictors()) |> 1132 | step_normalize(all_numeric_predictors()) 1133 | 1134 | test_config_24_no_dummies_spec <- 1135 | svm_rbf(cost = tune(), rbf_sigma = tune()) |> 1136 | set_mode("classification") 1137 | 1138 | test_config_24_no_dummies_workflow <- 1139 | workflow() |> 1140 | add_recipe(test_config_24_no_dummies_recipe) |> 1141 | add_model(test_config_24_no_dummies_spec) 1142 | 1143 | set.seed(27246) 1144 | test_config_24_no_dummies_tune <- 1145 | tune_grid(test_config_24_no_dummies_workflow, resamples = stop("add your rsample object"), 1146 | grid = stop("add number of candidate points")) 1147 | 1148 | 1149 | --- 1150 | 1151 | Code 1152 | dummy_template(model, prefix, verbose, tune) 1153 | Output 1154 | test_config_25_dummies_recipe <- 1155 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1156 | step_novel(all_nominal_predictors()) |> 1157 | ## This model requires the predictors to be numeric. The most common 1158 | ## method to convert qualitative predictors to numeric is to create 1159 | ## binary indicator variables (aka dummy variables) from these 1160 | ## predictors. 1161 | step_dummy(all_nominal_predictors()) |> 1162 | ## Since distance calculations are used, the predictor variables should 1163 | ## be on the same scale. Before centering and scaling the numeric 1164 | ## predictors, any predictors with a single unique value are filtered 1165 | ## out. 1166 | step_zv(all_predictors()) |> 1167 | step_normalize(all_numeric_predictors()) 1168 | 1169 | test_config_25_dummies_spec <- 1170 | nearest_neighbor(neighbors = tune(), weight_func = tune()) |> 1171 | set_mode("regression") |> 1172 | set_engine("kknn") 1173 | 1174 | test_config_25_dummies_workflow <- 1175 | workflow() |> 1176 | add_recipe(test_config_25_dummies_recipe) |> 1177 | add_model(test_config_25_dummies_spec) 1178 | 1179 | set.seed(27246) 1180 | test_config_25_dummies_tune <- 1181 | tune_grid(test_config_25_dummies_workflow, resamples = stop("add your rsample object"), 1182 | grid = stop("add number of candidate points")) 1183 | 1184 | 1185 | --- 1186 | 1187 | Code 1188 | no_dummy_template(model, prefix, verbose, tune) 1189 | Output 1190 | test_config_25_no_dummies_recipe <- 1191 | recipe(formula = species ~ ., data = penguins) |> 1192 | step_novel(all_nominal_predictors()) |> 1193 | ## This model requires the predictors to be numeric. 
The most common 1194 | ## method to convert qualitative predictors to numeric is to create 1195 | ## binary indicator variables (aka dummy variables) from these 1196 | ## predictors. 1197 | step_dummy(all_nominal_predictors()) |> 1198 | ## Since distance calculations are used, the predictor variables should 1199 | ## be on the same scale. Before centering and scaling the numeric 1200 | ## predictors, any predictors with a single unique value are filtered 1201 | ## out. 1202 | step_zv(all_predictors()) |> 1203 | step_normalize(all_numeric_predictors()) 1204 | 1205 | test_config_25_no_dummies_spec <- 1206 | nearest_neighbor(neighbors = tune(), weight_func = tune()) |> 1207 | set_mode("classification") |> 1208 | set_engine("kknn") 1209 | 1210 | test_config_25_no_dummies_workflow <- 1211 | workflow() |> 1212 | add_recipe(test_config_25_no_dummies_recipe) |> 1213 | add_model(test_config_25_no_dummies_spec) 1214 | 1215 | set.seed(27246) 1216 | test_config_25_no_dummies_tune <- 1217 | tune_grid(test_config_25_no_dummies_workflow, resamples = stop("add your rsample object"), 1218 | grid = stop("add number of candidate points")) 1219 | 1220 | 1221 | --- 1222 | 1223 | Code 1224 | dummy_template(model, prefix, verbose, tune) 1225 | Output 1226 | test_config_26_dummies_recipe <- 1227 | recipe(formula = body_mass_g ~ ., data = penguins) 1228 | 1229 | test_config_26_dummies_spec <- 1230 | gen_additive_mod(select_features = tune(), adjust_deg_free = tune()) |> 1231 | set_mode("regression") |> 1232 | set_engine("mgcv") 1233 | 1234 | test_config_26_dummies_workflow <- 1235 | workflow() |> 1236 | add_recipe(test_config_26_dummies_recipe) |> 1237 | add_model(test_config_26_dummies_spec, formula = stop("add your gam formula")) 1238 | 1239 | set.seed(27246) 1240 | test_config_26_dummies_tune <- 1241 | tune_grid(test_config_26_dummies_workflow, resamples = stop("add your rsample object"), 1242 | grid = stop("add number of candidate points")) 1243 | 1244 | 1245 | --- 1246 | 1247 | Code 1248 | no_dummy_template(model, prefix, verbose, tune) 1249 | Output 1250 | test_config_26_no_dummies_recipe <- 1251 | recipe(formula = species ~ ., data = penguins) 1252 | 1253 | test_config_26_no_dummies_spec <- 1254 | gen_additive_mod(select_features = tune(), adjust_deg_free = tune()) |> 1255 | set_mode("classification") |> 1256 | set_engine("mgcv") 1257 | 1258 | test_config_26_no_dummies_workflow <- 1259 | workflow() |> 1260 | add_recipe(test_config_26_no_dummies_recipe) |> 1261 | add_model(test_config_26_no_dummies_spec, formula = stop("add your gam formula")) 1262 | 1263 | set.seed(27246) 1264 | test_config_26_no_dummies_tune <- 1265 | tune_grid(test_config_26_no_dummies_workflow, resamples = stop("add your rsample object"), 1266 | grid = stop("add number of candidate points")) 1267 | 1268 | 1269 | --- 1270 | 1271 | Code 1272 | dummy_template(model, prefix, verbose, tune) 1273 | Output 1274 | library(plsmod) 1275 | 1276 | test_config_27_dummies_recipe <- 1277 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1278 | step_novel(all_nominal_predictors()) |> 1279 | ## This model requires the predictors to be numeric. The most common 1280 | ## method to convert qualitative predictors to numeric is to create 1281 | ## binary indicator variables (aka dummy variables) from these 1282 | ## predictors. 
1283 | step_dummy(all_nominal_predictors()) |> 1284 | step_zv(all_predictors()) |> 1285 | step_normalize(all_numeric_predictors()) 1286 | 1287 | test_config_27_dummies_spec <- 1288 | pls(predictor_prop = tune(), num_comp = tune()) |> 1289 | set_mode("regression") |> 1290 | set_engine("mixOmics") 1291 | 1292 | test_config_27_dummies_workflow <- 1293 | workflow() |> 1294 | add_recipe(test_config_27_dummies_recipe) |> 1295 | add_model(test_config_27_dummies_spec) 1296 | 1297 | set.seed(27246) 1298 | test_config_27_dummies_tune <- 1299 | tune_grid(test_config_27_dummies_workflow, resamples = stop("add your rsample object"), 1300 | grid = stop("add number of candidate points")) 1301 | 1302 | 1303 | --- 1304 | 1305 | Code 1306 | no_dummy_template(model, prefix, verbose, tune) 1307 | Output 1308 | library(plsmod) 1309 | 1310 | test_config_27_no_dummies_recipe <- 1311 | recipe(formula = species ~ ., data = penguins) |> 1312 | step_novel(all_nominal_predictors()) |> 1313 | ## This model requires the predictors to be numeric. The most common 1314 | ## method to convert qualitative predictors to numeric is to create 1315 | ## binary indicator variables (aka dummy variables) from these 1316 | ## predictors. 1317 | step_dummy(all_nominal_predictors()) |> 1318 | step_zv(all_predictors()) |> 1319 | step_normalize(all_numeric_predictors()) 1320 | 1321 | test_config_27_no_dummies_spec <- 1322 | pls(predictor_prop = tune(), num_comp = tune()) |> 1323 | set_mode("classification") |> 1324 | set_engine("mixOmics") 1325 | 1326 | test_config_27_no_dummies_workflow <- 1327 | workflow() |> 1328 | add_recipe(test_config_27_no_dummies_recipe) |> 1329 | add_model(test_config_27_no_dummies_spec) 1330 | 1331 | set.seed(27246) 1332 | test_config_27_no_dummies_tune <- 1333 | tune_grid(test_config_27_no_dummies_workflow, resamples = stop("add your rsample object"), 1334 | grid = stop("add number of candidate points")) 1335 | 1336 | 1337 | --- 1338 | 1339 | Code 1340 | dummy_template(model, prefix, verbose, tune) 1341 | Output 1342 | test_config_28_dummies_recipe <- 1343 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1344 | step_novel(all_nominal_predictors()) |> 1345 | ## This model requires the predictors to be numeric. The most common 1346 | ## method to convert qualitative predictors to numeric is to create 1347 | ## binary indicator variables (aka dummy variables) from these 1348 | ## predictors. 1349 | step_dummy(all_nominal_predictors()) |> 1350 | step_zv(all_predictors()) |> 1351 | step_normalize(all_numeric_predictors()) 1352 | 1353 | test_config_28_dummies_spec <- 1354 | mlp(hidden_units = tune(), penalty = tune(), epochs = tune()) |> 1355 | set_mode("regression") 1356 | 1357 | test_config_28_dummies_workflow <- 1358 | workflow() |> 1359 | add_recipe(test_config_28_dummies_recipe) |> 1360 | add_model(test_config_28_dummies_spec) 1361 | 1362 | set.seed(27246) 1363 | test_config_28_dummies_tune <- 1364 | tune_grid(test_config_28_dummies_workflow, resamples = stop("add your rsample object"), 1365 | grid = stop("add number of candidate points")) 1366 | 1367 | 1368 | --- 1369 | 1370 | Code 1371 | no_dummy_template(model, prefix, verbose, tune) 1372 | Output 1373 | test_config_28_no_dummies_recipe <- 1374 | recipe(formula = species ~ ., data = penguins) |> 1375 | step_novel(all_nominal_predictors()) |> 1376 | ## This model requires the predictors to be numeric. 
The most common 1377 | ## method to convert qualitative predictors to numeric is to create 1378 | ## binary indicator variables (aka dummy variables) from these 1379 | ## predictors. 1380 | step_dummy(all_nominal_predictors()) |> 1381 | step_zv(all_predictors()) |> 1382 | step_normalize(all_numeric_predictors()) 1383 | 1384 | test_config_28_no_dummies_spec <- 1385 | mlp(hidden_units = tune(), penalty = tune(), epochs = tune()) |> 1386 | set_mode("classification") 1387 | 1388 | test_config_28_no_dummies_workflow <- 1389 | workflow() |> 1390 | add_recipe(test_config_28_no_dummies_recipe) |> 1391 | add_model(test_config_28_no_dummies_spec) 1392 | 1393 | set.seed(27246) 1394 | test_config_28_no_dummies_tune <- 1395 | tune_grid(test_config_28_no_dummies_workflow, resamples = stop("add your rsample object"), 1396 | grid = stop("add number of candidate points")) 1397 | 1398 | 1399 | --- 1400 | 1401 | Code 1402 | dummy_template(model, prefix, verbose, tune) 1403 | Output 1404 | test_config_29_dummies_recipe <- 1405 | recipe(formula = body_mass_g ~ ., data = penguins) 1406 | 1407 | test_config_29_dummies_spec <- 1408 | rand_forest(mtry = tune(), min_n = tune(), trees = 1000) |> 1409 | set_mode("regression") |> 1410 | set_engine("ranger") 1411 | 1412 | test_config_29_dummies_workflow <- 1413 | workflow() |> 1414 | add_recipe(test_config_29_dummies_recipe) |> 1415 | add_model(test_config_29_dummies_spec) 1416 | 1417 | set.seed(27246) 1418 | test_config_29_dummies_tune <- 1419 | tune_grid(test_config_29_dummies_workflow, resamples = stop("add your rsample object"), 1420 | grid = stop("add number of candidate points")) 1421 | 1422 | 1423 | --- 1424 | 1425 | Code 1426 | no_dummy_template(model, prefix, verbose, tune) 1427 | Output 1428 | test_config_29_no_dummies_recipe <- 1429 | recipe(formula = species ~ ., data = penguins) 1430 | 1431 | test_config_29_no_dummies_spec <- 1432 | rand_forest(mtry = tune(), min_n = tune(), trees = 1000) |> 1433 | set_mode("classification") |> 1434 | set_engine("ranger") 1435 | 1436 | test_config_29_no_dummies_workflow <- 1437 | workflow() |> 1438 | add_recipe(test_config_29_no_dummies_recipe) |> 1439 | add_model(test_config_29_no_dummies_spec) 1440 | 1441 | set.seed(27246) 1442 | test_config_29_no_dummies_tune <- 1443 | tune_grid(test_config_29_no_dummies_workflow, resamples = stop("add your rsample object"), 1444 | grid = stop("add number of candidate points")) 1445 | 1446 | 1447 | --- 1448 | 1449 | Code 1450 | dummy_template(model, prefix, verbose, tune) 1451 | Output 1452 | test_config_30_dummies_recipe <- 1453 | recipe(formula = body_mass_g ~ ., data = penguins) 1454 | 1455 | test_config_30_dummies_spec <- 1456 | decision_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 1457 | set_mode("regression") |> 1458 | set_engine("rpart") 1459 | 1460 | test_config_30_dummies_workflow <- 1461 | workflow() |> 1462 | add_recipe(test_config_30_dummies_recipe) |> 1463 | add_model(test_config_30_dummies_spec) 1464 | 1465 | set.seed(27246) 1466 | test_config_30_dummies_tune <- 1467 | tune_grid(test_config_30_dummies_workflow, resamples = stop("add your rsample object"), 1468 | grid = stop("add number of candidate points")) 1469 | 1470 | 1471 | --- 1472 | 1473 | Code 1474 | no_dummy_template(model, prefix, verbose, tune) 1475 | Output 1476 | test_config_30_no_dummies_recipe <- 1477 | recipe(formula = species ~ ., data = penguins) 1478 | 1479 | test_config_30_no_dummies_spec <- 1480 | decision_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 
1481 | set_mode("classification") |> 1482 | set_engine("rpart") 1483 | 1484 | test_config_30_no_dummies_workflow <- 1485 | workflow() |> 1486 | add_recipe(test_config_30_no_dummies_recipe) |> 1487 | add_model(test_config_30_no_dummies_spec) 1488 | 1489 | set.seed(27246) 1490 | test_config_30_no_dummies_tune <- 1491 | tune_grid(test_config_30_no_dummies_workflow, resamples = stop("add your rsample object"), 1492 | grid = stop("add number of candidate points")) 1493 | 1494 | 1495 | --- 1496 | 1497 | Code 1498 | dummy_template(model, prefix, verbose, tune) 1499 | Output 1500 | test_config_31_dummies_recipe <- 1501 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1502 | step_novel(all_nominal_predictors()) |> 1503 | ## This model requires the predictors to be numeric. The most common 1504 | ## method to convert qualitative predictors to numeric is to create 1505 | ## binary indicator variables (aka dummy variables) from these 1506 | ## predictors. However, for this model, binary indicator variables can be 1507 | ## made for each of the levels of the factors (known as 'one-hot 1508 | ## encoding'). 1509 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 1510 | step_zv(all_predictors()) 1511 | 1512 | test_config_31_dummies_spec <- 1513 | boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 1514 | loss_reduction = tune(), sample_size = tune()) |> 1515 | set_mode("regression") |> 1516 | set_engine("xgboost") 1517 | 1518 | test_config_31_dummies_workflow <- 1519 | workflow() |> 1520 | add_recipe(test_config_31_dummies_recipe) |> 1521 | add_model(test_config_31_dummies_spec) 1522 | 1523 | set.seed(27246) 1524 | test_config_31_dummies_tune <- 1525 | tune_grid(test_config_31_dummies_workflow, resamples = stop("add your rsample object"), 1526 | grid = stop("add number of candidate points")) 1527 | 1528 | 1529 | --- 1530 | 1531 | Code 1532 | no_dummy_template(model, prefix, verbose, tune) 1533 | Output 1534 | test_config_31_no_dummies_recipe <- 1535 | recipe(formula = species ~ ., data = penguins) |> 1536 | step_novel(all_nominal_predictors()) |> 1537 | ## This model requires the predictors to be numeric. The most common 1538 | ## method to convert qualitative predictors to numeric is to create 1539 | ## binary indicator variables (aka dummy variables) from these 1540 | ## predictors. However, for this model, binary indicator variables can be 1541 | ## made for each of the levels of the factors (known as 'one-hot 1542 | ## encoding'). 
1543 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 1544 | step_zv(all_predictors()) 1545 | 1546 | test_config_31_no_dummies_spec <- 1547 | boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 1548 | loss_reduction = tune(), sample_size = tune()) |> 1549 | set_mode("classification") |> 1550 | set_engine("xgboost") 1551 | 1552 | test_config_31_no_dummies_workflow <- 1553 | workflow() |> 1554 | add_recipe(test_config_31_no_dummies_recipe) |> 1555 | add_model(test_config_31_no_dummies_spec) 1556 | 1557 | set.seed(27246) 1558 | test_config_31_no_dummies_tune <- 1559 | tune_grid(test_config_31_no_dummies_workflow, resamples = stop("add your rsample object"), 1560 | grid = stop("add number of candidate points")) 1561 | 1562 | 1563 | --- 1564 | 1565 | Code 1566 | dummy_template(model, prefix, verbose, tune) 1567 | Output 1568 | library(rules) 1569 | 1570 | test_config_32_dummies_recipe <- 1571 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1572 | step_novel(all_nominal_predictors()) |> 1573 | ## This model requires the predictors to be numeric. The most common 1574 | ## method to convert qualitative predictors to numeric is to create 1575 | ## binary indicator variables (aka dummy variables) from these 1576 | ## predictors. 1577 | step_dummy(all_nominal_predictors()) |> 1578 | step_zv(all_predictors()) |> 1579 | step_normalize(all_numeric_predictors()) 1580 | 1581 | test_config_32_dummies_spec <- 1582 | rule_fit(mtry = tune(), trees = tune(), min_n = tune(), tree_depth = tune(), 1583 | learn_rate = tune(), loss_reduction = tune(), sample_size = tune(), penalty = tune()) |> 1584 | set_mode("regression") |> 1585 | set_engine("xrf") 1586 | 1587 | test_config_32_dummies_workflow <- 1588 | workflow() |> 1589 | add_recipe(test_config_32_dummies_recipe) |> 1590 | add_model(test_config_32_dummies_spec) 1591 | 1592 | set.seed(27246) 1593 | test_config_32_dummies_tune <- 1594 | tune_grid(test_config_32_dummies_workflow, resamples = stop("add your rsample object"), 1595 | grid = stop("add number of candidate points")) 1596 | 1597 | 1598 | --- 1599 | 1600 | Code 1601 | no_dummy_template(model, prefix, verbose, tune) 1602 | Output 1603 | library(rules) 1604 | 1605 | test_config_32_no_dummies_recipe <- 1606 | recipe(formula = species ~ ., data = penguins) |> 1607 | step_novel(all_nominal_predictors()) |> 1608 | ## This model requires the predictors to be numeric. The most common 1609 | ## method to convert qualitative predictors to numeric is to create 1610 | ## binary indicator variables (aka dummy variables) from these 1611 | ## predictors. 
1612 | step_dummy(all_nominal_predictors()) |> 1613 | step_zv(all_predictors()) |> 1614 | step_normalize(all_numeric_predictors()) 1615 | 1616 | test_config_32_no_dummies_spec <- 1617 | rule_fit(mtry = tune(), trees = tune(), min_n = tune(), tree_depth = tune(), 1618 | learn_rate = tune(), loss_reduction = tune(), sample_size = tune(), penalty = tune()) |> 1619 | set_mode("classification") |> 1620 | set_engine("xrf") 1621 | 1622 | test_config_32_no_dummies_workflow <- 1623 | workflow() |> 1624 | add_recipe(test_config_32_no_dummies_recipe) |> 1625 | add_model(test_config_32_no_dummies_spec) 1626 | 1627 | set.seed(27246) 1628 | test_config_32_no_dummies_tune <- 1629 | tune_grid(test_config_32_no_dummies_workflow, resamples = stop("add your rsample object"), 1630 | grid = stop("add number of candidate points")) 1631 | 1632 | 1633 | --- 1634 | 1635 | Code 1636 | dummy_template(model, prefix, verbose, tune) 1637 | Output 1638 | library(baguette) 1639 | 1640 | test_config_33_dummies_recipe <- 1641 | recipe(formula = body_mass_g ~ ., data = penguins) 1642 | 1643 | test_config_33_dummies_spec <- 1644 | bag_tree() |> 1645 | set_mode("regression") |> 1646 | set_engine("rpart") 1647 | 1648 | test_config_33_dummies_workflow <- 1649 | workflow() |> 1650 | add_recipe(test_config_33_dummies_recipe) |> 1651 | add_model(test_config_33_dummies_spec) 1652 | 1653 | 1654 | --- 1655 | 1656 | Code 1657 | no_dummy_template(model, prefix, verbose, tune) 1658 | Output 1659 | library(baguette) 1660 | 1661 | test_config_33_no_dummies_recipe <- 1662 | recipe(formula = species ~ ., data = penguins) 1663 | 1664 | test_config_33_no_dummies_spec <- 1665 | bag_tree() |> 1666 | set_mode("classification") |> 1667 | set_engine("rpart") 1668 | 1669 | test_config_33_no_dummies_workflow <- 1670 | workflow() |> 1671 | add_recipe(test_config_33_no_dummies_recipe) |> 1672 | add_model(test_config_33_no_dummies_spec) 1673 | 1674 | 1675 | --- 1676 | 1677 | Code 1678 | no_dummy_template(model, prefix, verbose, tune) 1679 | Output 1680 | test_config_34_no_dummies_recipe <- 1681 | recipe(formula = species ~ ., data = penguins) 1682 | 1683 | test_config_34_no_dummies_spec <- 1684 | boost_tree() |> 1685 | set_mode("classification") |> 1686 | set_engine("C5.0") 1687 | 1688 | test_config_34_no_dummies_workflow <- 1689 | workflow() |> 1690 | add_recipe(test_config_34_no_dummies_recipe) |> 1691 | add_model(test_config_34_no_dummies_spec) 1692 | 1693 | 1694 | --- 1695 | 1696 | Code 1697 | dummy_template(model, prefix, verbose, tune) 1698 | Output 1699 | library(rules) 1700 | 1701 | test_config_35_dummies_recipe <- 1702 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1703 | step_zv(all_predictors()) 1704 | 1705 | test_config_35_dummies_spec <- 1706 | cubist_rules() |> 1707 | set_engine("Cubist") 1708 | 1709 | test_config_35_dummies_workflow <- 1710 | workflow() |> 1711 | add_recipe(test_config_35_dummies_recipe) |> 1712 | add_model(test_config_35_dummies_spec) 1713 | 1714 | 1715 | --- 1716 | 1717 | Code 1718 | dummy_template(model, prefix, verbose, tune) 1719 | Output 1720 | test_config_36_dummies_recipe <- 1721 | recipe(formula = body_mass_g ~ ., data = penguins) 1722 | 1723 | test_config_36_dummies_spec <- 1724 | bart() |> 1725 | set_mode("regression") |> 1726 | set_engine("dbarts") 1727 | 1728 | test_config_36_dummies_workflow <- 1729 | workflow() |> 1730 | add_recipe(test_config_36_dummies_recipe) |> 1731 | add_model(test_config_36_dummies_spec) 1732 | 1733 | 1734 | --- 1735 | 1736 | Code 1737 | no_dummy_template(model, prefix, 
verbose, tune) 1738 | Output 1739 | test_config_36_no_dummies_recipe <- 1740 | recipe(formula = species ~ ., data = penguins) 1741 | 1742 | test_config_36_no_dummies_spec <- 1743 | bart() |> 1744 | set_mode("classification") |> 1745 | set_engine("dbarts") 1746 | 1747 | test_config_36_no_dummies_workflow <- 1748 | workflow() |> 1749 | add_recipe(test_config_36_no_dummies_recipe) |> 1750 | add_model(test_config_36_no_dummies_spec) 1751 | 1752 | 1753 | --- 1754 | 1755 | Code 1756 | dummy_template(model, prefix, verbose, tune) 1757 | Output 1758 | test_config_37_dummies_recipe <- 1759 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1760 | step_novel(all_nominal_predictors()) |> 1761 | step_dummy(all_nominal_predictors()) |> 1762 | step_zv(all_predictors()) 1763 | 1764 | test_config_37_dummies_spec <- 1765 | mars() |> 1766 | set_mode("regression") |> 1767 | set_engine("earth") 1768 | 1769 | test_config_37_dummies_workflow <- 1770 | workflow() |> 1771 | add_recipe(test_config_37_dummies_recipe) |> 1772 | add_model(test_config_37_dummies_spec) 1773 | 1774 | 1775 | --- 1776 | 1777 | Code 1778 | no_dummy_template(model, prefix, verbose, tune) 1779 | Output 1780 | test_config_37_no_dummies_recipe <- 1781 | recipe(formula = species ~ ., data = penguins) |> 1782 | step_novel(all_nominal_predictors()) |> 1783 | step_dummy(all_nominal_predictors()) |> 1784 | step_zv(all_predictors()) 1785 | 1786 | test_config_37_no_dummies_spec <- 1787 | mars() |> 1788 | set_mode("classification") |> 1789 | set_engine("earth") 1790 | 1791 | test_config_37_no_dummies_workflow <- 1792 | workflow() |> 1793 | add_recipe(test_config_37_no_dummies_recipe) |> 1794 | add_model(test_config_37_no_dummies_spec) 1795 | 1796 | 1797 | --- 1798 | 1799 | Code 1800 | dummy_template(model, prefix, verbose, tune) 1801 | Output 1802 | test_config_38_dummies_recipe <- 1803 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1804 | step_novel(all_nominal_predictors()) |> 1805 | step_dummy(all_nominal_predictors()) |> 1806 | step_zv(all_predictors()) |> 1807 | step_normalize(all_numeric_predictors()) 1808 | 1809 | test_config_38_dummies_spec <- 1810 | linear_reg() |> 1811 | set_mode("regression") |> 1812 | set_engine("glmnet") 1813 | 1814 | test_config_38_dummies_workflow <- 1815 | workflow() |> 1816 | add_recipe(test_config_38_dummies_recipe) |> 1817 | add_model(test_config_38_dummies_spec) 1818 | 1819 | 1820 | --- 1821 | 1822 | Code 1823 | no_dummy_template(model, prefix, verbose, tune) 1824 | Output 1825 | test_config_38_no_dummies_recipe <- 1826 | recipe(formula = species ~ ., data = penguins) |> 1827 | step_novel(all_nominal_predictors()) |> 1828 | step_dummy(all_nominal_predictors()) |> 1829 | step_zv(all_predictors()) |> 1830 | step_normalize(all_numeric_predictors()) 1831 | 1832 | test_config_38_no_dummies_spec <- 1833 | multinom_reg() |> 1834 | set_mode("classification") |> 1835 | set_engine("glmnet") 1836 | 1837 | test_config_38_no_dummies_workflow <- 1838 | workflow() |> 1839 | add_recipe(test_config_38_no_dummies_recipe) |> 1840 | add_model(test_config_38_no_dummies_spec) 1841 | 1842 | 1843 | --- 1844 | 1845 | Code 1846 | dummy_template(model, prefix, verbose, tune) 1847 | Output 1848 | test_config_39_dummies_recipe <- 1849 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1850 | step_zv(all_predictors()) |> 1851 | step_normalize(all_numeric_predictors()) 1852 | 1853 | test_config_39_dummies_spec <- 1854 | svm_poly() |> 1855 | set_mode("regression") 1856 | 1857 | test_config_39_dummies_workflow <- 1858 | 
workflow() |> 1859 | add_recipe(test_config_39_dummies_recipe) |> 1860 | add_model(test_config_39_dummies_spec) 1861 | 1862 | 1863 | --- 1864 | 1865 | Code 1866 | no_dummy_template(model, prefix, verbose, tune) 1867 | Output 1868 | test_config_39_no_dummies_recipe <- 1869 | recipe(formula = species ~ ., data = penguins) |> 1870 | step_zv(all_predictors()) |> 1871 | step_normalize(all_numeric_predictors()) 1872 | 1873 | test_config_39_no_dummies_spec <- 1874 | svm_poly() |> 1875 | set_mode("classification") 1876 | 1877 | test_config_39_no_dummies_workflow <- 1878 | workflow() |> 1879 | add_recipe(test_config_39_no_dummies_recipe) |> 1880 | add_model(test_config_39_no_dummies_spec) 1881 | 1882 | 1883 | --- 1884 | 1885 | Code 1886 | dummy_template(model, prefix, verbose, tune) 1887 | Output 1888 | test_config_40_dummies_recipe <- 1889 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1890 | step_zv(all_predictors()) |> 1891 | step_normalize(all_numeric_predictors()) 1892 | 1893 | test_config_40_dummies_spec <- 1894 | svm_rbf() |> 1895 | set_mode("regression") 1896 | 1897 | test_config_40_dummies_workflow <- 1898 | workflow() |> 1899 | add_recipe(test_config_40_dummies_recipe) |> 1900 | add_model(test_config_40_dummies_spec) 1901 | 1902 | 1903 | --- 1904 | 1905 | Code 1906 | no_dummy_template(model, prefix, verbose, tune) 1907 | Output 1908 | test_config_40_no_dummies_recipe <- 1909 | recipe(formula = species ~ ., data = penguins) |> 1910 | step_zv(all_predictors()) |> 1911 | step_normalize(all_numeric_predictors()) 1912 | 1913 | test_config_40_no_dummies_spec <- 1914 | svm_rbf() |> 1915 | set_mode("classification") 1916 | 1917 | test_config_40_no_dummies_workflow <- 1918 | workflow() |> 1919 | add_recipe(test_config_40_no_dummies_recipe) |> 1920 | add_model(test_config_40_no_dummies_spec) 1921 | 1922 | 1923 | --- 1924 | 1925 | Code 1926 | dummy_template(model, prefix, verbose, tune) 1927 | Output 1928 | test_config_41_dummies_recipe <- 1929 | recipe(formula = body_mass_g ~ ., data = penguins) |> 1930 | step_novel(all_nominal_predictors()) |> 1931 | step_dummy(all_nominal_predictors()) |> 1932 | step_zv(all_predictors()) |> 1933 | step_normalize(all_numeric_predictors()) 1934 | 1935 | test_config_41_dummies_spec <- 1936 | nearest_neighbor() |> 1937 | set_mode("regression") |> 1938 | set_engine("kknn") 1939 | 1940 | test_config_41_dummies_workflow <- 1941 | workflow() |> 1942 | add_recipe(test_config_41_dummies_recipe) |> 1943 | add_model(test_config_41_dummies_spec) 1944 | 1945 | 1946 | --- 1947 | 1948 | Code 1949 | no_dummy_template(model, prefix, verbose, tune) 1950 | Output 1951 | test_config_41_no_dummies_recipe <- 1952 | recipe(formula = species ~ ., data = penguins) |> 1953 | step_novel(all_nominal_predictors()) |> 1954 | step_dummy(all_nominal_predictors()) |> 1955 | step_zv(all_predictors()) |> 1956 | step_normalize(all_numeric_predictors()) 1957 | 1958 | test_config_41_no_dummies_spec <- 1959 | nearest_neighbor() |> 1960 | set_mode("classification") |> 1961 | set_engine("kknn") 1962 | 1963 | test_config_41_no_dummies_workflow <- 1964 | workflow() |> 1965 | add_recipe(test_config_41_no_dummies_recipe) |> 1966 | add_model(test_config_41_no_dummies_spec) 1967 | 1968 | 1969 | --- 1970 | 1971 | Code 1972 | dummy_template(model, prefix, verbose, tune) 1973 | Output 1974 | test_config_42_dummies_recipe <- 1975 | recipe(formula = body_mass_g ~ ., data = penguins) 1976 | 1977 | test_config_42_dummies_spec <- 1978 | gen_additive_mod() |> 1979 | set_mode("regression") |> 1980 | 
set_engine("mgcv") 1981 | 1982 | test_config_42_dummies_workflow <- 1983 | workflow() |> 1984 | add_recipe(test_config_42_dummies_recipe) |> 1985 | add_model(test_config_42_dummies_spec, formula = stop("add your gam formula")) 1986 | 1987 | 1988 | --- 1989 | 1990 | Code 1991 | no_dummy_template(model, prefix, verbose, tune) 1992 | Output 1993 | test_config_42_no_dummies_recipe <- 1994 | recipe(formula = species ~ ., data = penguins) 1995 | 1996 | test_config_42_no_dummies_spec <- 1997 | gen_additive_mod() |> 1998 | set_mode("classification") |> 1999 | set_engine("mgcv") 2000 | 2001 | test_config_42_no_dummies_workflow <- 2002 | workflow() |> 2003 | add_recipe(test_config_42_no_dummies_recipe) |> 2004 | add_model(test_config_42_no_dummies_spec, formula = stop("add your gam formula")) 2005 | 2006 | 2007 | --- 2008 | 2009 | Code 2010 | dummy_template(model, prefix, verbose, tune) 2011 | Output 2012 | library(plsmod) 2013 | 2014 | test_config_43_dummies_recipe <- 2015 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2016 | step_novel(all_nominal_predictors()) |> 2017 | step_dummy(all_nominal_predictors()) |> 2018 | step_zv(all_predictors()) |> 2019 | step_normalize(all_numeric_predictors()) 2020 | 2021 | test_config_43_dummies_spec <- 2022 | pls() |> 2023 | set_mode("regression") |> 2024 | set_engine("mixOmics") 2025 | 2026 | test_config_43_dummies_workflow <- 2027 | workflow() |> 2028 | add_recipe(test_config_43_dummies_recipe) |> 2029 | add_model(test_config_43_dummies_spec) 2030 | 2031 | 2032 | --- 2033 | 2034 | Code 2035 | no_dummy_template(model, prefix, verbose, tune) 2036 | Output 2037 | library(plsmod) 2038 | 2039 | test_config_43_no_dummies_recipe <- 2040 | recipe(formula = species ~ ., data = penguins) |> 2041 | step_novel(all_nominal_predictors()) |> 2042 | step_dummy(all_nominal_predictors()) |> 2043 | step_zv(all_predictors()) |> 2044 | step_normalize(all_numeric_predictors()) 2045 | 2046 | test_config_43_no_dummies_spec <- 2047 | pls() |> 2048 | set_mode("classification") |> 2049 | set_engine("mixOmics") 2050 | 2051 | test_config_43_no_dummies_workflow <- 2052 | workflow() |> 2053 | add_recipe(test_config_43_no_dummies_recipe) |> 2054 | add_model(test_config_43_no_dummies_spec) 2055 | 2056 | 2057 | --- 2058 | 2059 | Code 2060 | dummy_template(model, prefix, verbose, tune) 2061 | Output 2062 | test_config_44_dummies_recipe <- 2063 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2064 | step_novel(all_nominal_predictors()) |> 2065 | step_dummy(all_nominal_predictors()) |> 2066 | step_zv(all_predictors()) |> 2067 | step_normalize(all_numeric_predictors()) 2068 | 2069 | test_config_44_dummies_spec <- 2070 | mlp() |> 2071 | set_mode("regression") 2072 | 2073 | test_config_44_dummies_workflow <- 2074 | workflow() |> 2075 | add_recipe(test_config_44_dummies_recipe) |> 2076 | add_model(test_config_44_dummies_spec) 2077 | 2078 | 2079 | --- 2080 | 2081 | Code 2082 | no_dummy_template(model, prefix, verbose, tune) 2083 | Output 2084 | test_config_44_no_dummies_recipe <- 2085 | recipe(formula = species ~ ., data = penguins) |> 2086 | step_novel(all_nominal_predictors()) |> 2087 | step_dummy(all_nominal_predictors()) |> 2088 | step_zv(all_predictors()) |> 2089 | step_normalize(all_numeric_predictors()) 2090 | 2091 | test_config_44_no_dummies_spec <- 2092 | mlp() |> 2093 | set_mode("classification") 2094 | 2095 | test_config_44_no_dummies_workflow <- 2096 | workflow() |> 2097 | add_recipe(test_config_44_no_dummies_recipe) |> 2098 | add_model(test_config_44_no_dummies_spec) 2099 | 
2100 | 2101 | --- 2102 | 2103 | Code 2104 | dummy_template(model, prefix, verbose, tune) 2105 | Output 2106 | test_config_45_dummies_recipe <- 2107 | recipe(formula = body_mass_g ~ ., data = penguins) 2108 | 2109 | test_config_45_dummies_spec <- 2110 | rand_forest(trees = 1000) |> 2111 | set_mode("regression") |> 2112 | set_engine("ranger") 2113 | 2114 | test_config_45_dummies_workflow <- 2115 | workflow() |> 2116 | add_recipe(test_config_45_dummies_recipe) |> 2117 | add_model(test_config_45_dummies_spec) 2118 | 2119 | 2120 | --- 2121 | 2122 | Code 2123 | no_dummy_template(model, prefix, verbose, tune) 2124 | Output 2125 | test_config_45_no_dummies_recipe <- 2126 | recipe(formula = species ~ ., data = penguins) 2127 | 2128 | test_config_45_no_dummies_spec <- 2129 | rand_forest(trees = 1000) |> 2130 | set_mode("classification") |> 2131 | set_engine("ranger") 2132 | 2133 | test_config_45_no_dummies_workflow <- 2134 | workflow() |> 2135 | add_recipe(test_config_45_no_dummies_recipe) |> 2136 | add_model(test_config_45_no_dummies_spec) 2137 | 2138 | 2139 | --- 2140 | 2141 | Code 2142 | dummy_template(model, prefix, verbose, tune) 2143 | Output 2144 | test_config_46_dummies_recipe <- 2145 | recipe(formula = body_mass_g ~ ., data = penguins) 2146 | 2147 | test_config_46_dummies_spec <- 2148 | decision_tree() |> 2149 | set_mode("regression") |> 2150 | set_engine("rpart") 2151 | 2152 | test_config_46_dummies_workflow <- 2153 | workflow() |> 2154 | add_recipe(test_config_46_dummies_recipe) |> 2155 | add_model(test_config_46_dummies_spec) 2156 | 2157 | 2158 | --- 2159 | 2160 | Code 2161 | no_dummy_template(model, prefix, verbose, tune) 2162 | Output 2163 | test_config_46_no_dummies_recipe <- 2164 | recipe(formula = species ~ ., data = penguins) 2165 | 2166 | test_config_46_no_dummies_spec <- 2167 | decision_tree() |> 2168 | set_mode("classification") |> 2169 | set_engine("rpart") 2170 | 2171 | test_config_46_no_dummies_workflow <- 2172 | workflow() |> 2173 | add_recipe(test_config_46_no_dummies_recipe) |> 2174 | add_model(test_config_46_no_dummies_spec) 2175 | 2176 | 2177 | --- 2178 | 2179 | Code 2180 | dummy_template(model, prefix, verbose, tune) 2181 | Output 2182 | test_config_47_dummies_recipe <- 2183 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2184 | step_novel(all_nominal_predictors()) |> 2185 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 2186 | step_zv(all_predictors()) 2187 | 2188 | test_config_47_dummies_spec <- 2189 | boost_tree() |> 2190 | set_mode("regression") |> 2191 | set_engine("xgboost") 2192 | 2193 | test_config_47_dummies_workflow <- 2194 | workflow() |> 2195 | add_recipe(test_config_47_dummies_recipe) |> 2196 | add_model(test_config_47_dummies_spec) 2197 | 2198 | 2199 | --- 2200 | 2201 | Code 2202 | no_dummy_template(model, prefix, verbose, tune) 2203 | Output 2204 | test_config_47_no_dummies_recipe <- 2205 | recipe(formula = species ~ ., data = penguins) |> 2206 | step_novel(all_nominal_predictors()) |> 2207 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 2208 | step_zv(all_predictors()) 2209 | 2210 | test_config_47_no_dummies_spec <- 2211 | boost_tree() |> 2212 | set_mode("classification") |> 2213 | set_engine("xgboost") 2214 | 2215 | test_config_47_no_dummies_workflow <- 2216 | workflow() |> 2217 | add_recipe(test_config_47_no_dummies_recipe) |> 2218 | add_model(test_config_47_no_dummies_spec) 2219 | 2220 | 2221 | --- 2222 | 2223 | Code 2224 | dummy_template(model, prefix, verbose, tune) 2225 | Output 2226 | library(rules) 2227 | 2228 | 
test_config_48_dummies_recipe <- 2229 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2230 | step_novel(all_nominal_predictors()) |> 2231 | step_dummy(all_nominal_predictors()) |> 2232 | step_zv(all_predictors()) |> 2233 | step_normalize(all_numeric_predictors()) 2234 | 2235 | test_config_48_dummies_spec <- 2236 | rule_fit() |> 2237 | set_mode("regression") |> 2238 | set_engine("xrf") 2239 | 2240 | test_config_48_dummies_workflow <- 2241 | workflow() |> 2242 | add_recipe(test_config_48_dummies_recipe) |> 2243 | add_model(test_config_48_dummies_spec) 2244 | 2245 | 2246 | --- 2247 | 2248 | Code 2249 | no_dummy_template(model, prefix, verbose, tune) 2250 | Output 2251 | library(rules) 2252 | 2253 | test_config_48_no_dummies_recipe <- 2254 | recipe(formula = species ~ ., data = penguins) |> 2255 | step_novel(all_nominal_predictors()) |> 2256 | step_dummy(all_nominal_predictors()) |> 2257 | step_zv(all_predictors()) |> 2258 | step_normalize(all_numeric_predictors()) 2259 | 2260 | test_config_48_no_dummies_spec <- 2261 | rule_fit() |> 2262 | set_mode("classification") |> 2263 | set_engine("xrf") 2264 | 2265 | test_config_48_no_dummies_workflow <- 2266 | workflow() |> 2267 | add_recipe(test_config_48_no_dummies_recipe) |> 2268 | add_model(test_config_48_no_dummies_spec) 2269 | 2270 | 2271 | --- 2272 | 2273 | Code 2274 | dummy_template(model, prefix, verbose, tune) 2275 | Output 2276 | library(baguette) 2277 | 2278 | test_config_49_dummies_recipe <- 2279 | recipe(formula = body_mass_g ~ ., data = penguins) 2280 | 2281 | test_config_49_dummies_spec <- 2282 | bag_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 2283 | set_mode("regression") |> 2284 | set_engine("rpart") 2285 | 2286 | test_config_49_dummies_workflow <- 2287 | workflow() |> 2288 | add_recipe(test_config_49_dummies_recipe) |> 2289 | add_model(test_config_49_dummies_spec) 2290 | 2291 | set.seed(27246) 2292 | test_config_49_dummies_tune <- 2293 | tune_grid(test_config_49_dummies_workflow, resamples = stop("add your rsample object"), 2294 | grid = stop("add number of candidate points")) 2295 | 2296 | 2297 | --- 2298 | 2299 | Code 2300 | no_dummy_template(model, prefix, verbose, tune) 2301 | Output 2302 | library(baguette) 2303 | 2304 | test_config_49_no_dummies_recipe <- 2305 | recipe(formula = species ~ ., data = penguins) 2306 | 2307 | test_config_49_no_dummies_spec <- 2308 | bag_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 2309 | set_mode("classification") |> 2310 | set_engine("rpart") 2311 | 2312 | test_config_49_no_dummies_workflow <- 2313 | workflow() |> 2314 | add_recipe(test_config_49_no_dummies_recipe) |> 2315 | add_model(test_config_49_no_dummies_spec) 2316 | 2317 | set.seed(27246) 2318 | test_config_49_no_dummies_tune <- 2319 | tune_grid(test_config_49_no_dummies_workflow, resamples = stop("add your rsample object"), 2320 | grid = stop("add number of candidate points")) 2321 | 2322 | 2323 | --- 2324 | 2325 | Code 2326 | no_dummy_template(model, prefix, verbose, tune) 2327 | Output 2328 | test_config_50_no_dummies_recipe <- 2329 | recipe(formula = species ~ ., data = penguins) 2330 | 2331 | test_config_50_no_dummies_spec <- 2332 | boost_tree(trees = tune(), min_n = tune()) |> 2333 | set_mode("classification") |> 2334 | set_engine("C5.0") 2335 | 2336 | test_config_50_no_dummies_workflow <- 2337 | workflow() |> 2338 | add_recipe(test_config_50_no_dummies_recipe) |> 2339 | add_model(test_config_50_no_dummies_spec) 2340 | 2341 | set.seed(27246) 2342 | 
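  ## A sketch of how the placeholders below could be filled in (assumed values,
  ## not generated by usemodels): `resamples` expects an rsample object, e.g.
  ## rsample::vfold_cv(penguins, v = 10), and `grid` may simply be an integer
  ## such as 20 to request 20 candidate parameter combinations.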
test_config_50_no_dummies_tune <- 2343 | tune_grid(test_config_50_no_dummies_workflow, resamples = stop("add your rsample object"), 2344 | grid = stop("add number of candidate points")) 2345 | 2346 | 2347 | --- 2348 | 2349 | Code 2350 | dummy_template(model, prefix, verbose, tune) 2351 | Output 2352 | library(rules) 2353 | 2354 | test_config_51_dummies_recipe <- 2355 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2356 | step_zv(all_predictors()) 2357 | 2358 | test_config_51_dummies_spec <- 2359 | cubist_rules(committees = tune(), neighbors = tune()) |> 2360 | set_engine("Cubist") 2361 | 2362 | test_config_51_dummies_workflow <- 2363 | workflow() |> 2364 | add_recipe(test_config_51_dummies_recipe) |> 2365 | add_model(test_config_51_dummies_spec) 2366 | 2367 | test_config_51_dummies_grid <- tidyr::crossing(committees = c(1:9, (1:5) * 2368 | 10), neighbors = c(0, 3, 6, 9)) 2369 | 2370 | test_config_51_dummies_tune <- 2371 | tune_grid(test_config_51_dummies_workflow, resamples = stop("add your rsample object"), 2372 | grid = test_config_51_dummies_grid) 2373 | 2374 | 2375 | --- 2376 | 2377 | Code 2378 | dummy_template(model, prefix, verbose, tune) 2379 | Output 2380 | test_config_52_dummies_recipe <- 2381 | recipe(formula = body_mass_g ~ ., data = penguins) 2382 | 2383 | test_config_52_dummies_spec <- 2384 | bart(trees = tune(), prior_terminal_node_coef = tune(), prior_terminal_node_expo = tune()) |> 2385 | set_mode("regression") |> 2386 | set_engine("dbarts") 2387 | 2388 | test_config_52_dummies_workflow <- 2389 | workflow() |> 2390 | add_recipe(test_config_52_dummies_recipe) |> 2391 | add_model(test_config_52_dummies_spec) 2392 | 2393 | set.seed(27246) 2394 | test_config_52_dummies_tune <- 2395 | tune_grid(test_config_52_dummies_workflow, resamples = stop("add your rsample object"), 2396 | grid = stop("add number of candidate points")) 2397 | 2398 | 2399 | --- 2400 | 2401 | Code 2402 | no_dummy_template(model, prefix, verbose, tune) 2403 | Output 2404 | test_config_52_no_dummies_recipe <- 2405 | recipe(formula = species ~ ., data = penguins) 2406 | 2407 | test_config_52_no_dummies_spec <- 2408 | bart(trees = tune(), prior_terminal_node_coef = tune(), prior_terminal_node_expo = tune()) |> 2409 | set_mode("classification") |> 2410 | set_engine("dbarts") 2411 | 2412 | test_config_52_no_dummies_workflow <- 2413 | workflow() |> 2414 | add_recipe(test_config_52_no_dummies_recipe) |> 2415 | add_model(test_config_52_no_dummies_spec) 2416 | 2417 | set.seed(27246) 2418 | test_config_52_no_dummies_tune <- 2419 | tune_grid(test_config_52_no_dummies_workflow, resamples = stop("add your rsample object"), 2420 | grid = stop("add number of candidate points")) 2421 | 2422 | 2423 | --- 2424 | 2425 | Code 2426 | dummy_template(model, prefix, verbose, tune) 2427 | Output 2428 | test_config_53_dummies_recipe <- 2429 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2430 | step_novel(all_nominal_predictors()) |> 2431 | step_dummy(all_nominal_predictors()) |> 2432 | step_zv(all_predictors()) 2433 | 2434 | test_config_53_dummies_spec <- 2435 | mars(num_terms = tune(), prod_degree = tune(), prune_method = "none") |> 2436 | set_mode("regression") |> 2437 | set_engine("earth") 2438 | 2439 | test_config_53_dummies_workflow <- 2440 | workflow() |> 2441 | add_recipe(test_config_53_dummies_recipe) |> 2442 | add_model(test_config_53_dummies_spec) 2443 | 2444 | test_config_53_dummies_grid <- tidyr::crossing(num_terms = 2 * (1:6), prod_degree = 1:2) 2445 | 2446 | test_config_53_dummies_tune <- 2447 | 
tune_grid(test_config_53_dummies_workflow, resamples = stop("add your rsample object"), 2448 | grid = test_config_53_dummies_grid) 2449 | 2450 | 2451 | --- 2452 | 2453 | Code 2454 | no_dummy_template(model, prefix, verbose, tune) 2455 | Output 2456 | test_config_53_no_dummies_recipe <- 2457 | recipe(formula = species ~ ., data = penguins) |> 2458 | step_novel(all_nominal_predictors()) |> 2459 | step_dummy(all_nominal_predictors()) |> 2460 | step_zv(all_predictors()) 2461 | 2462 | test_config_53_no_dummies_spec <- 2463 | mars(num_terms = tune(), prod_degree = tune(), prune_method = "none") |> 2464 | set_mode("classification") |> 2465 | set_engine("earth") 2466 | 2467 | test_config_53_no_dummies_workflow <- 2468 | workflow() |> 2469 | add_recipe(test_config_53_no_dummies_recipe) |> 2470 | add_model(test_config_53_no_dummies_spec) 2471 | 2472 | test_config_53_no_dummies_grid <- tidyr::crossing(num_terms = 2 * (1:6), prod_degree = 1:2) 2473 | 2474 | test_config_53_no_dummies_tune <- 2475 | tune_grid(test_config_53_no_dummies_workflow, resamples = stop("add your rsample object"), 2476 | grid = test_config_53_no_dummies_grid) 2477 | 2478 | 2479 | --- 2480 | 2481 | Code 2482 | dummy_template(model, prefix, verbose, tune) 2483 | Output 2484 | test_config_54_dummies_recipe <- 2485 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2486 | step_novel(all_nominal_predictors()) |> 2487 | step_dummy(all_nominal_predictors()) |> 2488 | step_zv(all_predictors()) |> 2489 | step_normalize(all_numeric_predictors()) 2490 | 2491 | test_config_54_dummies_spec <- 2492 | linear_reg(penalty = tune(), mixture = tune()) |> 2493 | set_mode("regression") |> 2494 | set_engine("glmnet") 2495 | 2496 | test_config_54_dummies_workflow <- 2497 | workflow() |> 2498 | add_recipe(test_config_54_dummies_recipe) |> 2499 | add_model(test_config_54_dummies_spec) 2500 | 2501 | test_config_54_dummies_grid <- tidyr::crossing(penalty = 10^seq(-6, -1, length.out = 20), 2502 | mixture = c(0.05, 0.2, 0.4, 0.6, 0.8, 1)) 2503 | 2504 | test_config_54_dummies_tune <- 2505 | tune_grid(test_config_54_dummies_workflow, resamples = stop("add your rsample object"), 2506 | grid = test_config_54_dummies_grid) 2507 | 2508 | 2509 | --- 2510 | 2511 | Code 2512 | no_dummy_template(model, prefix, verbose, tune) 2513 | Output 2514 | test_config_54_no_dummies_recipe <- 2515 | recipe(formula = species ~ ., data = penguins) |> 2516 | step_novel(all_nominal_predictors()) |> 2517 | step_dummy(all_nominal_predictors()) |> 2518 | step_zv(all_predictors()) |> 2519 | step_normalize(all_numeric_predictors()) 2520 | 2521 | test_config_54_no_dummies_spec <- 2522 | multinom_reg(penalty = tune(), mixture = tune()) |> 2523 | set_mode("classification") |> 2524 | set_engine("glmnet") 2525 | 2526 | test_config_54_no_dummies_workflow <- 2527 | workflow() |> 2528 | add_recipe(test_config_54_no_dummies_recipe) |> 2529 | add_model(test_config_54_no_dummies_spec) 2530 | 2531 | test_config_54_no_dummies_grid <- tidyr::crossing(penalty = 10^seq(-6, -1, 2532 | length.out = 20), mixture = c(0.05, 0.2, 0.4, 0.6, 0.8, 1)) 2533 | 2534 | test_config_54_no_dummies_tune <- 2535 | tune_grid(test_config_54_no_dummies_workflow, resamples = stop("add your rsample object"), 2536 | grid = test_config_54_no_dummies_grid) 2537 | 2538 | 2539 | --- 2540 | 2541 | Code 2542 | dummy_template(model, prefix, verbose, tune) 2543 | Output 2544 | test_config_55_dummies_recipe <- 2545 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2546 | step_zv(all_predictors()) |> 2547 | 
step_normalize(all_numeric_predictors()) 2548 | 2549 | test_config_55_dummies_spec <- 2550 | svm_poly(cost = tune(), degree = tune(), scale_factor = tune()) |> 2551 | set_mode("regression") 2552 | 2553 | test_config_55_dummies_workflow <- 2554 | workflow() |> 2555 | add_recipe(test_config_55_dummies_recipe) |> 2556 | add_model(test_config_55_dummies_spec) 2557 | 2558 | set.seed(27246) 2559 | test_config_55_dummies_tune <- 2560 | tune_grid(test_config_55_dummies_workflow, resamples = stop("add your rsample object"), 2561 | grid = stop("add number of candidate points")) 2562 | 2563 | 2564 | --- 2565 | 2566 | Code 2567 | no_dummy_template(model, prefix, verbose, tune) 2568 | Output 2569 | test_config_55_no_dummies_recipe <- 2570 | recipe(formula = species ~ ., data = penguins) |> 2571 | step_zv(all_predictors()) |> 2572 | step_normalize(all_numeric_predictors()) 2573 | 2574 | test_config_55_no_dummies_spec <- 2575 | svm_poly(cost = tune(), degree = tune(), scale_factor = tune()) |> 2576 | set_mode("classification") 2577 | 2578 | test_config_55_no_dummies_workflow <- 2579 | workflow() |> 2580 | add_recipe(test_config_55_no_dummies_recipe) |> 2581 | add_model(test_config_55_no_dummies_spec) 2582 | 2583 | set.seed(27246) 2584 | test_config_55_no_dummies_tune <- 2585 | tune_grid(test_config_55_no_dummies_workflow, resamples = stop("add your rsample object"), 2586 | grid = stop("add number of candidate points")) 2587 | 2588 | 2589 | --- 2590 | 2591 | Code 2592 | dummy_template(model, prefix, verbose, tune) 2593 | Output 2594 | test_config_56_dummies_recipe <- 2595 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2596 | step_zv(all_predictors()) |> 2597 | step_normalize(all_numeric_predictors()) 2598 | 2599 | test_config_56_dummies_spec <- 2600 | svm_rbf(cost = tune(), rbf_sigma = tune()) |> 2601 | set_mode("regression") 2602 | 2603 | test_config_56_dummies_workflow <- 2604 | workflow() |> 2605 | add_recipe(test_config_56_dummies_recipe) |> 2606 | add_model(test_config_56_dummies_spec) 2607 | 2608 | set.seed(27246) 2609 | test_config_56_dummies_tune <- 2610 | tune_grid(test_config_56_dummies_workflow, resamples = stop("add your rsample object"), 2611 | grid = stop("add number of candidate points")) 2612 | 2613 | 2614 | --- 2615 | 2616 | Code 2617 | no_dummy_template(model, prefix, verbose, tune) 2618 | Output 2619 | test_config_56_no_dummies_recipe <- 2620 | recipe(formula = species ~ ., data = penguins) |> 2621 | step_zv(all_predictors()) |> 2622 | step_normalize(all_numeric_predictors()) 2623 | 2624 | test_config_56_no_dummies_spec <- 2625 | svm_rbf(cost = tune(), rbf_sigma = tune()) |> 2626 | set_mode("classification") 2627 | 2628 | test_config_56_no_dummies_workflow <- 2629 | workflow() |> 2630 | add_recipe(test_config_56_no_dummies_recipe) |> 2631 | add_model(test_config_56_no_dummies_spec) 2632 | 2633 | set.seed(27246) 2634 | test_config_56_no_dummies_tune <- 2635 | tune_grid(test_config_56_no_dummies_workflow, resamples = stop("add your rsample object"), 2636 | grid = stop("add number of candidate points")) 2637 | 2638 | 2639 | --- 2640 | 2641 | Code 2642 | dummy_template(model, prefix, verbose, tune) 2643 | Output 2644 | test_config_57_dummies_recipe <- 2645 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2646 | step_novel(all_nominal_predictors()) |> 2647 | step_dummy(all_nominal_predictors()) |> 2648 | step_zv(all_predictors()) |> 2649 | step_normalize(all_numeric_predictors()) 2650 | 2651 | test_config_57_dummies_spec <- 2652 | nearest_neighbor(neighbors = tune(), 
weight_func = tune()) |> 2653 | set_mode("regression") |> 2654 | set_engine("kknn") 2655 | 2656 | test_config_57_dummies_workflow <- 2657 | workflow() |> 2658 | add_recipe(test_config_57_dummies_recipe) |> 2659 | add_model(test_config_57_dummies_spec) 2660 | 2661 | set.seed(27246) 2662 | test_config_57_dummies_tune <- 2663 | tune_grid(test_config_57_dummies_workflow, resamples = stop("add your rsample object"), 2664 | grid = stop("add number of candidate points")) 2665 | 2666 | 2667 | --- 2668 | 2669 | Code 2670 | no_dummy_template(model, prefix, verbose, tune) 2671 | Output 2672 | test_config_57_no_dummies_recipe <- 2673 | recipe(formula = species ~ ., data = penguins) |> 2674 | step_novel(all_nominal_predictors()) |> 2675 | step_dummy(all_nominal_predictors()) |> 2676 | step_zv(all_predictors()) |> 2677 | step_normalize(all_numeric_predictors()) 2678 | 2679 | test_config_57_no_dummies_spec <- 2680 | nearest_neighbor(neighbors = tune(), weight_func = tune()) |> 2681 | set_mode("classification") |> 2682 | set_engine("kknn") 2683 | 2684 | test_config_57_no_dummies_workflow <- 2685 | workflow() |> 2686 | add_recipe(test_config_57_no_dummies_recipe) |> 2687 | add_model(test_config_57_no_dummies_spec) 2688 | 2689 | set.seed(27246) 2690 | test_config_57_no_dummies_tune <- 2691 | tune_grid(test_config_57_no_dummies_workflow, resamples = stop("add your rsample object"), 2692 | grid = stop("add number of candidate points")) 2693 | 2694 | 2695 | --- 2696 | 2697 | Code 2698 | dummy_template(model, prefix, verbose, tune) 2699 | Output 2700 | test_config_58_dummies_recipe <- 2701 | recipe(formula = body_mass_g ~ ., data = penguins) 2702 | 2703 | test_config_58_dummies_spec <- 2704 | gen_additive_mod(select_features = tune(), adjust_deg_free = tune()) |> 2705 | set_mode("regression") |> 2706 | set_engine("mgcv") 2707 | 2708 | test_config_58_dummies_workflow <- 2709 | workflow() |> 2710 | add_recipe(test_config_58_dummies_recipe) |> 2711 | add_model(test_config_58_dummies_spec, formula = stop("add your gam formula")) 2712 | 2713 | set.seed(27246) 2714 | test_config_58_dummies_tune <- 2715 | tune_grid(test_config_58_dummies_workflow, resamples = stop("add your rsample object"), 2716 | grid = stop("add number of candidate points")) 2717 | 2718 | 2719 | --- 2720 | 2721 | Code 2722 | no_dummy_template(model, prefix, verbose, tune) 2723 | Output 2724 | test_config_58_no_dummies_recipe <- 2725 | recipe(formula = species ~ ., data = penguins) 2726 | 2727 | test_config_58_no_dummies_spec <- 2728 | gen_additive_mod(select_features = tune(), adjust_deg_free = tune()) |> 2729 | set_mode("classification") |> 2730 | set_engine("mgcv") 2731 | 2732 | test_config_58_no_dummies_workflow <- 2733 | workflow() |> 2734 | add_recipe(test_config_58_no_dummies_recipe) |> 2735 | add_model(test_config_58_no_dummies_spec, formula = stop("add your gam formula")) 2736 | 2737 | set.seed(27246) 2738 | test_config_58_no_dummies_tune <- 2739 | tune_grid(test_config_58_no_dummies_workflow, resamples = stop("add your rsample object"), 2740 | grid = stop("add number of candidate points")) 2741 | 2742 | 2743 | --- 2744 | 2745 | Code 2746 | dummy_template(model, prefix, verbose, tune) 2747 | Output 2748 | library(plsmod) 2749 | 2750 | test_config_59_dummies_recipe <- 2751 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2752 | step_novel(all_nominal_predictors()) |> 2753 | step_dummy(all_nominal_predictors()) |> 2754 | step_zv(all_predictors()) |> 2755 | step_normalize(all_numeric_predictors()) 2756 | 2757 | 
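  ## Note (editorial comment, not template output): the pls() specification
  ## that follows uses the "mixOmics" engine, which is registered by the
  ## plsmod extension package loaded at the top of this template.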
test_config_59_dummies_spec <- 2758 | pls(predictor_prop = tune(), num_comp = tune()) |> 2759 | set_mode("regression") |> 2760 | set_engine("mixOmics") 2761 | 2762 | test_config_59_dummies_workflow <- 2763 | workflow() |> 2764 | add_recipe(test_config_59_dummies_recipe) |> 2765 | add_model(test_config_59_dummies_spec) 2766 | 2767 | set.seed(27246) 2768 | test_config_59_dummies_tune <- 2769 | tune_grid(test_config_59_dummies_workflow, resamples = stop("add your rsample object"), 2770 | grid = stop("add number of candidate points")) 2771 | 2772 | 2773 | --- 2774 | 2775 | Code 2776 | no_dummy_template(model, prefix, verbose, tune) 2777 | Output 2778 | library(plsmod) 2779 | 2780 | test_config_59_no_dummies_recipe <- 2781 | recipe(formula = species ~ ., data = penguins) |> 2782 | step_novel(all_nominal_predictors()) |> 2783 | step_dummy(all_nominal_predictors()) |> 2784 | step_zv(all_predictors()) |> 2785 | step_normalize(all_numeric_predictors()) 2786 | 2787 | test_config_59_no_dummies_spec <- 2788 | pls(predictor_prop = tune(), num_comp = tune()) |> 2789 | set_mode("classification") |> 2790 | set_engine("mixOmics") 2791 | 2792 | test_config_59_no_dummies_workflow <- 2793 | workflow() |> 2794 | add_recipe(test_config_59_no_dummies_recipe) |> 2795 | add_model(test_config_59_no_dummies_spec) 2796 | 2797 | set.seed(27246) 2798 | test_config_59_no_dummies_tune <- 2799 | tune_grid(test_config_59_no_dummies_workflow, resamples = stop("add your rsample object"), 2800 | grid = stop("add number of candidate points")) 2801 | 2802 | 2803 | --- 2804 | 2805 | Code 2806 | dummy_template(model, prefix, verbose, tune) 2807 | Output 2808 | test_config_60_dummies_recipe <- 2809 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2810 | step_novel(all_nominal_predictors()) |> 2811 | step_dummy(all_nominal_predictors()) |> 2812 | step_zv(all_predictors()) |> 2813 | step_normalize(all_numeric_predictors()) 2814 | 2815 | test_config_60_dummies_spec <- 2816 | mlp(hidden_units = tune(), penalty = tune(), epochs = tune()) |> 2817 | set_mode("regression") 2818 | 2819 | test_config_60_dummies_workflow <- 2820 | workflow() |> 2821 | add_recipe(test_config_60_dummies_recipe) |> 2822 | add_model(test_config_60_dummies_spec) 2823 | 2824 | set.seed(27246) 2825 | test_config_60_dummies_tune <- 2826 | tune_grid(test_config_60_dummies_workflow, resamples = stop("add your rsample object"), 2827 | grid = stop("add number of candidate points")) 2828 | 2829 | 2830 | --- 2831 | 2832 | Code 2833 | no_dummy_template(model, prefix, verbose, tune) 2834 | Output 2835 | test_config_60_no_dummies_recipe <- 2836 | recipe(formula = species ~ ., data = penguins) |> 2837 | step_novel(all_nominal_predictors()) |> 2838 | step_dummy(all_nominal_predictors()) |> 2839 | step_zv(all_predictors()) |> 2840 | step_normalize(all_numeric_predictors()) 2841 | 2842 | test_config_60_no_dummies_spec <- 2843 | mlp(hidden_units = tune(), penalty = tune(), epochs = tune()) |> 2844 | set_mode("classification") 2845 | 2846 | test_config_60_no_dummies_workflow <- 2847 | workflow() |> 2848 | add_recipe(test_config_60_no_dummies_recipe) |> 2849 | add_model(test_config_60_no_dummies_spec) 2850 | 2851 | set.seed(27246) 2852 | test_config_60_no_dummies_tune <- 2853 | tune_grid(test_config_60_no_dummies_workflow, resamples = stop("add your rsample object"), 2854 | grid = stop("add number of candidate points")) 2855 | 2856 | 2857 | --- 2858 | 2859 | Code 2860 | dummy_template(model, prefix, verbose, tune) 2861 | Output 2862 | test_config_61_dummies_recipe <- 2863 | 
recipe(formula = body_mass_g ~ ., data = penguins) 2864 | 2865 | test_config_61_dummies_spec <- 2866 | rand_forest(mtry = tune(), min_n = tune(), trees = 1000) |> 2867 | set_mode("regression") |> 2868 | set_engine("ranger") 2869 | 2870 | test_config_61_dummies_workflow <- 2871 | workflow() |> 2872 | add_recipe(test_config_61_dummies_recipe) |> 2873 | add_model(test_config_61_dummies_spec) 2874 | 2875 | set.seed(27246) 2876 | test_config_61_dummies_tune <- 2877 | tune_grid(test_config_61_dummies_workflow, resamples = stop("add your rsample object"), 2878 | grid = stop("add number of candidate points")) 2879 | 2880 | 2881 | --- 2882 | 2883 | Code 2884 | no_dummy_template(model, prefix, verbose, tune) 2885 | Output 2886 | test_config_61_no_dummies_recipe <- 2887 | recipe(formula = species ~ ., data = penguins) 2888 | 2889 | test_config_61_no_dummies_spec <- 2890 | rand_forest(mtry = tune(), min_n = tune(), trees = 1000) |> 2891 | set_mode("classification") |> 2892 | set_engine("ranger") 2893 | 2894 | test_config_61_no_dummies_workflow <- 2895 | workflow() |> 2896 | add_recipe(test_config_61_no_dummies_recipe) |> 2897 | add_model(test_config_61_no_dummies_spec) 2898 | 2899 | set.seed(27246) 2900 | test_config_61_no_dummies_tune <- 2901 | tune_grid(test_config_61_no_dummies_workflow, resamples = stop("add your rsample object"), 2902 | grid = stop("add number of candidate points")) 2903 | 2904 | 2905 | --- 2906 | 2907 | Code 2908 | dummy_template(model, prefix, verbose, tune) 2909 | Output 2910 | test_config_62_dummies_recipe <- 2911 | recipe(formula = body_mass_g ~ ., data = penguins) 2912 | 2913 | test_config_62_dummies_spec <- 2914 | decision_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 2915 | set_mode("regression") |> 2916 | set_engine("rpart") 2917 | 2918 | test_config_62_dummies_workflow <- 2919 | workflow() |> 2920 | add_recipe(test_config_62_dummies_recipe) |> 2921 | add_model(test_config_62_dummies_spec) 2922 | 2923 | set.seed(27246) 2924 | test_config_62_dummies_tune <- 2925 | tune_grid(test_config_62_dummies_workflow, resamples = stop("add your rsample object"), 2926 | grid = stop("add number of candidate points")) 2927 | 2928 | 2929 | --- 2930 | 2931 | Code 2932 | no_dummy_template(model, prefix, verbose, tune) 2933 | Output 2934 | test_config_62_no_dummies_recipe <- 2935 | recipe(formula = species ~ ., data = penguins) 2936 | 2937 | test_config_62_no_dummies_spec <- 2938 | decision_tree(tree_depth = tune(), min_n = tune(), cost_complexity = tune()) |> 2939 | set_mode("classification") |> 2940 | set_engine("rpart") 2941 | 2942 | test_config_62_no_dummies_workflow <- 2943 | workflow() |> 2944 | add_recipe(test_config_62_no_dummies_recipe) |> 2945 | add_model(test_config_62_no_dummies_spec) 2946 | 2947 | set.seed(27246) 2948 | test_config_62_no_dummies_tune <- 2949 | tune_grid(test_config_62_no_dummies_workflow, resamples = stop("add your rsample object"), 2950 | grid = stop("add number of candidate points")) 2951 | 2952 | 2953 | --- 2954 | 2955 | Code 2956 | dummy_template(model, prefix, verbose, tune) 2957 | Output 2958 | test_config_63_dummies_recipe <- 2959 | recipe(formula = body_mass_g ~ ., data = penguins) |> 2960 | step_novel(all_nominal_predictors()) |> 2961 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 2962 | step_zv(all_predictors()) 2963 | 2964 | test_config_63_dummies_spec <- 2965 | boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 2966 | loss_reduction = tune(), sample_size = tune()) |> 2967 | 
set_mode("regression") |> 2968 | set_engine("xgboost") 2969 | 2970 | test_config_63_dummies_workflow <- 2971 | workflow() |> 2972 | add_recipe(test_config_63_dummies_recipe) |> 2973 | add_model(test_config_63_dummies_spec) 2974 | 2975 | set.seed(27246) 2976 | test_config_63_dummies_tune <- 2977 | tune_grid(test_config_63_dummies_workflow, resamples = stop("add your rsample object"), 2978 | grid = stop("add number of candidate points")) 2979 | 2980 | 2981 | --- 2982 | 2983 | Code 2984 | no_dummy_template(model, prefix, verbose, tune) 2985 | Output 2986 | test_config_63_no_dummies_recipe <- 2987 | recipe(formula = species ~ ., data = penguins) |> 2988 | step_novel(all_nominal_predictors()) |> 2989 | step_dummy(all_nominal_predictors(), one_hot = TRUE) |> 2990 | step_zv(all_predictors()) 2991 | 2992 | test_config_63_no_dummies_spec <- 2993 | boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 2994 | loss_reduction = tune(), sample_size = tune()) |> 2995 | set_mode("classification") |> 2996 | set_engine("xgboost") 2997 | 2998 | test_config_63_no_dummies_workflow <- 2999 | workflow() |> 3000 | add_recipe(test_config_63_no_dummies_recipe) |> 3001 | add_model(test_config_63_no_dummies_spec) 3002 | 3003 | set.seed(27246) 3004 | test_config_63_no_dummies_tune <- 3005 | tune_grid(test_config_63_no_dummies_workflow, resamples = stop("add your rsample object"), 3006 | grid = stop("add number of candidate points")) 3007 | 3008 | 3009 | --- 3010 | 3011 | Code 3012 | dummy_template(model, prefix, verbose, tune) 3013 | Output 3014 | library(rules) 3015 | 3016 | test_config_64_dummies_recipe <- 3017 | recipe(formula = body_mass_g ~ ., data = penguins) |> 3018 | step_novel(all_nominal_predictors()) |> 3019 | step_dummy(all_nominal_predictors()) |> 3020 | step_zv(all_predictors()) |> 3021 | step_normalize(all_numeric_predictors()) 3022 | 3023 | test_config_64_dummies_spec <- 3024 | rule_fit(mtry = tune(), trees = tune(), min_n = tune(), tree_depth = tune(), 3025 | learn_rate = tune(), loss_reduction = tune(), sample_size = tune(), penalty = tune()) |> 3026 | set_mode("regression") |> 3027 | set_engine("xrf") 3028 | 3029 | test_config_64_dummies_workflow <- 3030 | workflow() |> 3031 | add_recipe(test_config_64_dummies_recipe) |> 3032 | add_model(test_config_64_dummies_spec) 3033 | 3034 | set.seed(27246) 3035 | test_config_64_dummies_tune <- 3036 | tune_grid(test_config_64_dummies_workflow, resamples = stop("add your rsample object"), 3037 | grid = stop("add number of candidate points")) 3038 | 3039 | 3040 | --- 3041 | 3042 | Code 3043 | no_dummy_template(model, prefix, verbose, tune) 3044 | Output 3045 | library(rules) 3046 | 3047 | test_config_64_no_dummies_recipe <- 3048 | recipe(formula = species ~ ., data = penguins) |> 3049 | step_novel(all_nominal_predictors()) |> 3050 | step_dummy(all_nominal_predictors()) |> 3051 | step_zv(all_predictors()) |> 3052 | step_normalize(all_numeric_predictors()) 3053 | 3054 | test_config_64_no_dummies_spec <- 3055 | rule_fit(mtry = tune(), trees = tune(), min_n = tune(), tree_depth = tune(), 3056 | learn_rate = tune(), loss_reduction = tune(), sample_size = tune(), penalty = tune()) |> 3057 | set_mode("classification") |> 3058 | set_engine("xrf") 3059 | 3060 | test_config_64_no_dummies_workflow <- 3061 | workflow() |> 3062 | add_recipe(test_config_64_no_dummies_recipe) |> 3063 | add_model(test_config_64_no_dummies_spec) 3064 | 3065 | set.seed(27246) 3066 | test_config_64_no_dummies_tune <- 3067 | 
tune_grid(test_config_64_no_dummies_workflow, resamples = stop("add your rsample object"), 3068 | grid = stop("add number of candidate points")) 3069 | 3070 | 3071 | -------------------------------------------------------------------------------- /tests/testthat/test-basics.R: -------------------------------------------------------------------------------- 1 | test_that("wrong model type", { 2 | skip_if_not_installed("modeldata") 3 | library(modeldata) 4 | data("penguins") 5 | 6 | expect_snapshot(use_cubist(island ~ ., data = penguins), error = TRUE) 7 | expect_snapshot(use_C5.0(bill_depth_mm ~ ., data = penguins), error = TRUE) 8 | }) 9 | 10 | test_that("no access to clipboard", { 11 | Sys.setenv(CLIPR_ALLOW = FALSE) 12 | expect_snapshot( 13 | use_kknn(mpg ~ ., data = mtcars, clipboard = TRUE), 14 | error = TRUE 15 | ) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-clipboard.R: -------------------------------------------------------------------------------- 1 | # Code to loop over all tests and configurations 2 | 3 | dummy_clip_template <- function(model, prefix, verbose, tune) { 4 | Sys.setenv(CLIPR_ALLOW = TRUE) 5 | set.seed(3522) # for models where a seed is set 6 | rlang::eval_tidy( 7 | rlang::call2( 8 | paste0("use_", model), 9 | formula = body_mass_g ~ ., 10 | data = expr(penguins), 11 | verbose = enexpr(verbose), 12 | tune = enexpr(tune), 13 | prefix = paste0(prefix, "_dummies"), 14 | colors = TRUE, 15 | clipboard = TRUE 16 | ) 17 | ) 18 | print(clipr::read_clip()) 19 | clipr::clear_clip() 20 | } 21 | 22 | no_dummy_clip_template <- function(model, prefix, verbose, tune) { 23 | set.seed(3522) # for models where a seed is set 24 | rlang::eval_tidy( 25 | rlang::call2( 26 | paste0("use_", model), 27 | formula = species ~ ., 28 | data = expr(penguins), 29 | verbose = enexpr(verbose), 30 | tune = enexpr(tune), 31 | prefix = paste0(prefix, "_no_dummies"), 32 | colors = TRUE, 33 | clipboard = TRUE 34 | ) 35 | ) 36 | print(clipr::read_clip()) 37 | clipr::clear_clip() 38 | } 39 | 40 | verify_models <- function(model, prefix, tune, verbose) { 41 | skip_if_not_installed("modeldata") 42 | # These are automatically skipped on CRAN 43 | library(modeldata) 44 | data("penguins") 45 | 46 | penguins$island <- as.character(penguins$island) 47 | 48 | if (model != "C5.0") { 49 | expect_snapshot(dummy_clip_template(model, prefix, verbose, tune)) 50 | } 51 | if (model != "cubist") { 52 | expect_snapshot(no_dummy_clip_template(model, prefix, verbose, tune)) 53 | } 54 | } 55 | 56 | 57 | test_that("all model templates with clipboard", { 58 | skip_if_not_installed("modeldata") 59 | library(modeldata) 60 | data("penguins") 61 | 62 | penguins$island <- as.character(penguins$island) 63 | 64 | skip_on_cran() 65 | skip_on_os("linux") 66 | skip_on_os("windows") 67 | 68 | models <- c( 69 | "bag_tree_rpart", 70 | "C5.0", 71 | "cubist", 72 | "dbarts", 73 | "earth", 74 | "glmnet", 75 | "kernlab_svm_poly", 76 | "kernlab_svm_rbf", 77 | "kknn", 78 | "mgcv", 79 | "mixOmics", 80 | "nnet", 81 | "ranger", 82 | "rpart", 83 | "xgboost", 84 | "xrf" 85 | ) 86 | 87 | test_config <- 88 | expand.grid( 89 | model = models, 90 | tune = c(FALSE, TRUE), 91 | verbose = c(TRUE, FALSE) 92 | ) 93 | test_config$pref <- paste0("test_config_", 1:nrow(test_config)) 94 | 95 | res <- purrr::pmap(test_config, verify_models) 96 | }) 97 | -------------------------------------------------------------------------------- /tests/testthat/test-templates.R: 
-------------------------------------------------------------------------------- 1 | # Code to loop over all tests and configurations 2 | 3 | dummy_template <- function(model, prefix, verbose, tune) { 4 | set.seed(3522) # for models where a seed is set 5 | rlang::eval_tidy( 6 | rlang::call2( 7 | paste0("use_", model), 8 | formula = body_mass_g ~ ., 9 | data = expr(penguins), 10 | verbose = enexpr(verbose), 11 | tune = enexpr(tune), 12 | prefix = paste0(prefix, "_dummies"), 13 | colors = TRUE 14 | ) 15 | ) 16 | } 17 | 18 | no_dummy_template <- function(model, prefix, verbose, tune) { 19 | set.seed(3522) # for models where a seed is set 20 | rlang::eval_tidy( 21 | rlang::call2( 22 | paste0("use_", model), 23 | formula = species ~ ., 24 | data = expr(penguins), 25 | verbose = enexpr(verbose), 26 | tune = enexpr(tune), 27 | prefix = paste0(prefix, "_no_dummies"), 28 | colors = TRUE 29 | ) 30 | ) 31 | } 32 | 33 | verify_models <- function(model, prefix, tune, verbose) { 34 | skip_if_not_installed("modeldata") 35 | library(modeldata) 36 | data("penguins") 37 | 38 | penguins$island <- as.character(penguins$island) 39 | 40 | # These are automatically skipped on CRAN 41 | if (model != "C5.0") { 42 | expect_snapshot(dummy_template(model, prefix, verbose, tune)) 43 | } 44 | if (model != "cubist") { 45 | expect_snapshot(no_dummy_template(model, prefix, verbose, tune)) 46 | } 47 | } 48 | 49 | 50 | test_that("all model templates", { 51 | skip_if_not_installed("modeldata") 52 | library(modeldata) 53 | data("penguins") 54 | 55 | penguins$island <- as.character(penguins$island) 56 | 57 | models <- c( 58 | "bag_tree_rpart", 59 | "C5.0", 60 | "cubist", 61 | "dbarts", 62 | "earth", 63 | "glmnet", 64 | "kernlab_svm_poly", 65 | "kernlab_svm_rbf", 66 | "kknn", 67 | "mgcv", 68 | "mixOmics", 69 | "nnet", 70 | "ranger", 71 | "rpart", 72 | "xgboost", 73 | "xrf" 74 | ) 75 | 76 | test_config <- 77 | expand.grid( 78 | model = models, 79 | tune = c(FALSE, TRUE), 80 | verbose = c(TRUE, FALSE) 81 | ) 82 | test_config$pref <- paste0("test_config_", 1:nrow(test_config)) 83 | 84 | res <- purrr::pmap(test_config, verify_models) 85 | }) 86 | -------------------------------------------------------------------------------- /usemodels.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | --------------------------------------------------------------------------------
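
Illustrative usage note (not part of the repository sources): the generated templates captured in the snapshots above deliberately leave `resamples` and `grid` as `stop()` placeholders for the user to fill in. Below is a minimal sketch of how one of them might be completed, assuming the template code for `test_config_61` (the ranger random forest regression template) has already been run so that `test_config_61_dummies_workflow` exists. The names `penguin_folds`, `v = 5`, and `grid = 10` are arbitrary choices for this example, not package output, and the ranger package must be installed for the fits to succeed.

library(tidymodels)   # attaches rsample, tune, workflows, parsnip, recipes
library(modeldata)
data("penguins")
penguins <- na.omit(penguins)   # drop rows with missing values before resampling

set.seed(27246)
# replaces stop("add your rsample object") in the generated template
penguin_folds <- vfold_cv(penguins, v = 5)

test_config_61_dummies_tune <-
  tune_grid(
    test_config_61_dummies_workflow,   # built by the generated template above
    resamples = penguin_folds,
    grid = 10                          # replaces stop("add number of candidate points")
  )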