├── .github
    ├── .gitignore
    └── workflows
    │   ├── check-release.yaml
    │   ├── pkgdown.yaml
    │   └── test-coverage.yaml
├── vignettes
    └── .gitignore
├── revdep
    ├── problems.md
    ├── .gitignore
    ├── email.yml
    ├── cran.md
    ├── failures.md
    └── README.md
├── README_cache
    └── gfm
    │   ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdb
    │   ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdb
    │   ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.rdb
    │   ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdb
    │   ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdb
    │   ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdb
    │   ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdb
    │   ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdb
    │   ├── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdb
    │   ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdb
    │   ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdb
    │   ├── ex0_08952e0ed30fe2644cd3463413708f0d.rdb
    │   ├── ex0_08952e0ed30fe2644cd3463413708f0d.rdx
    │   ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx
    │   ├── ex0_08952e0ed30fe2644cd3463413708f0d.RData
    │   ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData
    │   ├── hdrcde_24608eda67b7b094496dd13a3d335ced.rdb
    │   ├── hdrcde_24608eda67b7b094496dd13a3d335ced.rdx
    │   ├── get-hdr_56a211107c10d29abbf2427dc2311e41.rdb
    │   ├── get-hdr_56a211107c10d29abbf2427dc2311e41.rdx
    │   ├── hdrcde_24608eda67b7b094496dd13a3d335ced.RData
    │   ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb
    │   ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx
    │   ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx
    │   ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb
    │   ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx
    │   ├── get-hdr_56a211107c10d29abbf2427dc2311e41.RData
    │   ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx
    │   ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb
    │   ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx
    │   ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb
    │   ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx
    │   ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData
    │   ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.RData
    │   ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.RData
    │   ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData
    │   ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb
    │   ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx
    │   ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData
    │   ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData
    │   ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData
    │   ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb
    │   ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx
    │   ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx
    │   ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx
    │   ├── __packages
    │   ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb
    │   ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx
    │   ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx
    │   ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx
    │   ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx
    │   ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx
    │   ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx
    │   ├── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx
    │   ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData
    │   ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData
    │   ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData
    │   ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData
    │   ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData
    │   ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData
    │   ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData
    │   ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData
    │   ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData
    │   └── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData
├── LICENSE
├── man
    ├── figures
    │   ├── logo.png
    │   ├── README-ex0-1.png
    │   ├── README-ex1-1.png
    │   ├── README-hdrcde-1.png
    │   ├── README-hdrcde-2.png
    │   ├── README-ex_methods-1.png
    │   ├── README-hdrcde-2-1.png
    │   ├── README-ex_hdr_fun_1-1.png
    │   ├── README-ex_hdr_fun_2-1.png
    │   ├── README-ex_penguins-1.png
    │   ├── README-ex-expand-lims-1.png
    │   ├── README-ex_after_stat-1.png
    │   ├── README-ex_after_stat-2.png
    │   ├── README-geom_hdr_points-1.png
    │   ├── README-unnamed-chunk-3-1.png
    │   ├── README-ex_geom_hdr_rug_1-1.png
    │   ├── README-ex_geom_hdr_rug_2-1.png
    │   ├── README-ex_geom_hdr_rug_3-1.png
    │   ├── README-ex_geom_hdr_rug_4-1.png
    │   ├── README-ex_penguins_facet-1.png
    │   ├── README-ex_penguins_lines-1.png
    │   └── README-ex_geom_hdr_points-1.png
    ├── ggdensity.Rd
    ├── method_norm_1d.Rd
    ├── method_mvnorm.Rd
    ├── method_freqpoly.Rd
    ├── method_freqpoly_1d.Rd
    ├── method_histogram_1d.Rd
    ├── method_kde.Rd
    ├── method_histogram.Rd
    ├── method_kde_1d.Rd
    ├── get_hdr_1d.Rd
    ├── geom_hdr_points.Rd
    ├── get_hdr.Rd
    ├── geom_hdr_points_fun.Rd
    ├── geom_hdr_fun.Rd
    ├── geom_hdr_rug_fun.Rd
    ├── geom_hdr_rug.Rd
    └── geom_hdr.Rd
├── _pkgdown.yml
├── pkgdown
    └── favicon
    │   ├── favicon.ico
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── apple-touch-icon.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   └── apple-touch-icon-180x180.png
├── CRAN-SUBMISSION
├── cran-comments.md
├── tests
    ├── testthat
    │   ├── fixtures
    │   │   └── df_norm.rds
    │   ├── test-res_to_df_1d.R
    │   ├── test-fix_probs.R
    │   ├── test-res_to_df.R
    │   ├── test-get_hdr_1d.R
    │   ├── test-visual-tests.R
    │   ├── test-layer-wrappers.R
    │   └── test-get_hdr.R
    └── testthat.R
├── R
    ├── attach.R
    ├── ggdensity-package.R
    ├── helpers.R
    ├── hdr_lines.R
    ├── hdr_lines_fun.R
    ├── helpers-ggplot2.R
    ├── hdr_points.R
    ├── hdr_points_fun.R
    ├── hdr_fun.R
    ├── hdr_rug_fun.R
    ├── method_1d.R
    ├── hdr.R
    ├── get_hdr_1d.R
    ├── get_hdr.R
    ├── hdr_rug.R
    └── method.R
├── .gitignore
├── .Rbuildignore
├── ggdensity.Rproj
├── LICENSE.md
├── DESCRIPTION
├── NAMESPACE
└── NEWS.md


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2021
2 | COPYRIGHT HOLDER: ggdensity authors
3 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://jamesotto852.github.io/ggdensity/
2 | template:
3 |   bootstrap: 5
4 | 
5 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 1.0.0
2 | Date: 2023-02-09 22:57:39 UTC
3 | SHA: 54e4677246f7f7d4e50b02d4a5d61b993900c46f
4 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | 
3 | 0 errors | 0 warnings | 0 notes
4 | 
5 | * This is a new release.
6 | 


--------------------------------------------------------------------------------
/man/figures/README-ex0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex0-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex1-1.png


--------------------------------------------------------------------------------
/man/figures/README-hdrcde-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-1.png


--------------------------------------------------------------------------------
/man/figures/README-hdrcde-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-2.png


--------------------------------------------------------------------------------
/man/figures/README-ex_methods-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_methods-1.png


--------------------------------------------------------------------------------
/man/figures/README-hdrcde-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-2-1.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | cloud.noindex
6 | data.sqlite
7 | *.html
8 | 


--------------------------------------------------------------------------------
/tests/testthat/fixtures/df_norm.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/tests/testthat/fixtures/df_norm.rds


--------------------------------------------------------------------------------
/man/figures/README-ex_hdr_fun_1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_hdr_fun_1-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_hdr_fun_2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_hdr_fun_2-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_penguins-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins-1.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/man/figures/README-ex-expand-lims-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex-expand-lims-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_after_stat-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_after_stat-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_after_stat-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_after_stat-2.png


--------------------------------------------------------------------------------
/man/figures/README-geom_hdr_points-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-geom_hdr_points-1.png


--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/revdep/email.yml:
--------------------------------------------------------------------------------
1 | release_date: ???
2 | rel_release_date: ???
3 | my_news_url: ???
4 | release_version: ???
5 | release_details: ???
6 | 


--------------------------------------------------------------------------------
/man/figures/README-ex_geom_hdr_rug_1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_1-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_geom_hdr_rug_2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_2-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_geom_hdr_rug_3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_3-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_geom_hdr_rug_4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_4-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_penguins_facet-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins_facet-1.png


--------------------------------------------------------------------------------
/man/figures/README-ex_penguins_lines-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins_lines-1.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/man/figures/README-ex_geom_hdr_points-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_points-1.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData


--------------------------------------------------------------------------------
/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.RData


--------------------------------------------------------------------------------
/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData


--------------------------------------------------------------------------------
/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx


--------------------------------------------------------------------------------
/R/attach.R:
--------------------------------------------------------------------------------
1 | .onAttach <- function(...) {
2 |   if(!interactive() || stats::runif(1) > 0.1) return()
3 |   packageStartupMessage('  Please cite ggdensity! See citation("ggdensity") for details.')
4 | }
5 | 


--------------------------------------------------------------------------------
/README_cache/gfm/__packages:
--------------------------------------------------------------------------------
 1 | ggplot2
 2 | ggdensity
 3 | palmerpenguins
 4 | purrr
 5 | patchwork
 6 | tidyverse
 7 | tibble
 8 | tidyr
 9 | readr
10 | dplyr
11 | stringr
12 | forcats
13 | hdrcde
14 | 


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx


--------------------------------------------------------------------------------
/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData


--------------------------------------------------------------------------------
/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData


--------------------------------------------------------------------------------
/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 | 
3 | We checked 1 reverse dependencies (0 from CRAN + 1 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package.
4 | 
5 |  * We saw 0 new problems
6 |  * We failed to check 0 packages
7 | 
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .Rdata
 4 | .httr-oauth
 5 | .DS_Store
 6 | docs
 7 | inst/doc
 8 | /doc/
 9 | /Meta/
10 | 
11 | revdep/checks
12 | revdep/library
13 | revdep/checks.noindex
14 | revdep/library.noindex
15 | revdep/data.sqlite
16 | revdep/cloud.noindex
17 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^ggdensity\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^README\.Rmd$
 5 | ^README\.md$
 6 | ^README_cache$
 7 | ^man/figures$
 8 | ^_pkgdown\.yml$
 9 | ^docs$
10 | ^pkgdown$
11 | ^\.github$
12 | ^CRAN-SUBMISSION$
13 | ^doc$
14 | ^Meta$
15 | ^revdep$
16 | ^cran-comments\.md$
17 | 


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
 1 | # ggblanket
 2 | 
 3 | <details>
 4 | 
 5 | * Version: 
 6 | * GitHub: https://github.com/jamesotto852/ggdensity
 7 | * Source code: NA
 8 | * Number of recursive dependencies: 0
 9 | 
10 | </details>
11 | 
12 | ## Error before installation
13 | 
14 | ### Devel
15 | 
16 | ```
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | ```
24 | ### CRAN
25 | 
26 | ```
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | ```
34 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/tests.html
 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
 8 | 
 9 | library(testthat)
10 | library(ggdensity)
11 | 
12 | test_check("ggdensity")
13 | 


--------------------------------------------------------------------------------
/ggdensity.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/man/ggdensity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ggdensity-package.R
 3 | \docType{package}
 4 | \name{ggdensity}
 5 | \alias{ggdensity}
 6 | \alias{package-ggdensity}
 7 | \title{ggdensity: Stats and Geoms for Density Estimation with ggplot2}
 8 | \description{
 9 | A package that allows more flexible computations for visualization of density
10 | estimates with ggplot2.
11 | }
12 | \seealso{
13 | Useful links:
14 | \itemize{
15 | \item \url{https://jamesotto852.github.io/ggdensity/}
16 | \item \url{https://github.com/jamesotto852/ggdensity/}
17 | }
18 | }
19 | 


--------------------------------------------------------------------------------
/R/ggdensity-package.R:
--------------------------------------------------------------------------------
 1 | #' ggdensity: Stats and Geoms for Density Estimation with ggplot2
 2 | #'
 3 | #' A package that allows more flexible computations for visualization of density
 4 | #' estimates with ggplot2.
 5 | #'
 6 | #' @seealso
 7 | #'
 8 | #' Useful links:
 9 | #' * \url{https://jamesotto852.github.io/ggdensity/}
10 | #' * \url{https://github.com/jamesotto852/ggdensity/}
11 | #'
12 | #' @import ggplot2
13 | #' @importFrom MASS bandwidth.nrd kde2d
14 | #' @importFrom stats uniroot cov pchisq setNames sd cor dnorm
15 | #' @docType package
16 | #' @name ggdensity
17 | #' @aliases ggdensity package-ggdensity
18 | NULL
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-res_to_df_1d.R:
--------------------------------------------------------------------------------
 1 | test_that("res_to_df_1d returns correct structure for each value of output", {
 2 | 
 3 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
 4 |   probs <- c(.99, .95, .80, .50)
 5 | 
 6 |   res <- get_hdr_1d(data$x, method_kde_1d(), probs)
 7 | 
 8 |   # Checking output == "rug"
 9 |   df_rug <- res_to_df_1d(res, probs, group = 1, output = "rug")
10 |   expect_type(df_rug, "list")
11 |   expect_equal(colnames(df_rug), c("x", "fhat", "fhat_discretized", "probs"))
12 |   expect(is.ordered(df_rug$probs), "probs is an ordered object")
13 |   expect_equal(levels(df_rug$probs), scales::percent_format(accuracy = 1)(probs))
14 | 
15 | })
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fix_probs.R:
--------------------------------------------------------------------------------
 1 | test_that("fix_probs() works as intended", {
 2 | 
 3 |   # Check defaults
 4 |   expect_equal(fix_probs(c(.99, .95, .80, .50)), c(.99, .95, .80, .50))
 5 | 
 6 |   # Reorders probabilities correctly
 7 |   expect_equal(fix_probs(c(.80, .50, .99, .95)), c(.99, .95, .80, .50))
 8 | 
 9 |   # Works with vectors of length 1
10 |   expect_equal(fix_probs(.50), .5)
11 | 
12 |   # Issues error if any probabilites are outside (0, 1)
13 |   expect_error(fix_probs(c(1.1, .80, .5)), regexp = "must be between")
14 |   expect_error(fix_probs(c(.80, .5, -1)), regexp = "must be between")
15 |   expect_error(fix_probs(c(1)), regexp = "must be between")
16 |   expect_error(fix_probs(c(0)), regexp = "must be between")
17 | 
18 | })
19 | 


--------------------------------------------------------------------------------
/.github/workflows/check-release.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 |       R_KEEP_PKG_SOURCE: yes
17 |     steps:
18 |       - uses: actions/checkout@v3
19 | 
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 | 
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::rcmdcheck
27 |           needs: check
28 | 
29 |       - uses: r-lib/actions/check-r-package@v2
30 | 


--------------------------------------------------------------------------------
/man/method_norm_1d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method_1d.R
 3 | \name{method_norm_1d}
 4 | \alias{method_norm_1d}
 5 | \title{Univariate parametric normal HDR estimator}
 6 | \usage{
 7 | method_norm_1d()
 8 | }
 9 | \description{
10 | Function used to specify univariate normal density estimator
11 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}).
12 | }
13 | \details{
14 | For more details on the use and implementation of the \verb{method_*_1d()} functions,
15 | see \code{vignette("method", "ggdensity")}.
16 | }
17 | \examples{
18 | # Normal estimators are useful when an assumption of normality is appropriate
19 | df <- data.frame(x = rnorm(1e3))
20 | 
21 | ggplot(df, aes(x)) +
22 |   geom_hdr_rug(method = method_norm_1d()) +
23 |   geom_density()
24 | 
25 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
26 | res <- get_hdr_1d(df$x, method = method_norm_1d())
27 | str(res)
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/method_mvnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method.R
 3 | \name{method_mvnorm}
 4 | \alias{method_mvnorm}
 5 | \title{Bivariate parametric normal HDR estimator}
 6 | \usage{
 7 | method_mvnorm()
 8 | }
 9 | \description{
10 | Function used to specify bivariate normal density estimator
11 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}).
12 | }
13 | \details{
14 | For more details on the use and implementation of the \verb{method_*()} functions,
15 | see \code{vignette("method", "ggdensity")}.
16 | }
17 | \examples{
18 | # Normal estimator is useful when an assumption of normality is appropriate
19 | set.seed(1)
20 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
21 | 
22 | ggplot(df, aes(x, y)) +
23 |   geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) +
24 |   geom_point(size = 1)
25 | 
26 | # Can also be used with `get_hdr()` for numerical summary of HDRs
27 | res <- get_hdr(df, method = method_mvnorm())
28 | str(res)
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2021 ggdensity authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/R/helpers.R:
--------------------------------------------------------------------------------
 1 | # this script contains several unexported helper functions
 2 | 
 3 | # normalization/scaling functions
 4 | normalize <- function(v) v / sum(v)
 5 | standardize <- function(v, min = min(v), max = max(v)) (v - min) / (max - min)
 6 | rescale <- function(v) v / max(v)
 7 | 
 8 | # discrete approximation to P[f^(X,Y) >= c]
 9 | prob_above_c <- function(df, c) {
10 |   if (length(c) > 1) return(vapply(c, prob_above_c, df = df, numeric(1)))
11 |   with(df, sum(fhat_discretized[fhat >= c]))
12 | }
13 | 
14 | # numerical approximation for finding hdr
15 | # if method = "histogram", don't want to use uniroot, runs into issue if n is small
16 | find_cutoff <- function(df, conf, uniroot = TRUE) {
17 | 
18 |   if (length(conf) > 1) return(vapply(conf, function(x) find_cutoff(df, x, uniroot), numeric(1)))
19 | 
20 |   # the following is set to FALSE to override when other code calls
21 |   # the function with uniroot = TRUE, remove once we're confident the
22 |   # code works in all circumstances
23 |   if (FALSE) {
24 | 
25 |     uniroot(function(c) prob_above_c(df, c) - conf, lower = 0, upper = 1)$root
26 | 
27 |   } else {
28 | 
29 |     # sort df rows by fhat
30 |     df <- df[order(df$fhat, decreasing = TRUE),]
31 | 
32 |     # compute cumsum of probs
33 |     df$cumprob <- cumsum(df$fhat_discretized)
34 | 
35 |     # determine cutoff
36 |     max(df[df$cumprob >= conf,]$fhat)
37 | 
38 |   }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test-res_to_df.R:
--------------------------------------------------------------------------------
 1 | test_that("res_to_df returns correct structure for each value of output", {
 2 | 
 3 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
 4 |   probs <- c(.99, .95, .80, .50)
 5 | 
 6 |   res <- get_hdr(data, method_kde(), probs)
 7 | 
 8 |   # Checking output == "bands"
 9 |   df_bands <- res_to_df(res, probs, group = 1, output = "bands")
10 |   expect_type(df_bands, "list")
11 |   expect_equal(colnames(df_bands), c("x", "y", "piece", "group", "subgroup", ".size", "probs"))
12 |   expect(is.ordered(df_bands$probs), "probs is an ordered object")
13 |   expect_equal(levels(df_bands$probs), scales::percent_format(accuracy = 1)(probs))
14 | 
15 |   # Checking output == "lines"
16 |   df_lines <- res_to_df(res, probs, group = 1, output = "lines")
17 |   expect_type(df_lines, "list")
18 |   expect_equal(colnames(df_lines), c("x", "y", "piece", "group", ".size", "probs"))
19 |   expect(is.ordered(df_lines$probs), "probs is an ordered object")
20 |   expect_equal(levels(df_lines$probs), scales::percent_format(accuracy = 1)(probs))
21 | 
22 |   # Checking output == "points"
23 |   df_points <- res_to_df(res, probs, group = 1, output = "points")
24 |   expect_type(df_points, "list")
25 |   expect_equal(colnames(df_points), c("x", "y", "probs"))
26 |   expect(is.ordered(df_points$probs), "probs is an ordered object")
27 |   expect_equal(levels(df_points$probs), scales::percent_format(accuracy = 1)(c(1, probs)))
28 | 
29 | })
30 | 


--------------------------------------------------------------------------------
/man/method_freqpoly.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method.R
 3 | \name{method_freqpoly}
 4 | \alias{method_freqpoly}
 5 | \title{Bivariate frequency polygon HDR estimator}
 6 | \usage{
 7 | method_freqpoly(bins = NULL)
 8 | }
 9 | \arguments{
10 | \item{bins}{Number of bins along each axis.
11 | Either a vector of length 2 or a scalar value which is recycled for both dimensions.
12 | Defaults to normal reference rule (Scott, pg 87).}
13 | }
14 | \description{
15 | Function used to specify bivariate frequency polygon density estimator
16 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}).
17 | }
18 | \details{
19 | For more details on the use and implementation of the \verb{method_*()} functions,
20 | see \code{vignette("method", "ggdensity")}.
21 | }
22 | \examples{
23 | set.seed(1)
24 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
25 | 
26 | ggplot(df, aes(x, y)) +
27 |   geom_hdr(method = method_freqpoly()) +
28 |   geom_point(size = 1)
29 | 
30 | # The resolution of the frequency polygon estimator can be set via `bins`
31 | ggplot(df, aes(x, y)) +
32 |   geom_hdr(method = method_freqpoly(bins = c(8, 25))) +
33 |   geom_point(size = 1)
34 | 
35 | # Can also be used with `get_hdr()` for numerical summary of HDRs
36 | res <- get_hdr(df, method = method_freqpoly())
37 | str(res)
38 | 
39 | }
40 | \references{
41 | Scott, David W. Multivariate Density Estimation (2e), Wiley.
42 | }
43 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: ggdensity
 2 | Title: Interpretable Bivariate Density Visualization with 'ggplot2'
 3 | Version: 1.0.0.900
 4 | Authors@R: 
 5 |     c(person(given = "James",
 6 |            family = "Otto",
 7 |            role = c("aut", "cre"),
 8 |            email = "jamesotto852@gmail.com",
 9 |            comment = c(ORCID = "0000-0002-0665-2515")),
10 |       person(given = "David",
11 |            family = "Kahle",
12 |            role = c("aut"),
13 |            email = "david@kahle.io",
14 |            comment = c(ORCID = "0000-0002-9999-1558")))
15 | Description: The 'ggplot2' package provides simple functions for visualizing contours
16 |   of 2-d kernel density estimates. 'ggdensity' implements several additional density estimators 
17 |   as well as more interpretable visualizations based on highest density regions instead of
18 |   the traditional height of the estimated density surface. 
19 | License: MIT + file LICENSE
20 | Encoding: UTF-8
21 | Roxygen: list(markdown = TRUE)
22 | RoxygenNote: 7.2.3
23 | Depends:
24 |     ggplot2
25 | Imports:
26 |     isoband,
27 |     vctrs,
28 |     tibble,
29 |     MASS,
30 |     stats,
31 |     scales
32 | URL: https://jamesotto852.github.io/ggdensity/, https://github.com/jamesotto852/ggdensity/
33 | BugReports: https://github.com/jamesotto852/ggdensity/issues/
34 | Suggests: 
35 |     vdiffr,
36 |     testthat (>= 3.0.0),
37 |     knitr,
38 |     rmarkdown
39 | Config/testthat/edition: 3
40 | VignetteBuilder: knitr
41 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(GeomHdr)
 4 | export(GeomHdrFun)
 5 | export(GeomHdrLines)
 6 | export(GeomHdrLinesFun)
 7 | export(GeomHdrRug)
 8 | export(GeomHdrRugFun)
 9 | export(StatHdr)
10 | export(StatHdrFun)
11 | export(StatHdrLines)
12 | export(StatHdrLinesFun)
13 | export(StatHdrPoints)
14 | export(StatHdrPointsFun)
15 | export(StatHdrRug)
16 | export(StatHdrRugFun)
17 | export(geom_hdr)
18 | export(geom_hdr_fun)
19 | export(geom_hdr_lines)
20 | export(geom_hdr_lines_fun)
21 | export(geom_hdr_points)
22 | export(geom_hdr_points_fun)
23 | export(geom_hdr_rug)
24 | export(geom_hdr_rug_fun)
25 | export(get_hdr)
26 | export(get_hdr_1d)
27 | export(method_freqpoly)
28 | export(method_freqpoly_1d)
29 | export(method_histogram)
30 | export(method_histogram_1d)
31 | export(method_kde)
32 | export(method_kde_1d)
33 | export(method_mvnorm)
34 | export(method_norm_1d)
35 | export(stat_hdr)
36 | export(stat_hdr_fun)
37 | export(stat_hdr_lines)
38 | export(stat_hdr_lines_fun)
39 | export(stat_hdr_points)
40 | export(stat_hdr_points_fun)
41 | export(stat_hdr_rug)
42 | export(stat_hdr_rug_fun)
43 | import(ggplot2)
44 | importFrom(MASS,bandwidth.nrd)
45 | importFrom(MASS,kde2d)
46 | importFrom(scales,percent)
47 | importFrom(scales,percent_format)
48 | importFrom(stats,cor)
49 | importFrom(stats,cov)
50 | importFrom(stats,dnorm)
51 | importFrom(stats,pchisq)
52 | importFrom(stats,sd)
53 | importFrom(stats,setNames)
54 | importFrom(stats,uniroot)
55 | 


--------------------------------------------------------------------------------
/man/method_freqpoly_1d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method_1d.R
 3 | \name{method_freqpoly_1d}
 4 | \alias{method_freqpoly_1d}
 5 | \title{Univariate frequency polygon HDR estimator}
 6 | \usage{
 7 | method_freqpoly_1d(bins = NULL)
 8 | }
 9 | \arguments{
10 | \item{bins}{Number of bins. Defaults to normal reference rule (Scott, pg 59).}
11 | }
12 | \description{
13 | Function used to specify univariate frequency polygon density estimator
14 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}).
15 | }
16 | \details{
17 | For more details on the use and implementation of the \verb{method_*_1d()} functions,
18 | see \code{vignette("method", "ggdensity")}.
19 | }
20 | \examples{
21 | df <- data.frame(x = rnorm(1e3))
22 | 
23 | # Strip chart to visualize 1-d data
24 | p <- ggplot(df, aes(x)) +
25 |   geom_jitter(aes(y = 0), width = 0, height = 2) +
26 |   scale_y_continuous(name = NULL, breaks = NULL) +
27 |   coord_cartesian(ylim = c(-3, 3))
28 | 
29 | p
30 | 
31 | p + geom_hdr_rug(method = method_freqpoly_1d())
32 | 
33 | # The resolution of the frequency polygon estimator can be set via `bins`
34 | p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100))
35 | 
36 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
37 | res <- get_hdr_1d(df$x, method = method_freqpoly_1d())
38 | str(res)
39 | 
40 | }
41 | \references{
42 | Scott, David W. Multivariate Density Estimation (2e), Wiley.
43 | }
44 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     steps:
23 |       - uses: actions/checkout@v3
24 | 
25 |       - uses: r-lib/actions/setup-pandoc@v2
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::pkgdown, local::.
34 |           needs: website
35 | 
36 |       - name: Build site
37 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
38 |         shell: Rscript {0}
39 | 
40 |       - name: Deploy to GitHub pages 🚀
41 |         if: github.event_name != 'pull_request'
42 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
43 |         with:
44 |           clean: false
45 |           branch: gh-pages
46 |           folder: docs
47 | 


--------------------------------------------------------------------------------
/man/method_histogram_1d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method_1d.R
 3 | \name{method_histogram_1d}
 4 | \alias{method_histogram_1d}
 5 | \title{Univariate histogram HDR estimator}
 6 | \usage{
 7 | method_histogram_1d(bins = NULL)
 8 | }
 9 | \arguments{
10 | \item{bins}{Number of bins. Defaults to normal reference rule (Scott, pg 59).}
11 | }
12 | \description{
13 | Function used to specify univariate histogram density estimator
14 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}).
15 | }
16 | \details{
17 | For more details on the use and implementation of the \verb{method_*_1d()} functions,
18 | see \code{vignette("method", "ggdensity")}.
19 | }
20 | \examples{
21 | # Histogram estimators can be useful when data has boundary constraints
22 | df <- data.frame(x = rexp(1e3))
23 | 
24 | # Strip chart to visualize 1-d data
25 | p <- ggplot(df, aes(x)) +
26 |   geom_jitter(aes(y = 0), width = 0, height = 2) +
27 |   scale_y_continuous(name = NULL, breaks = NULL) +
28 |   coord_cartesian(ylim = c(-3, 3))
29 | 
30 | p
31 | 
32 | p + geom_hdr_rug(method = method_histogram_1d())
33 | 
34 | # The resolution of the histogram estimator can be set via `bins`
35 | p + geom_hdr_rug(method = method_histogram_1d(bins = 5))
36 | 
37 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
38 | res <- get_hdr_1d(df$x, method = method_histogram_1d())
39 | str(res)
40 | 
41 | }
42 | \references{
43 | Scott, David W. Multivariate Density Estimation (2e), Wiley.
44 | }
45 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: test-coverage
10 | 
11 | jobs:
12 |   test-coverage:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v3
19 | 
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 | 
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::covr
27 |           needs: coverage
28 | 
29 |       - name: Test coverage
30 |         run: |
31 |           covr::codecov(
32 |             quiet = FALSE,
33 |             clean = FALSE,
34 |             install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
35 |           )
36 |         shell: Rscript {0}
37 | 
38 |       - name: Show testthat output
39 |         if: always()
40 |         run: |
41 |           ## --------------------------------------------------------------------
42 |           find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
43 |         shell: bash
44 | 
45 |       - name: Upload test results
46 |         if: failure()
47 |         uses: actions/upload-artifact@v3
48 |         with:
49 |           name: coverage-test-failures
50 |           path: ${{ runner.temp }}/package
51 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # ggdensity (development version)
 2 | 
 3 | * Fixed ordering of probabilities in the plot legend to be independent of order specified in `probs` argument (Reported by @z3tt #32)
 4 | 
 5 | # ggdensity 1.0.0
 6 | 
 7 | ## Features
 8 | 
 9 | * Added `get_hdr()` and `get_hdr_1d()` functions, 
10 | exporting implementation of HDR computations (Suggested by @eliocamp #28)
11 | 
12 | * Reworked `method` argument, allowing for either character or function call specification.
13 | Implemented related `method_*()` and `method_*_1d()` functions (e.g. `method_kde()` and `method_kde_1d()`).
14 | See `?get_hdr` or `vignette("method", "ggdensity")` for details (Suggested by @eliocamp #29)
15 | 
16 | * Added unit tests (Suggested by @eliocamp, #30)
17 | 
18 | ## Breaking Changes
19 | 
20 | * Removed arguments governing density estimators from `stat_hdr()` and other layer functions--these
21 | are now specified with `method_*()` and `method_*_1d()` functions
22 | 
23 | ## Fixes
24 | 
25 | * [Added support](https://www.tidyverse.org/blog/2022/08/ggplot2-3-4-0-size-to-linewidth/) for the new `linewidth` aesthetic (Reported by @eliocamp, #23)
26 | 
27 | # ggdensity 0.1.1
28 | 
29 | ## Fixes
30 | 
31 | * Removed **ggplot2** build-time dependencies (Reported by @thomasp85, #21)
32 | 
33 | * Fixed bug in `stat_hdr_lines_fun()` which drew lines between components of disconnected HDRs (Reported by @afranks86, #20)
34 | 
35 | 
36 | # ggdensity 0.1.0
37 | 
38 | ## Features
39 | 
40 | * Added `geom`/`stat_hdr_rug()` for visualizing marginal HDRs via "rug plot"
41 | style graphics along plot axes (#14)
42 | 
43 | * Added `geom`/`stat_hdr_points()` and `geom`/`stat_hdr_points_fun()` for 
44 | visualizing HDR membership of points via a colored scatterplot (#15)
45 | 
46 | ## Fixes
47 | 
48 | * Changed name of computed variable in all stat functions from `level` to `probs`
49 | 


--------------------------------------------------------------------------------
/man/method_kde.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method.R
 3 | \name{method_kde}
 4 | \alias{method_kde}
 5 | \title{Bivariate kernel density HDR estimator}
 6 | \usage{
 7 | method_kde(h = NULL, adjust = c(1, 1))
 8 | }
 9 | \arguments{
10 | \item{h}{Bandwidth (vector of length two). If \code{NULL}, estimated
11 | using \code{\link[MASS:bandwidth.nrd]{MASS::bandwidth.nrd()}}.}
12 | 
13 | \item{adjust}{A multiplicative bandwidth adjustment to be used if 'h' is
14 | 'NULL'. This makes it possible to adjust the bandwidth while still
15 | using the a bandwidth estimator. For example, \code{adjust = 1/2} means
16 | use half of the default bandwidth.}
17 | }
18 | \description{
19 | Function used to specify bivariate kernel density estimator
20 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}).
21 | }
22 | \details{
23 | For more details on the use and implementation of the \verb{method_*()} functions,
24 | see \code{vignette("method", "ggdensity")}.
25 | }
26 | \examples{
27 | set.seed(1)
28 | df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3))
29 | 
30 | ggplot(df, aes(x, y)) +
31 |   geom_hdr(method = method_kde()) +
32 |   geom_point(size = 1)
33 | 
34 | # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()`
35 | ggplot(df, aes(x, y)) +
36 |   geom_density_2d_filled() +
37 |   geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) +
38 |   theme(legend.position = "none")
39 | 
40 | # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust`
41 | ggplot(df, aes(x, y)) +
42 |   geom_hdr(method = method_kde(h = 1)) +
43 |   geom_point(size = 1)
44 | 
45 | ggplot(df, aes(x, y)) +
46 |   geom_hdr(method = method_kde(adjust = 1/2)) +
47 |   geom_point(size = 1)
48 | 
49 | # Can also be used with `get_hdr()` for numerical summary of HDRs
50 | res <- get_hdr(df, method = method_kde())
51 | str(res)
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/R/hdr_lines.R:
--------------------------------------------------------------------------------
 1 | #' @rdname geom_hdr
 2 | #' @usage NULL
 3 | #' @export
 4 | stat_hdr_lines <- function(mapping = NULL, data = NULL,
 5 |                            geom = "hdr_lines", position = "identity",
 6 |                            ...,
 7 |                            method = "kde",
 8 |                            probs = c(.99, .95, .8, .5),
 9 |                            n = 100,
10 |                            xlim = NULL,
11 |                            ylim = NULL,
12 |                            na.rm = FALSE,
13 |                            show.legend = NA,
14 |                            inherit.aes = TRUE) {
15 |   layer(
16 |     data = data,
17 |     mapping = mapping,
18 |     stat = StatHdrLines,
19 |     geom = geom,
20 |     position = position,
21 |     show.legend = show.legend,
22 |     inherit.aes = inherit.aes,
23 |     params = list(
24 |       method = method,
25 |       probs = probs,
26 |       n = n,
27 |       xlim = xlim,
28 |       ylim = ylim,
29 |       na.rm = na.rm,
30 |       ...
31 |     )
32 |   )
33 | }
34 | 
35 | 
36 | #' @rdname geom_hdr
37 | #' @format NULL
38 | #' @usage NULL
39 | #' @importFrom scales percent_format
40 | #' @export
41 | StatHdrLines <- ggproto("StatHdrLines", StatHdr,
42 |   output = "lines"
43 | )
44 | 
45 | 
46 | #' @rdname geom_hdr
47 | #' @usage NULL
48 | #' @export
49 | geom_hdr_lines <- function(mapping = NULL, data = NULL,
50 |                            stat = "hdr_lines", position = "identity",
51 |                            ...,
52 |                            na.rm = FALSE,
53 |                            show.legend = NA,
54 |                            inherit.aes = TRUE) {
55 |   layer(
56 |     data = data,
57 |     mapping = mapping,
58 |     stat = stat,
59 |     geom = GeomHdrLines,
60 |     position = position,
61 |     show.legend = show.legend,
62 |     inherit.aes = inherit.aes,
63 |     params = list(
64 |       na.rm = na.rm,
65 |       ...
66 |     )
67 |   )
68 | }
69 | 
70 | 
71 | #' @rdname geom_hdr
72 | #' @format NULL
73 | #' @usage NULL
74 | #' @export
75 | GeomHdrLines <- ggproto("GeomHdrLines", GeomPath,
76 |   default_aes = aes(
77 |     colour = "#000000",
78 |     linewidth = 1,
79 |     linetype = 1,
80 |     alpha = NA
81 |   ))
82 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
 1 | # Platform
 2 | 
 3 | |field    |value                                                                |
 4 | |:--------|:--------------------------------------------------------------------|
 5 | |version  |R version 4.2.1 (2022-06-23)                                         |
 6 | |os       |Ubuntu 20.04.3 LTS                                                   |
 7 | |system   |x86_64, linux-gnu                                                    |
 8 | |ui       |RStudio                                                              |
 9 | |language |(EN)                                                                 |
10 | |collate  |C.UTF-8                                                              |
11 | |ctype    |C.UTF-8                                                              |
12 | |tz       |Etc/UTC                                                              |
13 | |date     |2023-02-09                                                           |
14 | |rstudio  |2022.07.0+548 Spotted Wakerobin (server)                             |
15 | |pandoc   |2.18 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown) |
16 | 
17 | # Dependencies
18 | 
19 | |package      |old   |new   |Δ  |
20 | |:------------|:-----|:-----|:--|
21 | |ggdensity    |0.1.1 |1.0.0 |*  |
22 | |cli          |NA    |3.6.0 |*  |
23 | |colorspace   |NA    |2.1-0 |*  |
24 | |fansi        |NA    |1.0.4 |*  |
25 | |farver       |NA    |2.1.1 |*  |
26 | |ggplot2      |NA    |3.4.0 |*  |
27 | |glue         |NA    |1.6.2 |*  |
28 | |gtable       |NA    |0.3.1 |*  |
29 | |isoband      |NA    |0.2.7 |*  |
30 | |lifecycle    |NA    |1.0.3 |*  |
31 | |magrittr     |NA    |2.0.3 |*  |
32 | |pillar       |NA    |1.8.1 |*  |
33 | |RColorBrewer |NA    |1.1-3 |*  |
34 | |rlang        |NA    |1.0.6 |*  |
35 | |scales       |NA    |1.2.1 |*  |
36 | |tibble       |NA    |3.1.8 |*  |
37 | |utf8         |NA    |1.2.3 |*  |
38 | |vctrs        |NA    |0.5.2 |*  |
39 | |viridisLite  |NA    |0.4.1 |*  |
40 | |withr        |NA    |2.5.0 |*  |
41 | 
42 | # Revdeps
43 | 
44 | ## Failed to check (1)
45 | 
46 | |package   |version |error |warning |note |
47 | |:---------|:-------|:-----|:-------|:----|
48 | |ggblanket |?       |      |        |     |
49 | 
50 | 


--------------------------------------------------------------------------------
/R/hdr_lines_fun.R:
--------------------------------------------------------------------------------
 1 | #' @rdname geom_hdr_fun
 2 | #' @usage NULL
 3 | #' @export
 4 | stat_hdr_lines_fun <- function(mapping = NULL, data = NULL,
 5 |                                geom = "hdr_lines_fun", position = "identity",
 6 |                                ...,
 7 |                                fun, args = list(),
 8 |                                probs = c(.99, .95, .8, .5),
 9 |                                xlim = NULL, ylim = NULL, n = 100,
10 |                                na.rm = FALSE,
11 |                                show.legend = NA,
12 |                                inherit.aes = TRUE) {
13 | 
14 |   if (is.null(data)) data <- ensure_nonempty_data
15 | 
16 |   layer(
17 |     data = data,
18 |     mapping = mapping,
19 |     stat = StatHdrLinesFun,
20 |     geom = geom,
21 |     position = position,
22 |     show.legend = show.legend,
23 |     inherit.aes = inherit.aes,
24 |     params = list(
25 |       fun = fun,
26 |       args = args,
27 |       probs = probs,
28 |       xlim = xlim,
29 |       ylim = ylim,
30 |       n = n,
31 |       na.rm = na.rm,
32 |       ...
33 |     )
34 |   )
35 | }
36 | 
37 | 
38 | 
39 | #' @rdname geom_hdr_fun
40 | #' @format NULL
41 | #' @usage NULL
42 | #' @importFrom scales percent
43 | #' @export
44 | StatHdrLinesFun <- ggproto("StatHdrLinesFun", StatHdrFun,
45 |   output = "lines"
46 | )
47 | 
48 | 
49 | #' @rdname geom_hdr_fun
50 | #' @usage NULL
51 | #' @export
52 | geom_hdr_lines_fun <- function(mapping = NULL, data = NULL,
53 |                          stat = "hdr_lines_fun", position = "identity",
54 |                          ...,
55 |                          na.rm = FALSE,
56 |                          show.legend = NA,
57 |                          inherit.aes = TRUE) {
58 | 
59 |   if (is.null(data)) data <- ensure_nonempty_data
60 | 
61 |   layer(
62 |     data = data,
63 |     mapping = mapping,
64 |     stat = stat,
65 |     geom = GeomHdrLinesFun,
66 |     position = position,
67 |     show.legend = show.legend,
68 |     inherit.aes = inherit.aes,
69 |     params = list(
70 |       na.rm = na.rm,
71 |       ...
72 |     )
73 |   )
74 | }
75 | 
76 | 
77 | 
78 | #' @rdname geom_hdr_fun
79 | #' @format NULL
80 | #' @usage NULL
81 | #' @export
82 | GeomHdrLinesFun <- ggproto("GeomHdrlinesfun", GeomHdrLines)
83 | 


--------------------------------------------------------------------------------
/man/method_histogram.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method.R
 3 | \name{method_histogram}
 4 | \alias{method_histogram}
 5 | \title{Bivariate histogram HDR estimator}
 6 | \usage{
 7 | method_histogram(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none")
 8 | }
 9 | \arguments{
10 | \item{bins}{Number of bins along each axis.
11 | Either a vector of length 2 or a scalar value which is recycled for both dimensions.
12 | Defaults to normal reference rule (Scott, pg 87).}
13 | 
14 | \item{smooth}{If \code{TRUE}, HDRs are smoothed by the marching squares algorithm.}
15 | 
16 | \item{nudgex, nudgey}{Horizontal and vertical rules for choosing witness points when \code{smooth == TRUE}.
17 | Accepts character vector: \code{"left"}, \code{"none"}, \code{"right"} (\code{nudgex}) or  \code{"down"}, \code{"none"}, \code{"up"} (\code{nudgey}).}
18 | }
19 | \description{
20 | Function used to specify bivariate histogram density estimator
21 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}).
22 | }
23 | \details{
24 | For more details on the use and implementation of the \verb{method_*()} functions,
25 | see \code{vignette("method", "ggdensity")}.
26 | }
27 | \examples{
28 | \dontrun{
29 | 
30 | # Histogram estimators can be useful when data has boundary constraints
31 | set.seed(1)
32 | df <- data.frame(x = rexp(1e3), y = rexp(1e3))
33 | 
34 | ggplot(df, aes(x, y)) +
35 |   geom_hdr(method = method_histogram()) +
36 |   geom_point(size = 1)
37 | 
38 | # The resolution of the histogram estimator can be set via `bins`
39 | ggplot(df, aes(x, y)) +
40 |   geom_hdr(method = method_histogram(bins = c(8, 25))) +
41 |   geom_point(size = 1)
42 | 
43 | # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs
44 | ggplot(df, aes(x, y)) +
45 |   geom_hdr(method = method_histogram(smooth = TRUE)) +
46 |   geom_point(size = 1)
47 | 
48 | # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly
49 | ggplot(df, aes(x, y)) +
50 |   geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) +
51 |   geom_point(size = 1)
52 | 
53 | # Can also be used with `get_hdr()` for numerical summary of HDRs
54 | res <- get_hdr(df, method = method_histogram())
55 | str(res)
56 | }
57 | 
58 | }
59 | \references{
60 | Scott, David W. Multivariate Density Estimation (2e), Wiley.
61 | }
62 | 


--------------------------------------------------------------------------------
/R/helpers-ggplot2.R:
--------------------------------------------------------------------------------
 1 | # unexported functions from ggplot2
 2 | 
 3 | `%||%` <- function(x, y) {
 4 |   if (is.null(x)) y else x
 5 | }
 6 | 
 7 | tibble0 <- function(...) {
 8 |   tibble::tibble(..., .name_repair = "minimal")
 9 | }
10 | 
11 | unique0 <- function(x, ...) {
12 |   if (is.null(x)) x else vctrs::vec_unique(x, ...)
13 | }
14 | 
15 | isoband_z_matrix <- function(data) {
16 |   x_pos <- as.integer(factor(data$x, levels = sort(unique0(data$x))))
17 |   y_pos <- as.integer(factor(data$y, levels = sort(unique0(data$y))))
18 |   nrow <- max(y_pos)
19 |   ncol <- max(x_pos)
20 |   raster <- matrix(NA_real_, nrow = nrow, ncol = ncol)
21 |   raster[cbind(y_pos, x_pos)] <- data$z
22 |   raster
23 | }
24 | 
25 | xyz_to_isobands <- function(data, breaks) {
26 |   isoband::isobands(x = sort(unique0(data$x)), y = sort(unique0(data$y)),
27 |                     z = isoband_z_matrix(data), levels_low = breaks[-length(breaks)],
28 |                     levels_high = breaks[-1])
29 | }
30 | 
31 | xyz_to_isolines <- function(data, breaks) {
32 |   isoband::isolines(x = sort(unique0(data$x)), y = sort(unique0(data$y)),
33 |                     z = isoband_z_matrix(data), levels = breaks)
34 | }
35 | 
36 | iso_to_polygon <- function(iso, group = 1) {
37 |   lengths <- vapply(iso, function(x) length(x$x), integer(1))
38 |   if (all(lengths == 0)) {
39 |     warning("Zero contours were generated")
40 |     return(tibble0())
41 |   }
42 |   levels <- names(iso)
43 |   xs <- unlist(lapply(iso, "[[", "x"), use.names = FALSE)
44 |   ys <- unlist(lapply(iso, "[[", "y"), use.names = FALSE)
45 |   ids <- unlist(lapply(iso, "[[", "id"), use.names = FALSE)
46 |   item_id <- rep(seq_along(iso), lengths)
47 |   groups <- paste(group, sprintf("%03d", item_id), sep = "-")
48 |   groups <- factor(groups)
49 |   tibble0(level = rep(levels, lengths), x = xs, y = ys,
50 |               piece = as.integer(groups), group = groups, subgroup = ids,
51 |               .size = length(xs))
52 | }
53 | 
54 | iso_to_path <- function(iso, group = 1) {
55 |   lengths <- vapply(iso, function(x) length(x$x), integer(1))
56 |   if (all(lengths == 0)) {
57 |     warning("Zero contours were generated")
58 |     return(tibble0())
59 |   }
60 |   levels <- names(iso)
61 |   xs <- unlist(lapply(iso, "[[", "x"), use.names = FALSE)
62 |   ys <- unlist(lapply(iso, "[[", "y"), use.names = FALSE)
63 |   ids <- unlist(lapply(iso, "[[", "id"), use.names = FALSE)
64 |   item_id <- rep(seq_along(iso), lengths)
65 |   groups <- paste(group, sprintf("%03d", item_id), sprintf("%03d",
66 |                                                            ids), sep = "-")
67 |   groups <- factor(groups)
68 |   tibble0(level = rep(levels, lengths), x = xs, y = ys,
69 |               piece = as.integer(groups), group = groups, .size = length(xs))
70 | }
71 | 
72 | empty <- function(df) {
73 |   is.null(df) || nrow(df) == 0 || ncol(df) == 0 || inherits(df, "waiver")
74 | }
75 | 
76 | ensure_nonempty_data <- function(data) {
77 |   if (empty(data)) {
78 |     tibble0(group = 1, .size = 1)
79 |   }
80 |   else {
81 |     data
82 |   }
83 | }
84 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_hdr_1d.R:
--------------------------------------------------------------------------------
 1 | test_that("structure of get_hdr_1d() return value is as expected", {
 2 | 
 3 |   x <- 1:10
 4 | 
 5 |   res <- get_hdr_1d(x)
 6 | 
 7 |   # Checking the top level of res
 8 |   expect_type(res, "list")
 9 |   expect_equal(length(res), 3)
10 |   expect_equal(names(res), c("df_est", "breaks", "data"))
11 | 
12 |   # Checking res$df_est:
13 |   expect_type(res$df_est, "list")
14 |   expect_equal(ncol(res$df_est), 4)
15 |   expect_equal(colnames(res$df_est), c("x", "fhat", "fhat_discretized", "hdr"))
16 | 
17 |   # Checking res$data
18 |   expect_type(res$data, "list")
19 |   expect_equal(ncol(res$data), 2)
20 |   expect_equal(nrow(res$data), 10)
21 |   expect_equal(colnames(res$data), c("x", "hdr_membership"))
22 | 
23 |   # Checking res$breaks
24 |   expect_type(res$breaks, "double")
25 |   expect_equal(length(res$breaks), 5)
26 |   expect_equal(names(res$breaks), c("99%", "95%", "80%", "50%", NA))
27 | 
28 | })
29 | 
30 | test_that("`method` can be provided as a character vector or function", {
31 | 
32 |   x <- 1:10
33 | 
34 |   expect_equal(get_hdr_1d(x, "kde"), get_hdr_1d(x, method_kde_1d()))
35 |   expect_equal(get_hdr_1d(x, "norm"), get_hdr_1d(x, method_norm_1d()))
36 |   expect_equal(get_hdr_1d(x, "freqpoly"), get_hdr_1d(x, method_freqpoly_1d()))
37 |   expect_equal(get_hdr_1d(x, "histogram"), get_hdr_1d(x, method_histogram_1d()))
38 | 
39 | })
40 | 
41 | test_that("get_hdr() errors informatively if bad `method` argument", {
42 | 
43 |   x <- 1:10
44 | 
45 |   expect_error(get_hdr_1d(x, method = "not-a-method"), regexp = "Invalid method specified")
46 |   expect_error(get_hdr_1d(x, method = method_kde_1d), regexp = "did you forget")
47 |   expect_error(get_hdr_1d(x, method = method_kde()), regexp = "1d")
48 | 
49 | })
50 | 
51 | test_that("get_hdr_1d() fails if `method != 'fun' and `x` isn't provided", {
52 | 
53 |   expect_error(get_hdr_1d(method = method_kde_1d()), regexp = ".x. must be provided")
54 | 
55 | })
56 | 
57 | test_that("fun argument of get_hdr_1d() requires range", {
58 | 
59 |   expect_error(get_hdr_1d(method = "fun", fun = dexp), regexp = ".range. must be provided")
60 | 
61 | })
62 | 
63 | 
64 | test_that("fun argument of get_hdr_1d() works", {
65 | 
66 |   res <- get_hdr_1d(method = "fun", fun = dexp, range = c(0, 10))
67 | 
68 |   # Structure of res is as expected
69 |   expect_type(res, "list")
70 |   expect_equal(length(res), 3)
71 |   expect_equal(names(res), c("df_est", "breaks", "data"))
72 | 
73 |   expect_null(res$data)
74 | 
75 |   # fhat_discretized should be normalized to sum to 1
76 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
77 | 
78 |   expect_equal(range(res$df_est$x), c(0, 10))
79 | 
80 |   # default grid is 512:
81 |   expect_equal(nrow(res$df_est), 512)
82 | 
83 |   # Checksums:
84 |   expect_equal(round(sum(res$df_est$fhat)), 52)
85 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0101, 0.0501, 0.201, 0.5041, Inf))
86 |   expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1))
87 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(36, 47, 71, 82, 276))
88 | })
89 | 
90 | 


--------------------------------------------------------------------------------
/man/method_kde_1d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/method_1d.R
 3 | \name{method_kde_1d}
 4 | \alias{method_kde_1d}
 5 | \title{Univariate kernel density HDR estimator}
 6 | \usage{
 7 | method_kde_1d(
 8 |   bw = "nrd0",
 9 |   adjust = 1,
10 |   kernel = "gaussian",
11 |   weights = NULL,
12 |   window = kernel
13 | )
14 | }
15 | \arguments{
16 | \item{bw}{the smoothing bandwidth to be used.  The kernels are scaled
17 |     such that this is the standard deviation of the smoothing kernel.
18 |     (Note this differs from the reference books cited below, and from S-PLUS.)
19 | 
20 |     \code{bw} can also be a character string giving a rule to choose the
21 |     bandwidth.  See \code{\link[stats]{bw.nrd}}. \cr The default,
22 |     \code{"nrd0"}, has remained the default for historical and
23 |     compatibility reasons, rather than as a general recommendation,
24 |     where e.g., \code{"SJ"} would rather fit, see also Venables and
25 |     Ripley (2002).
26 | 
27 |     The specified (or computed) value of \code{bw} is multiplied by
28 |     \code{adjust}.
29 |   }
30 | 
31 | \item{adjust}{the bandwidth used is actually \code{adjust*bw}.
32 |     This makes it easy to specify values like \sQuote{half the default}
33 |     bandwidth.}
34 | 
35 | \item{kernel, window}{a character string giving the smoothing kernel
36 |     to be used. This must partially match one of \code{"gaussian"},
37 |     \code{"rectangular"}, \code{"triangular"}, \code{"epanechnikov"},
38 |     \code{"biweight"}, \code{"cosine"} or \code{"optcosine"}, with default
39 |     \code{"gaussian"}, and may be abbreviated to a unique prefix (single
40 |     letter).
41 | 
42 |     \code{"cosine"} is smoother than \code{"optcosine"}, which is the
43 |     usual \sQuote{cosine} kernel in the literature and almost MSE-efficient.
44 |     However, \code{"cosine"} is the version used by S.
45 |   }
46 | 
47 | \item{weights}{numeric vector of non-negative observation weights,
48 |     hence of same length as \code{x}. The default \code{NULL} is
49 |     equivalent to \code{weights = rep(1/nx, nx)} where \code{nx} is the
50 |     length of (the finite entries of) \code{x[]}.  If \code{na.rm = TRUE}
51 |     and there are \code{NA}'s in \code{x}, they \emph{and} the
52 |     corresponding weights are removed before computations.  In that case,
53 |     when the original weights have summed to one, they are re-scaled to
54 |     keep doing so.}
55 | }
56 | \description{
57 | Function used to specify univariate kernel density estimator
58 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}).
59 | }
60 | \details{
61 | For more details on the use and implementation of the \verb{method_*_1d()} functions,
62 | see \code{vignette("method", "ggdensity")}.
63 | }
64 | \examples{
65 | df <- data.frame(x = rnorm(1e3, sd = 3))
66 | 
67 | ggplot(df, aes(x)) +
68 |   geom_hdr_rug(method = method_kde_1d()) +
69 |   geom_density()
70 | 
71 | # Details of the KDE can be adjusted with arguments to `method_kde_1d()`
72 | ggplot(df, aes(x)) +
73 |   geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) +
74 |   geom_density(adjust = 1/5)
75 | 
76 | ggplot(df, aes(x)) +
77 |   geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) +
78 |   geom_density(kernel = "triangular")
79 | 
80 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
81 | res <- get_hdr_1d(df$x, method = method_kde_1d())
82 | str(res)
83 | 
84 | }
85 | 


--------------------------------------------------------------------------------
/tests/testthat/test-visual-tests.R:
--------------------------------------------------------------------------------
 1 | ## Checking basic plots with vdiffr::expect_doppelganger()
 2 | 
 3 | test_that("Basic 2d HDRs render consistently", {
 4 | 
 5 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
 6 | 
 7 |   # geom/stat_hdr
 8 |   geom_hdr_ggplot <- ggplot(data, aes(x, y)) + geom_hdr()
 9 |   stat_hdr_ggplot <- ggplot(data, aes(x, y)) + stat_hdr()
10 |   vdiffr::expect_doppelganger("geom-hdr-ggplot", geom_hdr_ggplot)
11 |   vdiffr::expect_doppelganger("stat-hdr-ggplot", stat_hdr_ggplot)
12 | 
13 |   # geom/stat_hdr_lines
14 |   geom_hdr_lines_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_lines()
15 |   stat_hdr_lines_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_lines()
16 |   vdiffr::expect_doppelganger("geom-hdr_lines-ggplot", geom_hdr_lines_ggplot)
17 |   vdiffr::expect_doppelganger("stat-hdr_lines-ggplot", stat_hdr_lines_ggplot)
18 | 
19 |   # geom/stat_hdr_points
20 |   geom_hdr_points_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_points()
21 |   stat_hdr_points_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_points()
22 |   vdiffr::expect_doppelganger("geom-hdr-points-ggplot", geom_hdr_points_ggplot)
23 |   vdiffr::expect_doppelganger("stat-hdr-points-ggplot", stat_hdr_points_ggplot)
24 | 
25 |   # geom/stat_hdr_points_fun
26 |   geom_hdr_points_fun_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_points_fun(fun = function(x, y) dnorm(x) * dnorm(y))
27 |   stat_hdr_points_fun_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_points_fun(fun = function(x, y) dnorm(x) * dnorm(y))
28 |   vdiffr::expect_doppelganger("geom-hdr-points-ggplot", geom_hdr_points_ggplot)
29 |   vdiffr::expect_doppelganger("stat-hdr-points-ggplot", stat_hdr_points_ggplot)
30 | 
31 |   # geom/stat_hdr_fun
32 |   geom_hdr_fun_ggplot <- ggplot() +
33 |     geom_hdr_fun(fun = function(x, y) dnorm(x) * dnorm(y), xlim = c(-5, 5), ylim = c(-5, 5))
34 |   stat_hdr_fun_ggplot <- ggplot() +
35 |     stat_hdr_fun(fun = function(x, y) dnorm(x) * dnorm(y), xlim = c(-5, 5), ylim = c(-5, 5))
36 |   vdiffr::expect_doppelganger("geom-hdr-fun-ggplot", geom_hdr_fun_ggplot)
37 |   vdiffr::expect_doppelganger("stat-hdr-fun-ggplot", stat_hdr_fun_ggplot)
38 | 
39 | })
40 | 
41 | test_that("Basic 1d HDRs render consistently", {
42 | 
43 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
44 | 
45 |   # geom/stat_hdr_rug
46 |   geom_hdr_rug_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_rug()
47 |   stat_hdr_rug_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_rug()
48 |   vdiffr::expect_doppelganger("geom-hdr-rug-ggplot", geom_hdr_rug_ggplot)
49 |   vdiffr::expect_doppelganger("stat-hdr-rug-ggplot", stat_hdr_rug_ggplot)
50 | 
51 |   # geom/stat_hdr_rug_fun
52 |   geom_hdr_rug_fun_ggplot <- ggplot() +
53 |     geom_hdr_rug_fun(fun_x = dnorm, fun_y = dexp, xlim = c(-5, 5), ylim = c(0, 10))
54 |   stat_hdr_rug_fun_ggplot <- ggplot() +
55 |     stat_hdr_rug_fun(fun_x = dnorm, fun_y = dexp, xlim = c(-5, 5), ylim = c(0, 10))
56 | 
57 |   vdiffr::expect_doppelganger("geom-hdr-rug-fun-ggplot", geom_hdr_rug_fun_ggplot)
58 |   vdiffr::expect_doppelganger("stat-hdr-rug-fun-ggplot", stat_hdr_rug_fun_ggplot)
59 | 
60 | })
61 | 
62 | test_that("Specified order of probabilities doesn't impact legend ordering", {
63 | 
64 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
65 | 
66 |   geom_hdr_prob_order_ggplot <- ggplot(data, aes(x, y)) +
67 |     geom_hdr(probs = c(.25, .5, .75, .95))
68 | 
69 |   geom_hdr_rug_prob_order_ggplot <- ggplot(data, aes(x, y)) +
70 |     geom_hdr_rug(probs = c(.25, .5, .75, .95))
71 | 
72 |   vdiffr::expect_doppelganger("geom_hdr_prob_order_ggplot", geom_hdr_prob_order_ggplot)
73 |   vdiffr::expect_doppelganger("geom_hdr_rug_prob_order_ggplot", geom_hdr_rug_prob_order_ggplot)
74 | })
75 | 
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/tests/testthat/test-layer-wrappers.R:
--------------------------------------------------------------------------------
 1 | test_that("wrapper functions for `layer()` are passing arguments on as expected", {
 2 | 
 3 |   df <- readRDS(test_path("fixtures", "df_norm.rds"))
 4 | 
 5 |   check_layer <- function(layer_fun, Geom, Stat, mapping = aes(x, y), data = df, ...) {
 6 | 
 7 |     hdr_layer <- layer_fun(data = data, mapping = mapping, ...)
 8 | 
 9 |     expect_type(hdr_layer, "environment")
10 |     expect_identical(hdr_layer$geom, Geom)
11 |     expect_identical(hdr_layer$stat, Stat)
12 |     expect_identical(hdr_layer$mapping, mapping)
13 | 
14 |   }
15 | 
16 |   # 2-d layer functions -----------------------------------------------------
17 | 
18 |   # geom/stat_hdr()
19 |   check_layer(geom_hdr, GeomHdr, StatHdr)
20 |   check_layer(stat_hdr, GeomHdr, StatHdr)
21 | 
22 |   # geom/stat_hdr_lines()
23 |   check_layer(geom_hdr_lines, GeomHdrLines, StatHdrLines)
24 |   check_layer(stat_hdr_lines, GeomHdrLines, StatHdrLines)
25 | 
26 |   # geom/stat_hdr_points()
27 |   check_layer(geom_hdr_points, GeomPoint, StatHdrPoints)
28 |   check_layer(stat_hdr_points, GeomPoint, StatHdrPoints)
29 | 
30 |   # geom/stat_hdr_lines_fun()
31 |   # -- stat_hdr_points_fun needs to have a `fun` arg provided
32 |   check_layer(geom_hdr_points_fun, GeomPoint, StatHdrPointsFun)
33 |   check_layer(stat_hdr_points_fun, GeomPoint, StatHdrPointsFun, fun = function(x, y) dnorm(x) * dnorm(y))
34 | 
35 |   # geom/stat_hdr_fun()
36 |   # (stat_hdr_fun needs to have a `fun` arg provided)
37 |   check_layer(geom_hdr_fun, GeomHdrFun, StatHdrFun)
38 |   check_layer(stat_hdr_fun, GeomHdrFun, StatHdrFun, fun = function(x, y) dnorm(x) * dnorm(y))
39 | 
40 |   # -- checking that data doesn't need to be provided
41 |   check_layer(geom_hdr_fun, GeomHdrFun, StatHdrFun, data = NULL, mapping = NULL)
42 |   check_layer(stat_hdr_fun, GeomHdrFun, StatHdrFun, data = NULL, mapping = NULL, fun = function(x, y) dnorm(x) * dnorm(y))
43 | 
44 |   # geom/stat_hdr_lines_fun()
45 |   # -- stat_hdr_lines_fun needs to have a `fun` arg provided
46 |   check_layer(geom_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun)
47 |   check_layer(stat_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, fun = function(x, y) dnorm(x) * dnorm(y))
48 | 
49 |   # -- checking that data doesn't need to be provided
50 |   check_layer(geom_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, data = NULL, mapping = NULL)
51 |   check_layer(stat_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, data = NULL, mapping = NULL, fun = function(x, y) dnorm(x) * dnorm(y))
52 | 
53 |   # 1-d layer functions -----------------------------------------------------
54 | 
55 |   # geom/stat_hdr_rug()
56 |   check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug)
57 |   check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug)
58 | 
59 |   # -- checking that single x/y aesthetics are allowed:
60 |   check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug, mapping  = aes(x))
61 |   check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug, mapping  = aes(x))
62 |   check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug, mapping  = aes(y))
63 |   check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug, mapping  = aes(y))
64 | 
65 |   # geom/stat_hdr_rug_fun()
66 |   check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun)
67 |   check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun)
68 | 
69 |   # -- checking that single x/y aesthetics are allowed:
70 |   check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping  = aes(x))
71 |   check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping  = aes(x))
72 |   check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping  = aes(y))
73 |   check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping  = aes(y))
74 | 
75 | })
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/R/hdr_points.R:
--------------------------------------------------------------------------------
  1 | #' Scatterplot colored by highest density regions of a 2D density estimate
  2 | #'
  3 | #' Perform 2D density estimation, compute the resulting highest density regions (HDRs),
  4 | #' and plot the provided data as a scatterplot with points colored according to
  5 | #' their corresponding HDR.
  6 | #'
  7 | #' @section Aesthetics: geom_hdr_points understands the following aesthetics (required
  8 | #'   aesthetics are in bold):
  9 | #'
 10 | #'   - **x**
 11 | #'   - **y**
 12 | #'   - alpha
 13 | #'   - color
 14 | #'   - fill
 15 | #'   - group
 16 | #'   - linetype
 17 | #'   - size
 18 | #'   - subgroup
 19 | #'
 20 | #' @section Computed variables:
 21 | #'
 22 | #'   \describe{ \item{probs}{The probability associated with the highest density region, specified
 23 | #'   by `probs`.} }
 24 | #'
 25 | #' @inheritParams ggplot2::stat_identity
 26 | #' @inheritParams ggplot2::stat_density2d
 27 | #' @inheritParams geom_hdr
 28 | #'
 29 | #' @name geom_hdr_points
 30 | #' @rdname geom_hdr_points
 31 | #'
 32 | #' @import ggplot2
 33 | #'
 34 | #' @examples
 35 | #' set.seed(1)
 36 | #' df <- data.frame(x = rnorm(500), y = rnorm(500))
 37 | #' p <- ggplot(df, aes(x, y)) +
 38 | #'  coord_equal()
 39 | #'
 40 | #' p + geom_hdr_points()
 41 | #'
 42 | #' # Setting aes(fill = after_stat(probs)), color = "black", and
 43 | #' # shape = 21 helps alleviate overplotting:
 44 | #' p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2)
 45 | #'
 46 | #' # Also works well with geom_hdr_lines()
 47 | #' p +
 48 | #'  geom_hdr_lines(
 49 | #'    aes(color = after_stat(probs)), alpha = 1,
 50 | #'    xlim = c(-5, 5), ylim = c(-5, 5)
 51 | #'  ) +
 52 | #'  geom_hdr_points(
 53 | #'    aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2,
 54 | #'    xlim = c(-5, 5), ylim = c(-5, 5)
 55 | #'  )
 56 | #'
 57 | NULL
 58 | 
 59 | 
 60 | 
 61 | #' @export
 62 | #' @rdname geom_hdr_points
 63 | stat_hdr_points <- function(mapping = NULL, data = NULL,
 64 |                             geom = "point", position = "identity",
 65 |                             ...,
 66 |                             method = "kde",
 67 |                             probs = c(.99, .95, .8, .5),
 68 |                             n = 100,
 69 |                             xlim = NULL,
 70 |                             ylim = NULL,
 71 |                             na.rm = FALSE,
 72 |                             show.legend = NA,
 73 |                             inherit.aes = TRUE) {
 74 |   layer(
 75 |     data = data,
 76 |     mapping = mapping,
 77 |     stat = StatHdrPoints,
 78 |     geom = geom,
 79 |     position = position,
 80 |     show.legend = show.legend,
 81 |     inherit.aes = inherit.aes,
 82 |     params = list(
 83 |       method = method,
 84 |       probs = probs,
 85 |       n = n,
 86 |       xlim = xlim,
 87 |       ylim = ylim,
 88 |       na.rm = na.rm,
 89 |       ...
 90 |     )
 91 |   )
 92 | }
 93 | 
 94 | 
 95 | #' @export
 96 | #' @rdname geom_hdr_points
 97 | #' @format NULL
 98 | #' @usage NULL
 99 | StatHdrPoints <- ggproto("StatHdrPoints", StatHdr,
100 |   default_aes = aes(order = after_stat(probs), color = after_stat(probs)),
101 |   output = "points"
102 | )
103 | 
104 | 
105 | #' @export
106 | #' @rdname geom_hdr_points
107 | geom_hdr_points <- function(mapping = NULL, data = NULL,
108 |                             stat = "hdr_points", position = "identity",
109 |                             ...,
110 |                             na.rm = FALSE,
111 |                             show.legend = NA,
112 |                             inherit.aes = TRUE) {
113 | 
114 |   layer(
115 |     data = data,
116 |     mapping = mapping,
117 |     stat = stat,
118 |     geom = GeomPoint,
119 |     position = position,
120 |     show.legend = show.legend,
121 |     inherit.aes = inherit.aes,
122 |     params = list(
123 |       na.rm = na.rm,
124 |       ...
125 |     )
126 |   )
127 | }
128 | 
129 | 


--------------------------------------------------------------------------------
/R/hdr_points_fun.R:
--------------------------------------------------------------------------------
  1 | #' Scatterplot colored by highest density regions of a bivariate pdf
  2 | #'
  3 | #' Compute the highest density regions (HDRs) of a bivariate pdf and plot the provided
  4 | #' data as a scatterplot with points colored according to their corresponding HDR.
  5 | #'
  6 | #' @section Aesthetics: geom_hdr_points_fun understands the following aesthetics
  7 | #'   (required aesthetics are in bold):
  8 | #'
  9 | #'   - **x**
 10 | #'   - **y**
 11 | #'   - alpha
 12 | #'   - color
 13 | #'   - fill
 14 | #'   - group
 15 | #'   - linetype
 16 | #'   - size
 17 | #'   - subgroup
 18 | #'
 19 | #' @section Computed variables:
 20 | #'
 21 | #'   \describe{ \item{probs}{The probability associated with the highest density region, specified
 22 | #'   by `probs`.} }
 23 | #'
 24 | #' @inheritParams ggplot2::stat_identity
 25 | #' @inheritParams ggplot2::stat_density2d
 26 | #' @inheritParams geom_hdr_fun
 27 | #'
 28 | #' @name geom_hdr_points_fun
 29 | #' @rdname geom_hdr_points_fun
 30 | #'
 31 | #' @import ggplot2
 32 | #'
 33 | #' @examples
 34 | #' # Can plot points colored according to known pdf:
 35 | #' set.seed(1)
 36 | #' df <- data.frame(x = rexp(1000), y = rexp(1000))
 37 | #' f <- function(x, y) dexp(x) * dexp(y)
 38 | #'
 39 | #' ggplot(df, aes(x, y)) +
 40 | #'   geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10))
 41 | #'
 42 | #'
 43 | #' # Also allows for hdrs of a custom parametric model
 44 | #'
 45 | #' # generate example data
 46 | #' n <- 1000
 47 | #' th_true <- c(3, 8)
 48 | #'
 49 | #' rdata <- function(n, th) {
 50 | #'   gen_single_obs <- function(th) {
 51 | #'     rchisq(2, df = th) # can be anything
 52 | #'   }
 53 | #'   df <- replicate(n, gen_single_obs(th))
 54 | #'   setNames(as.data.frame(t(df)), c("x", "y"))
 55 | #' }
 56 | #' data <- rdata(n, th_true)
 57 | #'
 58 | #' # estimate unknown parameters via maximum likelihood
 59 | #' likelihood <- function(th) {
 60 | #'   th <- abs(th) # hack to enforce parameter space boundary
 61 | #'   log_f <- function(v) {
 62 | #'     x <- v[1]; y <- v[2]
 63 | #'     dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE)
 64 | #'   }
 65 | #'   sum(apply(data, 1, log_f))
 66 | #' }
 67 | #' (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par)
 68 | #'
 69 | #' # plot f for the give model
 70 | #' f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2])
 71 | #'
 72 | #' ggplot(data, aes(x, y)) +
 73 | #'   geom_hdr_points_fun(fun = f, args = list(th = th_hat))
 74 | #'
 75 | #' ggplot(data, aes(x, y)) +
 76 | #'   geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black",
 77 | #'     fun = f, args = list(th = th_hat), na.rm = TRUE) +
 78 | #'   geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) +
 79 | #'   lims(x = c(0, 15), y = c(0, 25))
 80 | #'
 81 | NULL
 82 | 
 83 | 
 84 | #' @export
 85 | #' @rdname geom_hdr_points_fun
 86 | stat_hdr_points_fun <- function(mapping = NULL, data = NULL,
 87 |                                 geom = "point", position = "identity",
 88 |                                 ...,
 89 |                                 fun, args = list(),
 90 |                                 probs = c(.99, .95, .8, .5),
 91 |                                 xlim = NULL, ylim = NULL, n = 100,
 92 |                                 na.rm = FALSE,
 93 |                                 show.legend = NA,
 94 |                                 inherit.aes = TRUE) {
 95 | 
 96 |   layer(
 97 |     data = data,
 98 |     mapping = mapping,
 99 |     stat = StatHdrPointsFun,
100 |     geom = geom,
101 |     position = position,
102 |     show.legend = show.legend,
103 |     inherit.aes = inherit.aes,
104 |     params = list(
105 |       fun = fun,
106 |       args = args,
107 |       probs = probs,
108 |       xlim = xlim,
109 |       ylim = ylim,
110 |       n = n,
111 |       na.rm = na.rm,
112 |       ...
113 |     )
114 |   )
115 | }
116 | 
117 | 
118 | #' @export
119 | #' @format NULL
120 | #' @usage NULL
121 | #' @rdname geom_hdr_points_fun
122 | StatHdrPointsFun <- ggproto("StatHdrPointsFun", StatHdrFun,
123 |   default_aes = aes(order = after_stat(probs), color = after_stat(probs)),
124 |   output = "points"
125 | )
126 | 
127 | #' @export
128 | #' @rdname geom_hdr_points_fun
129 | geom_hdr_points_fun <- function(mapping = NULL, data = NULL,
130 |                                 stat = "hdr_points_fun", position = "identity",
131 |                                 ...,
132 |                                 na.rm = FALSE,
133 |                                 show.legend = NA,
134 |                                 inherit.aes = TRUE) {
135 | 
136 |   if (is.null(data)) data <- ensure_nonempty_data
137 | 
138 |   layer(
139 |     data = data,
140 |     mapping = mapping,
141 |     stat = stat,
142 |     geom = GeomPoint,
143 |     position = position,
144 |     show.legend = show.legend,
145 |     inherit.aes = inherit.aes,
146 |     params = list(
147 |       na.rm = na.rm,
148 |       ...
149 |     )
150 |   )
151 | }
152 | 
153 | 


--------------------------------------------------------------------------------
/man/get_hdr_1d.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/get_hdr_1d.R
  3 | \name{get_hdr_1d}
  4 | \alias{get_hdr_1d}
  5 | \title{Computing the highest density regions of a 1D density}
  6 | \usage{
  7 | get_hdr_1d(
  8 |   x = NULL,
  9 |   method = "kde",
 10 |   probs = c(0.99, 0.95, 0.8, 0.5),
 11 |   n = 512,
 12 |   range = NULL,
 13 |   hdr_membership = TRUE,
 14 |   fun,
 15 |   args = list()
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{x}{A vector of data}
 20 | 
 21 | \item{method}{Either a character (\code{"kde"}, \code{"norm"}, \code{"histogram"}, \code{"freqpoly"}, or \code{"fun"}) or \verb{method_*_1d()} function.
 22 | See the "The \code{method} argument" section below for details.}
 23 | 
 24 | \item{probs}{Probabilities to compute HDRs for.}
 25 | 
 26 | \item{n}{Resolution of grid representing estimated density and HDRs.}
 27 | 
 28 | \item{range}{Range of grid representing estimated density and HDRs.}
 29 | 
 30 | \item{hdr_membership}{Should HDR membership of data points (\code{x}) be computed?}
 31 | 
 32 | \item{fun}{Optional, a probability density function, must be vectorized in its first argument.
 33 | See the "The \code{fun} argument" section below for details.}
 34 | 
 35 | \item{args}{Optional, a list of arguments to be provided to \code{fun}.}
 36 | }
 37 | \value{
 38 | \code{get_hdr_1d} returns a list with elements \code{df_est} (\code{data.frame}), \code{breaks} (named \code{numeric}), and \code{data} (\code{data.frame}).
 39 | \itemize{
 40 | \item \code{df_est}: the estimated HDRs and density evaluated on the grid defined by \code{range} and \code{n}.
 41 | The column of estimated HDRs (\code{df_est$hdr}) is a numeric vector with values from \code{probs}.
 42 | The columns \code{df_est$fhat} and \code{df_est$fhat_discretized} correspond to the estimated density
 43 | on the original scale and rescaled to sum to 1, respectively.
 44 | \item \code{breaks}: the heights of the estimated density (\code{df_est$fhat}) corresponding to the HDRs specified by \code{probs}.
 45 | Will always have additional element \code{Inf} representing the cutoff for the 100\% HDR.
 46 | \item \code{data}: the original data provided in the \code{data} argument.
 47 | If \code{hdr_membership} is set to \code{TRUE}, this includes a column (\code{data$hdr_membership})
 48 | with the HDR corresponding to each data point.
 49 | }
 50 | }
 51 | \description{
 52 | \code{get_hdr_1d} is used to estimate a 1-dimensional density and compute corresponding HDRs.
 53 | The estimated density and HDRs are represented in a discrete form as a grid, defined by arguments \code{range} and \code{n}.
 54 | \code{get_hdr_1d} is used internally by layer functions \code{stat_hdr_rug()} and \code{stat_hdr_rug_fun()}.
 55 | }
 56 | \section{The \code{method} argument}{
 57 | 
 58 | The density estimator used to estimate the HDRs is specified with the \code{method} argument.
 59 | The simplest way to specify an estimator is to provide a character value to \code{method},
 60 | for example \code{method = "kde"} specifies a kernel density estimator.
 61 | However, this specification is limited to the default behavior of the estimator.
 62 | 
 63 | Instead, it is possible to provide a function call, for example: \code{method = method_kde_1d()}.
 64 | This is slightly different from the function calls provided in \code{get_hdr()}, note the \verb{_1d} suffix.
 65 | In many cases, these functions accept parameters governing the density estimation procedure.
 66 | Here, \code{method_kde_1d()} accepts several parameters related to the choice of kernel.
 67 | For details, see \code{?method_kde_1d}.
 68 | Every method of univariate density estimation implemented has such corresponding \verb{method_*_1d()} function,
 69 | each with an associated help page.
 70 | 
 71 | Note: \code{geom_hdr_rug()} and other layer functions also have \code{method} arguments which behave in the same way.
 72 | For more details on the use and implementation of the \verb{method_*_1d()} functions,
 73 | see \code{vignette("method", "ggdensity")}.
 74 | }
 75 | 
 76 | \section{The \code{fun} argument}{
 77 | 
 78 | If \code{method} is set to \code{"fun"}, \code{get_hdr_1d()} expects a univariate probability
 79 | density function to be specified with the \code{fun} argument.
 80 | It is required that \code{fun} be a function of at least one argument (\code{x}).
 81 | Beyond this first argument, \code{fun} can have arbitrarily many arguments;
 82 | these can be set in \code{get_hdr_1d()} as a named list via the \code{args} parameter.
 83 | 
 84 | Note: \code{get_hdr_1d()} requires that \code{fun} be vectorized in \code{x}.
 85 | For an example of an appropriate choice of \code{fun}, see the final example below.
 86 | }
 87 | 
 88 | \examples{
 89 | x <- rnorm(1e3)
 90 | 
 91 | # Two ways to specify `method`
 92 | get_hdr_1d(x, method = "kde")
 93 | get_hdr_1d(x, method = method_kde_1d())
 94 | 
 95 | \dontrun{
 96 | 
 97 | # If parenthesis are omitted, `get_hdr_1d()` errors
 98 | get_hdr_1d(df, method = method_kde_1d)
 99 | 
100 | # If the `_1d` suffix is omitted, `get_hdr_1d()` errors
101 | get_hdr_1d(x, method = method_kde())
102 | }
103 | 
104 | # Adjust estimator parameters with arguments to `method_kde_1d()`
105 | get_hdr_1d(x, method = method_kde_1d(kernel = "triangular"))
106 | 
107 | # Estimate different HDRs with `probs`
108 | get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2))
109 | 
110 | # Compute "population" HDRs of specified univariate pdf with `method = "fun"`
111 | f <- function(x, sd = 1) dnorm(x, sd = sd)
112 | get_hdr_1d(method = "fun", fun = f, range = c(-5, 5))
113 | get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5))
114 | 
115 | 
116 | }
117 | 


--------------------------------------------------------------------------------
/man/geom_hdr_points.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr_points.R
  3 | \docType{data}
  4 | \name{geom_hdr_points}
  5 | \alias{geom_hdr_points}
  6 | \alias{stat_hdr_points}
  7 | \alias{StatHdrPoints}
  8 | \title{Scatterplot colored by highest density regions of a 2D density estimate}
  9 | \usage{
 10 | stat_hdr_points(
 11 |   mapping = NULL,
 12 |   data = NULL,
 13 |   geom = "point",
 14 |   position = "identity",
 15 |   ...,
 16 |   method = "kde",
 17 |   probs = c(0.99, 0.95, 0.8, 0.5),
 18 |   n = 100,
 19 |   xlim = NULL,
 20 |   ylim = NULL,
 21 |   na.rm = FALSE,
 22 |   show.legend = NA,
 23 |   inherit.aes = TRUE
 24 | )
 25 | 
 26 | geom_hdr_points(
 27 |   mapping = NULL,
 28 |   data = NULL,
 29 |   stat = "hdr_points",
 30 |   position = "identity",
 31 |   ...,
 32 |   na.rm = FALSE,
 33 |   show.legend = NA,
 34 |   inherit.aes = TRUE
 35 | )
 36 | }
 37 | \arguments{
 38 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 39 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 40 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 41 | mapping.}
 42 | 
 43 | \item{data}{The data to be displayed in this layer. There are three
 44 | options:
 45 | 
 46 | If \code{NULL}, the default, the data is inherited from the plot
 47 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 48 | 
 49 | A \code{data.frame}, or other object, will override the plot
 50 | data. All objects will be fortified to produce a data frame. See
 51 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 52 | 
 53 | A \code{function} will be called with a single argument,
 54 | the plot data. The return value must be a \code{data.frame}, and
 55 | will be used as the layer data. A \code{function} can be created
 56 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 57 | 
 58 | \item{geom}{The geometric object to use to display the data, either as a
 59 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 60 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 61 | 
 62 | \item{position}{Position adjustment, either as a string naming the adjustment
 63 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 64 | position adjustment function. Use the latter if you need to change the
 65 | settings of the adjustment.}
 66 | 
 67 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 68 | often aesthetics, used to set an aesthetic to a fixed value, like
 69 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 70 | to the paired geom/stat.}
 71 | 
 72 | \item{method}{Density estimator to use, accepts character vector:
 73 | \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"mvnorm"}.
 74 | Alternatively accepts functions  which return closures corresponding to density estimates,
 75 | see \code{?get_hdr} or \code{vignette("method", "ggdensity")}.}
 76 | 
 77 | \item{probs}{Probabilities to compute highest density regions for.}
 78 | 
 79 | \item{n}{Number of grid points in each direction.}
 80 | 
 81 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 82 | range of data.}
 83 | 
 84 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 85 | a warning. If \code{TRUE}, missing values are silently removed.}
 86 | 
 87 | \item{show.legend}{logical. Should this layer be included in the legends?
 88 | \code{NA}, the default, includes if any aesthetics are mapped.
 89 | \code{FALSE} never includes, and \code{TRUE} always includes.
 90 | It can also be a named logical vector to finely select the aesthetics to
 91 | display.}
 92 | 
 93 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
 94 | rather than combining with them. This is most useful for helper functions
 95 | that define both data and aesthetics and shouldn't inherit behaviour from
 96 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
 97 | 
 98 | \item{stat}{The statistical transformation to use on the data for this
 99 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
100 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
101 | \code{"stat_count"})}
102 | }
103 | \description{
104 | Perform 2D density estimation, compute the resulting highest density regions (HDRs),
105 | and plot the provided data as a scatterplot with points colored according to
106 | their corresponding HDR.
107 | }
108 | \section{Aesthetics}{
109 |  geom_hdr_points understands the following aesthetics (required
110 | aesthetics are in bold):
111 | \itemize{
112 | \item \strong{x}
113 | \item \strong{y}
114 | \item alpha
115 | \item color
116 | \item fill
117 | \item group
118 | \item linetype
119 | \item size
120 | \item subgroup
121 | }
122 | }
123 | 
124 | \section{Computed variables}{
125 | 
126 | 
127 | \describe{ \item{probs}{The probability associated with the highest density region, specified
128 | by \code{probs}.} }
129 | }
130 | 
131 | \examples{
132 | set.seed(1)
133 | df <- data.frame(x = rnorm(500), y = rnorm(500))
134 | p <- ggplot(df, aes(x, y)) +
135 |  coord_equal()
136 | 
137 | p + geom_hdr_points()
138 | 
139 | # Setting aes(fill = after_stat(probs)), color = "black", and
140 | # shape = 21 helps alleviate overplotting:
141 | p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2)
142 | 
143 | # Also works well with geom_hdr_lines()
144 | p +
145 |  geom_hdr_lines(
146 |    aes(color = after_stat(probs)), alpha = 1,
147 |    xlim = c(-5, 5), ylim = c(-5, 5)
148 |  ) +
149 |  geom_hdr_points(
150 |    aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2,
151 |    xlim = c(-5, 5), ylim = c(-5, 5)
152 |  )
153 | 
154 | }
155 | \keyword{datasets}
156 | 


--------------------------------------------------------------------------------
/man/get_hdr.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/get_hdr.R
  3 | \name{get_hdr}
  4 | \alias{get_hdr}
  5 | \title{Computing the highest density regions of a 2D density}
  6 | \usage{
  7 | get_hdr(
  8 |   data = NULL,
  9 |   method = "kde",
 10 |   probs = c(0.99, 0.95, 0.8, 0.5),
 11 |   n = 100,
 12 |   rangex = NULL,
 13 |   rangey = NULL,
 14 |   hdr_membership = TRUE,
 15 |   fun,
 16 |   args = list()
 17 | )
 18 | }
 19 | \arguments{
 20 | \item{data}{A data frame with columns \code{x} and \code{y}.}
 21 | 
 22 | \item{method}{Either a character (\code{"kde"}, \code{"mvnorm"}, \code{"histogram"},
 23 | \code{"freqpoly"}, or \code{"fun"}) or \verb{method_*()} function. See the "The \code{method}
 24 | argument" section below for details.}
 25 | 
 26 | \item{probs}{Probabilities to compute HDRs for.}
 27 | 
 28 | \item{n}{Resolution of grid representing estimated density and HDRs.}
 29 | 
 30 | \item{rangex, rangey}{Range of grid representing estimated density and HDRs,
 31 | along the x- and y-axes.}
 32 | 
 33 | \item{hdr_membership}{Should HDR membership of data points (\code{data}) be
 34 | computed? Defaults to \code{TRUE}, although it is computationally expensive for
 35 | large data sets.}
 36 | 
 37 | \item{fun}{Optional, a joint probability density function, must be vectorized
 38 | in its first two arguments. See the "The \code{fun} argument" section below for
 39 | details.}
 40 | 
 41 | \item{args}{Optional, a list of arguments to be provided to \code{fun}.}
 42 | }
 43 | \value{
 44 | \code{get_hdr} returns a list with elements \code{df_est} (\code{data.frame}), \code{breaks}
 45 | (named \code{numeric}), and \code{data} (\code{data.frame}).
 46 | \itemize{
 47 | \item \code{df_est}: the estimated HDRs and density evaluated on the grid defined by \code{rangex}, \code{rangey}, and \code{n}.
 48 | The column of estimated HDRs (\code{df_est$hdr}) is a numeric vector with values
 49 | from \code{probs}. The columns \code{df_est$fhat} and \code{df_est$fhat_discretized}
 50 | correspond to the estimated density on the original scale and rescaled to sum
 51 | to 1, respectively.
 52 | \item \code{breaks}: the heights of the estimated density (\code{df_est$fhat}) corresponding to the HDRs specified by \code{probs}.
 53 | Will always have additional element \code{Inf} representing the cutoff for the
 54 | 100\% HDR.
 55 | \item \code{data}: the original data provided in the \code{data} argument.
 56 | If \code{hdr_membership} is set to \code{TRUE}, this includes a column
 57 | (\code{data$hdr_membership}) with the HDR corresponding to each data point.
 58 | }
 59 | }
 60 | \description{
 61 | \code{get_hdr} is used to estimate a 2-dimensional density and compute
 62 | corresponding HDRs. The estimated density and HDRs are represented in a
 63 | discrete form as a grid, defined by arguments \code{rangex}, \code{rangey}, and \code{n}.
 64 | \code{get_hdr} is used internally by layer functions \code{stat_hdr()},
 65 | \code{stat_hdr_points()}, \code{stat_hdr_fun()}, etc.
 66 | }
 67 | \section{The \code{method} argument}{
 68 |  The density estimator used to estimate the
 69 | HDRs is specified with the \code{method} argument. The simplest way to specify
 70 | an estimator is to provide a character value to \code{method}, for example
 71 | \code{method = "kde"} specifies a kernel density estimator. However, this
 72 | specification is limited to the default behavior of the estimator.
 73 | 
 74 | Instead, it is possible to provide a function call, for example: \code{method = method_kde()}. In many cases, these functions accept parameters governing
 75 | the density estimation procedure. Here, \code{method_kde()} accepts parameters
 76 | \code{h} and \code{adjust}, both related to the kernel's bandwidth. For details, see
 77 | \code{?method_kde}. Every method of bivariate density estimation implemented has
 78 | such corresponding \verb{method_*()} function, each with an associated help
 79 | page.
 80 | 
 81 | Note: \code{geom_hdr()} and other layer functions also have \code{method} arguments
 82 | which behave in the same way. For more details on the use and
 83 | implementation of the \verb{method_*()} functions, see \code{vignette("method", "ggdensity")}.
 84 | }
 85 | 
 86 | \section{The \code{fun} argument}{
 87 |  If \code{method} is set to \code{"fun"}, \code{get_hdr()}
 88 | expects a bivariate probability density function to be specified with the
 89 | \code{fun} argument. It is required that \code{fun} be a function of at least two
 90 | arguments (\code{x} and \code{y}). Beyond these first two arguments, \code{fun} can have
 91 | arbitrarily many arguments; these can be set in \code{get_hdr()} as a named list
 92 | via the \code{args} parameter.
 93 | 
 94 | Note: \code{get_hdr()} requires that \code{fun} be vectorized in \code{x} and \code{y}. For an
 95 | example of an appropriate choice of \code{fun}, see the final example below.
 96 | }
 97 | 
 98 | \examples{
 99 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
100 | 
101 | # Two ways to specify `method`
102 | get_hdr(df, method = "kde")
103 | get_hdr(df, method = method_kde())
104 | 
105 | \dontrun{
106 | 
107 | # If parenthesis are omitted, `get_hdr()` errors
108 | get_hdr(df, method = method_kde)
109 | }
110 | 
111 | # Estimate different HDRs with `probs`
112 | get_hdr(df, method = method_kde(), probs = c(.975, .6, .2))
113 | 
114 | # Adjust estimator parameters with arguments to `method_kde()`
115 | get_hdr(df, method = method_kde(h = 1))
116 | 
117 | # Parametric normal estimator of density
118 | get_hdr(df, method = "mvnorm")
119 | get_hdr(df, method = method_mvnorm())
120 | 
121 | # Compute "population" HDRs of specified bivariate pdf with `method = "fun"`
122 | f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y)
123 | 
124 | get_hdr(
125 |   method = "fun", fun = f,
126 |   rangex = c(-5, 5), rangey = c(-5, 5)
127 |  )
128 | 
129 | get_hdr(
130 |   method = "fun", fun = f,
131 |   rangex = c(-5, 5), rangey = c(-5, 5),
132 |   args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args`
133 | )
134 | 
135 | }
136 | 


--------------------------------------------------------------------------------
/R/hdr_fun.R:
--------------------------------------------------------------------------------
  1 | #' Highest density regions of a bivariate pdf
  2 | #'
  3 | #' Compute and plot the highest density regions (HDRs) of a bivariate pdf.
  4 | #' `geom_hdr_fun()` draws filled regions, and `geom_hdr_lines_fun()` draws lines outlining the regions.
  5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default.
  6 | #'
  7 | #' @section Aesthetics: `geom_hdr_fun()` and `geom_hdr_lines_fun()` understand the following aesthetics (required
  8 | #'   aesthetics are in bold):
  9 | #'
 10 | #'   - x
 11 | #'   - y
 12 | #'   - alpha
 13 | #'   - color
 14 | #'   - fill (only `geom_hdr_fun`)
 15 | #'   - group
 16 | #'   - linetype
 17 | #'   - linewidth
 18 | #'   - subgroup
 19 | #'
 20 | #' @section Computed variables:
 21 | #'
 22 | #'   \describe{ \item{probs}{The probability associated with the highest density region, specified
 23 | #'   by `probs`.} }
 24 | #'
 25 | #' @inheritParams ggplot2::geom_path
 26 | #' @inheritParams ggplot2::stat_identity
 27 | #' @inheritParams ggplot2::stat_density2d
 28 | #' @param fun A function, the joint probability density function, must be
 29 | #' vectorized in its first two arguments; see examples.
 30 | #' @param args Named list of additional arguments passed on to `fun`.
 31 | #' @param probs Probabilities to compute highest density regions for.
 32 | #' @param n Resolution of grid `fun` is evaluated on.
 33 | #' @param xlim,ylim Range to compute and draw regions. If `NULL`, defaults to
 34 | #'   range of data if present.
 35 | #' @name geom_hdr_fun
 36 | #' @rdname geom_hdr_fun
 37 | #'
 38 | #' @import ggplot2
 39 | #'
 40 | #' @examples
 41 | #' # HDRs of the bivariate exponential
 42 | #' f <- function(x, y) dexp(x) * dexp(y)
 43 | #' ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10))
 44 | #'
 45 | #'
 46 | #' # HDRs of a custom parametric model
 47 | #'
 48 | #' # generate example data
 49 | #' n <- 1000
 50 | #' th_true <- c(3, 8)
 51 | #'
 52 | #' rdata <- function(n, th) {
 53 | #'   gen_single_obs <- function(th) {
 54 | #'     rchisq(2, df = th) # can be anything
 55 | #'   }
 56 | #'   df <- replicate(n, gen_single_obs(th))
 57 | #'   setNames(as.data.frame(t(df)), c("x", "y"))
 58 | #' }
 59 | #' data <- rdata(n, th_true)
 60 | #'
 61 | #' # estimate unknown parameters via maximum likelihood
 62 | #' likelihood <- function(th) {
 63 | #'   th <- abs(th) # hack to enforce parameter space boundary
 64 | #'   log_f <- function(v) {
 65 | #'     x <- v[1]; y <- v[2]
 66 | #'     dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE)
 67 | #'   }
 68 | #'   sum(apply(data, 1, log_f))
 69 | #' }
 70 | #' (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par)
 71 | #'
 72 | #' # plot f for the give model
 73 | #' f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2])
 74 | #'
 75 | #' ggplot(data, aes(x, y)) +
 76 | #'   geom_hdr_fun(fun = f, args = list(th = th_hat)) +
 77 | #'   geom_point(size = .25, color = "red") +
 78 | #'   xlim(0, 30) + ylim(c(0, 30))
 79 | #'
 80 | #' ggplot(data, aes(x, y)) +
 81 | #'   geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) +
 82 | #'   geom_point(size = .25, color = "red") +
 83 | #'   xlim(0, 30) + ylim(c(0, 30))
 84 | #'
 85 | #'
 86 | NULL
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | #' @rdname geom_hdr_fun
 94 | #' @export
 95 | stat_hdr_fun <- function(mapping = NULL, data = NULL,
 96 |   geom = "hdr_fun", position = "identity",
 97 |   ...,
 98 |   fun, args = list(),
 99 |   probs = c(.99, .95, .8, .5),
100 |   xlim = NULL, ylim = NULL, n = 100,
101 |   na.rm = FALSE,
102 |   show.legend = NA,
103 |   inherit.aes = TRUE) {
104 | 
105 |   if (is.null(data)) data <- ensure_nonempty_data
106 | 
107 |   layer(
108 |     data = data,
109 |     mapping = mapping,
110 |     stat = StatHdrFun,
111 |     geom = geom,
112 |     position = position,
113 |     show.legend = show.legend,
114 |     inherit.aes = inherit.aes,
115 |     params = list(
116 |       fun = fun,
117 |       args = args,
118 |       probs = probs,
119 |       xlim = xlim,
120 |       ylim = ylim,
121 |       n = n,
122 |       na.rm = na.rm,
123 |       ...
124 |     )
125 |   )
126 | }
127 | 
128 | 
129 | 
130 | #' @rdname geom_hdr_fun
131 | #' @format NULL
132 | #' @usage NULL
133 | #' @importFrom scales percent
134 | #' @export
135 | StatHdrFun <- ggproto("StatHdrFun", Stat,
136 | 
137 |   default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)),
138 | 
139 |   output = "bands",
140 | 
141 |   # very similar to StatHdr$compute_group(),
142 |   # only difference are the parameters fun + args (vs. method + parameters)
143 |   # -- this prevents factoring into one compute_group() method,
144 |   #    compute_group()'s arguments are different.
145 |   compute_group = function(self, data, scales, na.rm = FALSE,
146 |                            fun, args = list(), probs = c(.99, .95, .8, .5),
147 |                            n = 100, xlim = NULL, ylim = NULL) {
148 | 
149 |     if ((is.null(xlim) & is.null(scales$x)) | (is.null(ylim) & is.null(scales$y))) {
150 |       stop("If no data is provided to StatHdrFun, xlim and ylim must be specified")
151 |     }
152 | 
153 |     rangex <- xlim %||% scales$x$dimension()
154 |     rangey <- ylim %||% scales$y$dimension()
155 | 
156 |     # Only calculate HDR membership if we need to
157 |     need_membership <- (self$output == "points")
158 | 
159 |     res <- get_hdr(data, method = "fun", probs, n, rangex, rangey, hdr_membership = need_membership, fun = fun, args = args)
160 | 
161 |     res_to_df(res, probs, data$group[1], self$output)
162 | 
163 |   }
164 | )
165 | 
166 | 
167 | #' @rdname geom_hdr_fun
168 | #' @export
169 | geom_hdr_fun <- function(mapping = NULL, data = NULL,
170 |   stat = "hdr_fun", position = "identity",
171 |   ...,
172 |   na.rm = FALSE,
173 |   show.legend = NA,
174 |   inherit.aes = TRUE) {
175 | 
176 |   if (is.null(data)) data <- ensure_nonempty_data
177 | 
178 |   layer(
179 |     data = data,
180 |     mapping = mapping,
181 |     stat = stat,
182 |     geom = GeomHdrFun,
183 |     position = position,
184 |     show.legend = show.legend,
185 |     inherit.aes = inherit.aes,
186 |     params = list(
187 |       na.rm = na.rm,
188 |       ...
189 |     )
190 |   )
191 | }
192 | 
193 | 
194 | 
195 | #' @rdname geom_hdr_fun
196 | #' @format NULL
197 | #' @usage NULL
198 | #' @export
199 | GeomHdrFun <- ggproto("GeomHdrFun", GeomHdr)
200 | 


--------------------------------------------------------------------------------
/man/geom_hdr_points_fun.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr_points_fun.R
  3 | \docType{data}
  4 | \name{geom_hdr_points_fun}
  5 | \alias{geom_hdr_points_fun}
  6 | \alias{stat_hdr_points_fun}
  7 | \alias{StatHdrPointsFun}
  8 | \title{Scatterplot colored by highest density regions of a bivariate pdf}
  9 | \usage{
 10 | stat_hdr_points_fun(
 11 |   mapping = NULL,
 12 |   data = NULL,
 13 |   geom = "point",
 14 |   position = "identity",
 15 |   ...,
 16 |   fun,
 17 |   args = list(),
 18 |   probs = c(0.99, 0.95, 0.8, 0.5),
 19 |   xlim = NULL,
 20 |   ylim = NULL,
 21 |   n = 100,
 22 |   na.rm = FALSE,
 23 |   show.legend = NA,
 24 |   inherit.aes = TRUE
 25 | )
 26 | 
 27 | geom_hdr_points_fun(
 28 |   mapping = NULL,
 29 |   data = NULL,
 30 |   stat = "hdr_points_fun",
 31 |   position = "identity",
 32 |   ...,
 33 |   na.rm = FALSE,
 34 |   show.legend = NA,
 35 |   inherit.aes = TRUE
 36 | )
 37 | }
 38 | \arguments{
 39 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 40 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 41 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 42 | mapping.}
 43 | 
 44 | \item{data}{The data to be displayed in this layer. There are three
 45 | options:
 46 | 
 47 | If \code{NULL}, the default, the data is inherited from the plot
 48 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 49 | 
 50 | A \code{data.frame}, or other object, will override the plot
 51 | data. All objects will be fortified to produce a data frame. See
 52 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 53 | 
 54 | A \code{function} will be called with a single argument,
 55 | the plot data. The return value must be a \code{data.frame}, and
 56 | will be used as the layer data. A \code{function} can be created
 57 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 58 | 
 59 | \item{geom}{The geometric object to use to display the data, either as a
 60 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 61 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 62 | 
 63 | \item{position}{Position adjustment, either as a string naming the adjustment
 64 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 65 | position adjustment function. Use the latter if you need to change the
 66 | settings of the adjustment.}
 67 | 
 68 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 69 | often aesthetics, used to set an aesthetic to a fixed value, like
 70 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 71 | to the paired geom/stat.}
 72 | 
 73 | \item{fun}{A function, the joint probability density function, must be
 74 | vectorized in its first two arguments; see examples.}
 75 | 
 76 | \item{args}{Named list of additional arguments passed on to \code{fun}.}
 77 | 
 78 | \item{probs}{Probabilities to compute highest density regions for.}
 79 | 
 80 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 81 | range of data if present.}
 82 | 
 83 | \item{n}{Number of grid points in each direction.}
 84 | 
 85 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 86 | a warning. If \code{TRUE}, missing values are silently removed.}
 87 | 
 88 | \item{show.legend}{logical. Should this layer be included in the legends?
 89 | \code{NA}, the default, includes if any aesthetics are mapped.
 90 | \code{FALSE} never includes, and \code{TRUE} always includes.
 91 | It can also be a named logical vector to finely select the aesthetics to
 92 | display.}
 93 | 
 94 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
 95 | rather than combining with them. This is most useful for helper functions
 96 | that define both data and aesthetics and shouldn't inherit behaviour from
 97 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
 98 | 
 99 | \item{stat}{The statistical transformation to use on the data for this
100 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
101 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
102 | \code{"stat_count"})}
103 | }
104 | \description{
105 | Compute the highest density regions (HDRs) of a bivariate pdf and plot the provided
106 | data as a scatterplot with points colored according to their corresponding HDR.
107 | }
108 | \section{Aesthetics}{
109 |  geom_hdr_points_fun understands the following aesthetics
110 | (required aesthetics are in bold):
111 | \itemize{
112 | \item \strong{x}
113 | \item \strong{y}
114 | \item alpha
115 | \item color
116 | \item fill
117 | \item group
118 | \item linetype
119 | \item size
120 | \item subgroup
121 | }
122 | }
123 | 
124 | \section{Computed variables}{
125 | 
126 | 
127 | \describe{ \item{probs}{The probability associated with the highest density region, specified
128 | by \code{probs}.} }
129 | }
130 | 
131 | \examples{
132 | # Can plot points colored according to known pdf:
133 | set.seed(1)
134 | df <- data.frame(x = rexp(1000), y = rexp(1000))
135 | f <- function(x, y) dexp(x) * dexp(y)
136 | 
137 | ggplot(df, aes(x, y)) +
138 |   geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10))
139 | 
140 | 
141 | # Also allows for hdrs of a custom parametric model
142 | 
143 | # generate example data
144 | n <- 1000
145 | th_true <- c(3, 8)
146 | 
147 | rdata <- function(n, th) {
148 |   gen_single_obs <- function(th) {
149 |     rchisq(2, df = th) # can be anything
150 |   }
151 |   df <- replicate(n, gen_single_obs(th))
152 |   setNames(as.data.frame(t(df)), c("x", "y"))
153 | }
154 | data <- rdata(n, th_true)
155 | 
156 | # estimate unknown parameters via maximum likelihood
157 | likelihood <- function(th) {
158 |   th <- abs(th) # hack to enforce parameter space boundary
159 |   log_f <- function(v) {
160 |     x <- v[1]; y <- v[2]
161 |     dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE)
162 |   }
163 |   sum(apply(data, 1, log_f))
164 | }
165 | (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par)
166 | 
167 | # plot f for the give model
168 | f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2])
169 | 
170 | ggplot(data, aes(x, y)) +
171 |   geom_hdr_points_fun(fun = f, args = list(th = th_hat))
172 | 
173 | ggplot(data, aes(x, y)) +
174 |   geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black",
175 |     fun = f, args = list(th = th_hat), na.rm = TRUE) +
176 |   geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) +
177 |   lims(x = c(0, 15), y = c(0, 25))
178 | 
179 | }
180 | \keyword{datasets}
181 | 


--------------------------------------------------------------------------------
/man/geom_hdr_fun.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr_fun.R, R/hdr_lines_fun.R
  3 | \docType{data}
  4 | \name{geom_hdr_fun}
  5 | \alias{geom_hdr_fun}
  6 | \alias{stat_hdr_fun}
  7 | \alias{StatHdrFun}
  8 | \alias{GeomHdrFun}
  9 | \alias{stat_hdr_lines_fun}
 10 | \alias{StatHdrLinesFun}
 11 | \alias{geom_hdr_lines_fun}
 12 | \alias{GeomHdrLinesFun}
 13 | \title{Highest density regions of a bivariate pdf}
 14 | \usage{
 15 | stat_hdr_fun(
 16 |   mapping = NULL,
 17 |   data = NULL,
 18 |   geom = "hdr_fun",
 19 |   position = "identity",
 20 |   ...,
 21 |   fun,
 22 |   args = list(),
 23 |   probs = c(0.99, 0.95, 0.8, 0.5),
 24 |   xlim = NULL,
 25 |   ylim = NULL,
 26 |   n = 100,
 27 |   na.rm = FALSE,
 28 |   show.legend = NA,
 29 |   inherit.aes = TRUE
 30 | )
 31 | 
 32 | geom_hdr_fun(
 33 |   mapping = NULL,
 34 |   data = NULL,
 35 |   stat = "hdr_fun",
 36 |   position = "identity",
 37 |   ...,
 38 |   na.rm = FALSE,
 39 |   show.legend = NA,
 40 |   inherit.aes = TRUE
 41 | )
 42 | }
 43 | \arguments{
 44 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 45 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 46 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 47 | mapping.}
 48 | 
 49 | \item{data}{The data to be displayed in this layer. There are three
 50 | options:
 51 | 
 52 | If \code{NULL}, the default, the data is inherited from the plot
 53 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 54 | 
 55 | A \code{data.frame}, or other object, will override the plot
 56 | data. All objects will be fortified to produce a data frame. See
 57 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 58 | 
 59 | A \code{function} will be called with a single argument,
 60 | the plot data. The return value must be a \code{data.frame}, and
 61 | will be used as the layer data. A \code{function} can be created
 62 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 63 | 
 64 | \item{geom}{The geometric object to use to display the data, either as a
 65 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 66 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 67 | 
 68 | \item{position}{Position adjustment, either as a string naming the adjustment
 69 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 70 | position adjustment function. Use the latter if you need to change the
 71 | settings of the adjustment.}
 72 | 
 73 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 74 | often aesthetics, used to set an aesthetic to a fixed value, like
 75 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 76 | to the paired geom/stat.}
 77 | 
 78 | \item{fun}{A function, the joint probability density function, must be
 79 | vectorized in its first two arguments; see examples.}
 80 | 
 81 | \item{args}{Named list of additional arguments passed on to \code{fun}.}
 82 | 
 83 | \item{probs}{Probabilities to compute highest density regions for.}
 84 | 
 85 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 86 | range of data if present.}
 87 | 
 88 | \item{n}{Resolution of grid \code{fun} is evaluated on.}
 89 | 
 90 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 91 | a warning. If \code{TRUE}, missing values are silently removed.}
 92 | 
 93 | \item{show.legend}{logical. Should this layer be included in the legends?
 94 | \code{NA}, the default, includes if any aesthetics are mapped.
 95 | \code{FALSE} never includes, and \code{TRUE} always includes.
 96 | It can also be a named logical vector to finely select the aesthetics to
 97 | display.}
 98 | 
 99 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
100 | rather than combining with them. This is most useful for helper functions
101 | that define both data and aesthetics and shouldn't inherit behaviour from
102 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
103 | 
104 | \item{stat}{The statistical transformation to use on the data for this
105 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
106 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
107 | \code{"stat_count"})}
108 | }
109 | \description{
110 | Compute and plot the highest density regions (HDRs) of a bivariate pdf.
111 | \code{geom_hdr_fun()} draws filled regions, and \code{geom_hdr_lines_fun()} draws lines outlining the regions.
112 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default.
113 | }
114 | \section{Aesthetics}{
115 |  \code{geom_hdr_fun()} and \code{geom_hdr_lines_fun()} understand the following aesthetics (required
116 | aesthetics are in bold):
117 | \itemize{
118 | \item x
119 | \item y
120 | \item alpha
121 | \item color
122 | \item fill (only \code{geom_hdr_fun})
123 | \item group
124 | \item linetype
125 | \item linewidth
126 | \item subgroup
127 | }
128 | }
129 | 
130 | \section{Computed variables}{
131 | 
132 | 
133 | \describe{ \item{probs}{The probability associated with the highest density region, specified
134 | by \code{probs}.} }
135 | }
136 | 
137 | \examples{
138 | # HDRs of the bivariate exponential
139 | f <- function(x, y) dexp(x) * dexp(y)
140 | ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10))
141 | 
142 | 
143 | # HDRs of a custom parametric model
144 | 
145 | # generate example data
146 | n <- 1000
147 | th_true <- c(3, 8)
148 | 
149 | rdata <- function(n, th) {
150 |   gen_single_obs <- function(th) {
151 |     rchisq(2, df = th) # can be anything
152 |   }
153 |   df <- replicate(n, gen_single_obs(th))
154 |   setNames(as.data.frame(t(df)), c("x", "y"))
155 | }
156 | data <- rdata(n, th_true)
157 | 
158 | # estimate unknown parameters via maximum likelihood
159 | likelihood <- function(th) {
160 |   th <- abs(th) # hack to enforce parameter space boundary
161 |   log_f <- function(v) {
162 |     x <- v[1]; y <- v[2]
163 |     dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE)
164 |   }
165 |   sum(apply(data, 1, log_f))
166 | }
167 | (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par)
168 | 
169 | # plot f for the give model
170 | f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2])
171 | 
172 | ggplot(data, aes(x, y)) +
173 |   geom_hdr_fun(fun = f, args = list(th = th_hat)) +
174 |   geom_point(size = .25, color = "red") +
175 |   xlim(0, 30) + ylim(c(0, 30))
176 | 
177 | ggplot(data, aes(x, y)) +
178 |   geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) +
179 |   geom_point(size = .25, color = "red") +
180 |   xlim(0, 30) + ylim(c(0, 30))
181 | 
182 | 
183 | }
184 | \keyword{datasets}
185 | 


--------------------------------------------------------------------------------
/man/geom_hdr_rug_fun.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr_rug_fun.R
  3 | \docType{data}
  4 | \name{geom_hdr_rug_fun}
  5 | \alias{geom_hdr_rug_fun}
  6 | \alias{stat_hdr_rug_fun}
  7 | \alias{StatHdrRugFun}
  8 | \alias{GeomHdrRugFun}
  9 | \title{Rug plots of highest density region estimates of univariate pdfs}
 10 | \usage{
 11 | stat_hdr_rug_fun(
 12 |   mapping = NULL,
 13 |   data = NULL,
 14 |   geom = "hdr_rug_fun",
 15 |   position = "identity",
 16 |   ...,
 17 |   fun_x = NULL,
 18 |   fun_y = NULL,
 19 |   args_x = list(),
 20 |   args_y = list(),
 21 |   probs = c(0.99, 0.95, 0.8, 0.5),
 22 |   xlim = NULL,
 23 |   ylim = NULL,
 24 |   n = 512,
 25 |   na.rm = FALSE,
 26 |   show.legend = NA,
 27 |   inherit.aes = TRUE
 28 | )
 29 | 
 30 | geom_hdr_rug_fun(
 31 |   mapping = NULL,
 32 |   data = NULL,
 33 |   stat = "hdr_rug_fun",
 34 |   position = "identity",
 35 |   ...,
 36 |   outside = FALSE,
 37 |   sides = "bl",
 38 |   length = unit(0.03, "npc"),
 39 |   na.rm = FALSE,
 40 |   show.legend = TRUE,
 41 |   inherit.aes = TRUE
 42 | )
 43 | }
 44 | \arguments{
 45 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 46 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 47 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 48 | mapping.}
 49 | 
 50 | \item{data}{The data to be displayed in this layer. There are three
 51 | options:
 52 | 
 53 | If \code{NULL}, the default, the data is inherited from the plot
 54 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 55 | 
 56 | A \code{data.frame}, or other object, will override the plot
 57 | data. All objects will be fortified to produce a data frame. See
 58 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 59 | 
 60 | A \code{function} will be called with a single argument,
 61 | the plot data. The return value must be a \code{data.frame}, and
 62 | will be used as the layer data. A \code{function} can be created
 63 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 64 | 
 65 | \item{geom}{The geometric object to use to display the data, either as a
 66 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 67 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 68 | 
 69 | \item{position}{Position adjustment, either as a string naming the adjustment
 70 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 71 | position adjustment function. Use the latter if you need to change the
 72 | settings of the adjustment.}
 73 | 
 74 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 75 | often aesthetics, used to set an aesthetic to a fixed value, like
 76 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 77 | to the paired geom/stat.}
 78 | 
 79 | \item{fun_x, fun_y}{Functions, the univariate probability density function for the x- and/or y-axis.
 80 | First argument must be vectorized.}
 81 | 
 82 | \item{args_x, args_y}{Named list of additional arguments passed on to \code{fun_x} and/or \code{fun_y}.}
 83 | 
 84 | \item{probs}{Probabilities to compute highest density regions for.}
 85 | 
 86 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 87 | range of data.}
 88 | 
 89 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}.
 90 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.}
 91 | 
 92 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 93 | a warning. If \code{TRUE}, missing values are silently removed.}
 94 | 
 95 | \item{show.legend}{logical. Should this layer be included in the legends?
 96 | \code{NA}, the default, includes if any aesthetics are mapped.
 97 | \code{FALSE} never includes, and \code{TRUE} always includes.
 98 | It can also be a named logical vector to finely select the aesthetics to
 99 | display.}
100 | 
101 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
102 | rather than combining with them. This is most useful for helper functions
103 | that define both data and aesthetics and shouldn't inherit behaviour from
104 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
105 | 
106 | \item{stat}{The statistical transformation to use on the data for this
107 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
108 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
109 | \code{"stat_count"})}
110 | 
111 | \item{outside}{logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use \code{coord_cartesian(clip = "off")}. When set to TRUE, also consider changing the sides argument to "tr". See examples.}
112 | 
113 | \item{sides}{A string that controls which sides of the plot the rugs appear on.
114 | It can be set to a string containing any of \code{"trbl"}, for top, right,
115 | bottom, and left.}
116 | 
117 | \item{length}{A \code{\link[grid:unit]{grid::unit()}} object that sets the length of the rug lines. Use scale expansion to avoid overplotting of data.}
118 | }
119 | \description{
120 | Compute and plot the highest density regions (HDRs) of specified univariate pdf(s).
121 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default.
122 | }
123 | \section{Aesthetics}{
124 |  \code{geom_hdr_rug_fun()} understands the following aesthetics (required
125 | aesthetics are in bold):
126 | \itemize{
127 | \item x
128 | \item y
129 | \item alpha
130 | \item fill
131 | \item group
132 | \item subgroup
133 | }
134 | }
135 | 
136 | \section{Computed variables}{
137 | 
138 | 
139 | \describe{ \item{probs}{The probability of the highest density region, specified
140 | by \code{probs}, corresponding to each point.} }
141 | }
142 | 
143 | \examples{
144 | # Plotting data with exponential marginals
145 | df <- data.frame(x = rexp(1e3), y = rexp(1e3))
146 | 
147 | ggplot(df, aes(x, y)) +
148 |   geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) +
149 |   geom_point(size = .5) +
150 |   coord_fixed()
151 | 
152 | # without data/aesthetic mappings
153 | ggplot() +
154 |   geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) +
155 |   coord_fixed()
156 | 
157 | 
158 | # Plotting univariate normal data, estimating mean and sd
159 | df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3))
160 | 
161 | # estimating parameters
162 | mu_hat <- mean(df$x)
163 | sd_hat <- sd(df$x)
164 | 
165 | ggplot(df, aes(x)) +
166 |   geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) +
167 |   geom_density()
168 | 
169 | # Equivalent to `method_norm_1d()` with `geom_hdr_rug()`
170 | ggplot(df, aes(x)) +
171 |   geom_hdr_rug(method = method_norm_1d()) +
172 |   geom_density()
173 | }
174 | \keyword{datasets}
175 | 


--------------------------------------------------------------------------------
/R/hdr_rug_fun.R:
--------------------------------------------------------------------------------
  1 | #' Rug plots of highest density region estimates of univariate pdfs
  2 | #'
  3 | #' Compute and plot the highest density regions (HDRs) of specified univariate pdf(s).
  4 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default.
  5 | #'
  6 | #' @section Aesthetics: `geom_hdr_rug_fun()` understands the following aesthetics (required
  7 | #'   aesthetics are in bold):
  8 | #'
  9 | #'   - x
 10 | #'   - y
 11 | #'   - alpha
 12 | #'   - fill
 13 | #'   - group
 14 | #'   - subgroup
 15 | #'
 16 | #' @section Computed variables:
 17 | #'
 18 | #'   \describe{ \item{probs}{The probability of the highest density region, specified
 19 | #'   by `probs`, corresponding to each point.} }
 20 | #'
 21 | #' @inheritParams ggplot2::geom_rug
 22 | #' @inheritParams stat_hdr_rug
 23 | #' @param fun_x,fun_y Functions, the univariate probability density function for the x- and/or y-axis.
 24 | #'   First argument must be vectorized.
 25 | #' @param args_x,args_y Named list of additional arguments passed on to `fun_x` and/or `fun_y`.
 26 | #' @name geom_hdr_rug_fun
 27 | #' @rdname geom_hdr_rug_fun
 28 | #'
 29 | #' @examples
 30 | #' # Plotting data with exponential marginals
 31 | #' df <- data.frame(x = rexp(1e3), y = rexp(1e3))
 32 | #'
 33 | #' ggplot(df, aes(x, y)) +
 34 | #'   geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) +
 35 | #'   geom_point(size = .5) +
 36 | #'   coord_fixed()
 37 | #'
 38 | #' # without data/aesthetic mappings
 39 | #' ggplot() +
 40 | #'   geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) +
 41 | #'   coord_fixed()
 42 | #'
 43 | #'
 44 | #' # Plotting univariate normal data, estimating mean and sd
 45 | #' df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3))
 46 | #'
 47 | #' # estimating parameters
 48 | #' mu_hat <- mean(df$x)
 49 | #' sd_hat <- sd(df$x)
 50 | #'
 51 | #' ggplot(df, aes(x)) +
 52 | #'   geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) +
 53 | #'   geom_density()
 54 | #'
 55 | #' # Equivalent to `method_norm_1d()` with `geom_hdr_rug()`
 56 | #' ggplot(df, aes(x)) +
 57 | #'   geom_hdr_rug(method = method_norm_1d()) +
 58 | #'   geom_density()
 59 | NULL
 60 | 
 61 | 
 62 | #' @rdname geom_hdr_rug_fun
 63 | #' @export
 64 | stat_hdr_rug_fun <- function(mapping = NULL, data = NULL,
 65 |   geom = "hdr_rug_fun", position = "identity",
 66 |   ...,
 67 |   fun_x = NULL, fun_y = NULL,
 68 |   args_x = list(), args_y = list(),
 69 |   probs = c(.99, .95, .8, .5),
 70 |   xlim = NULL, ylim = NULL, n = 512,
 71 |   na.rm = FALSE,
 72 |   show.legend = NA,
 73 |   inherit.aes = TRUE) {
 74 | 
 75 |   if (is.null(data)) data <- ensure_nonempty_data
 76 | 
 77 |   layer(
 78 |     data = data,
 79 |     mapping = mapping,
 80 |     stat = StatHdrRugFun,
 81 |     geom = geom,
 82 |     position = position,
 83 |     show.legend = show.legend,
 84 |     inherit.aes = inherit.aes,
 85 |     params = list(
 86 |       fun_x = fun_x,
 87 |       fun_y = fun_y,
 88 |       args_x = args_x,
 89 |       args_y = args_y,
 90 |       probs = probs,
 91 |       xlim = xlim,
 92 |       ylim = ylim,
 93 |       n = n,
 94 |       na.rm = na.rm,
 95 |       ...
 96 |     )
 97 |   )
 98 | }
 99 | 
100 | 
101 | 
102 | #' @rdname geom_hdr_rug_fun
103 | #' @format NULL
104 | #' @usage NULL
105 | #' @export
106 | StatHdrRugFun <- ggproto("StatHdrRugFun", Stat,
107 | 
108 |   default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)),
109 |   # if fun_x or fun_y are unspecified data might be dropped
110 |   dropped_aes = c("x", "y"),
111 | 
112 |   # very similar to StatHdrRug$compute_group(),
113 |   # only difference are the parameters fun + args (vs. method + parameters)
114 |   # -- this prevents factoring into one compute_group() method,
115 |   #    compute_group()'s arguments are different.
116 |   compute_group = function(self, data, scales, na.rm = FALSE,
117 |                            fun_x = NULL, fun_y = NULL, args_x = list(), args_y = list(),
118 |                            probs = c(.99, .95, .8, .5),
119 |                            n = 512, xlim = NULL, ylim = NULL) {
120 | 
121 | 
122 |     # Recycle for both x, y
123 |     if (length(n) == 1) n <- rep(n, 2)
124 | 
125 |     # Estimate marginal densities
126 | 
127 |     # Initialize dfs for x and y axes,
128 |     # in case only x or y are supplied:
129 |     df_x <- data.frame()
130 |     df_y <- data.frame()
131 | 
132 | 
133 |     if (!is.null(fun_x)) {
134 | 
135 |       if (is.null(xlim) & is.null(scales$x)) {
136 |         stop("`xlim` must be specified if `x` aesthetic not provided to `StatHdrRugFun`")
137 |       }
138 | 
139 |       rangex <- xlim %||% scales$x$dimension()
140 | 
141 |       res_x <- get_hdr_1d(data$x, method = "fun", probs, n[1], rangex, hdr_membership = FALSE, fun = fun_x, args = args_x)
142 | 
143 |       df_x <- res_to_df_1d(res_x, probs, data$group[1], output = "rug")
144 | 
145 |       # Needs correct name for ggplot2 internals
146 |       df_x$axis <- "x"
147 |       df_x$y <- NA
148 | 
149 |     }
150 | 
151 | 
152 |     if (!is.null(fun_y)) {
153 | 
154 |       if (is.null(ylim) & is.null(scales$y)) {
155 |         stop("`ylim` must be specified if `y` aesthetic not provided to `StatHdrRugFun`")
156 |       }
157 | 
158 |       rangey <- ylim %||% scales$y$dimension()
159 | 
160 |       res_y <- get_hdr_1d(data$y, method = "fun", probs, n[1], rangey, hdr_membership = FALSE, fun = fun_y, args = args_y)
161 | 
162 |       df_y <- res_to_df_1d(res_y, probs, data$group[1], output = "rug")
163 | 
164 |       # Needs correct name for ggplot2 internals
165 |       df_y$axis <- "y"
166 |       df_y$y <- df_y$x
167 |       df_y$x <- NA
168 | 
169 |     }
170 | 
171 |     df <- rbind(df_x, df_y)
172 | 
173 |     # Need to remove extra col if only plotting x or y rug
174 |     if (is.null(fun_x)) df$x <- NULL
175 |     if (is.null(fun_y)) df$y <- NULL
176 | 
177 |     df
178 | 
179 | 
180 |   }
181 | )
182 | 
183 | 
184 | 
185 | #' @rdname geom_hdr_rug_fun
186 | #' @export
187 | geom_hdr_rug_fun <- function(mapping = NULL, data = NULL,
188 |                          stat = "hdr_rug_fun", position = "identity",
189 |                          ...,
190 |                          outside = FALSE,
191 |                          sides = "bl",
192 |                          length = unit(0.03, "npc"),
193 |                          na.rm = FALSE,
194 |                          show.legend = TRUE,
195 |                          inherit.aes = TRUE) {
196 | 
197 |   if (is.null(data)) data <- ensure_nonempty_data
198 | 
199 |   layer(
200 |     data = data,
201 |     mapping = mapping,
202 |     stat = stat,
203 |     geom = GeomHdrRugFun,
204 |     position = position,
205 |     show.legend = show.legend,
206 |     inherit.aes = inherit.aes,
207 |     params = list(
208 |       outside = outside,
209 |       sides = sides,
210 |       length = length,
211 |       na.rm = na.rm,
212 |       ...
213 |     )
214 |   )
215 | }
216 | 
217 | 
218 | #' @rdname geom_hdr_rug_fun
219 | #' @format NULL
220 | #' @usage NULL
221 | #' @export
222 | GeomHdrRugFun <- ggproto("GeomHdrRugFun", GeomHdrRug)
223 | 
224 | 
225 | 
226 | 
227 | 
228 | 


--------------------------------------------------------------------------------
/man/geom_hdr_rug.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr_rug.R
  3 | \docType{data}
  4 | \name{geom_hdr_rug}
  5 | \alias{geom_hdr_rug}
  6 | \alias{stat_hdr_rug}
  7 | \alias{StatHdrRug}
  8 | \alias{GeomHdrRug}
  9 | \title{Rug plots of marginal highest density region estimates}
 10 | \usage{
 11 | stat_hdr_rug(
 12 |   mapping = NULL,
 13 |   data = NULL,
 14 |   geom = "hdr_rug",
 15 |   position = "identity",
 16 |   ...,
 17 |   method = "kde",
 18 |   method_y = "kde",
 19 |   probs = c(0.99, 0.95, 0.8, 0.5),
 20 |   xlim = NULL,
 21 |   ylim = NULL,
 22 |   n = 512,
 23 |   na.rm = FALSE,
 24 |   show.legend = TRUE,
 25 |   inherit.aes = TRUE
 26 | )
 27 | 
 28 | geom_hdr_rug(
 29 |   mapping = NULL,
 30 |   data = NULL,
 31 |   stat = "hdr_rug",
 32 |   position = "identity",
 33 |   ...,
 34 |   outside = FALSE,
 35 |   sides = "bl",
 36 |   length = unit(0.03, "npc"),
 37 |   na.rm = FALSE,
 38 |   show.legend = TRUE,
 39 |   inherit.aes = TRUE
 40 | )
 41 | }
 42 | \arguments{
 43 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 44 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 45 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 46 | mapping.}
 47 | 
 48 | \item{data}{The data to be displayed in this layer. There are three
 49 | options:
 50 | 
 51 | If \code{NULL}, the default, the data is inherited from the plot
 52 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 53 | 
 54 | A \code{data.frame}, or other object, will override the plot
 55 | data. All objects will be fortified to produce a data frame. See
 56 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 57 | 
 58 | A \code{function} will be called with a single argument,
 59 | the plot data. The return value must be a \code{data.frame}, and
 60 | will be used as the layer data. A \code{function} can be created
 61 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 62 | 
 63 | \item{geom}{The geometric object to use to display the data, either as a
 64 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 65 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 66 | 
 67 | \item{position}{Position adjustment, either as a string naming the adjustment
 68 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 69 | position adjustment function. Use the latter if you need to change the
 70 | settings of the adjustment.}
 71 | 
 72 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 73 | often aesthetics, used to set an aesthetic to a fixed value, like
 74 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 75 | to the paired geom/stat.}
 76 | 
 77 | \item{method, method_y}{Density estimator(s) to use.
 78 | By default \code{method} is used for both x- and y-axis.
 79 | If specified, \code{method_y} will be used for y-axis.
 80 | Accepts character vector: \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"norm"}.
 81 | Alternatively accepts functions  which return closures corresponding to density estimates,
 82 | see \code{?get_hdr_1d} or \code{vignette("method", "ggdensity")}.}
 83 | 
 84 | \item{probs}{Probabilities to compute highest density regions for.}
 85 | 
 86 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 87 | range of data.}
 88 | 
 89 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}.
 90 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.}
 91 | 
 92 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 93 | a warning. If \code{TRUE}, missing values are silently removed.}
 94 | 
 95 | \item{show.legend}{logical. Should this layer be included in the legends?
 96 | \code{NA}, the default, includes if any aesthetics are mapped.
 97 | \code{FALSE} never includes, and \code{TRUE} always includes.
 98 | It can also be a named logical vector to finely select the aesthetics to
 99 | display.}
100 | 
101 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
102 | rather than combining with them. This is most useful for helper functions
103 | that define both data and aesthetics and shouldn't inherit behaviour from
104 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
105 | 
106 | \item{stat}{The statistical transformation to use on the data for this
107 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
108 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
109 | \code{"stat_count"})}
110 | 
111 | \item{outside}{logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use \code{coord_cartesian(clip = "off")}. When set to TRUE, also consider changing the sides argument to "tr". See examples.}
112 | 
113 | \item{sides}{A string that controls which sides of the plot the rugs appear on.
114 | It can be set to a string containing any of \code{"trbl"}, for top, right,
115 | bottom, and left.}
116 | 
117 | \item{length}{A \code{\link[grid:unit]{grid::unit()}} object that sets the length of the rug lines. Use scale expansion to avoid overplotting of data.}
118 | }
119 | \description{
120 | Perform 1D density estimation, compute and plot the resulting highest density
121 | regions in a way similar to \code{\link[ggplot2:geom_rug]{ggplot2::geom_rug()}}.
122 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default.
123 | }
124 | \section{Aesthetics}{
125 |  geom_hdr_rug understands the following aesthetics (required
126 | aesthetics are in bold):
127 | \itemize{
128 | \item x
129 | \item y
130 | \item alpha
131 | \item fill
132 | \item group
133 | \item subgroup
134 | }
135 | }
136 | 
137 | \section{Computed variables}{
138 | 
139 | 
140 | \describe{ \item{probs}{The probability of the highest density region, specified
141 | by \code{probs}, corresponding to each point.} }
142 | }
143 | 
144 | \examples{
145 | set.seed(1)
146 | df <- data.frame(x = rnorm(100), y = rnorm(100))
147 | 
148 | # Plot marginal HDRs for bivariate data
149 | ggplot(df, aes(x, y)) +
150 |   geom_point() +
151 |   geom_hdr_rug() +
152 |   coord_fixed()
153 | 
154 | ggplot(df, aes(x, y)) +
155 |   geom_hdr() +
156 |   geom_hdr_rug() +
157 |   coord_fixed()
158 | 
159 | # Plot HDR for univariate data
160 | ggplot(df, aes(x)) +
161 |   geom_density() +
162 |   geom_hdr_rug()
163 | 
164 | ggplot(df, aes(y = y)) +
165 |   geom_density() +
166 |   geom_hdr_rug()
167 | 
168 | # Specify location of marginal HDRs as in ggplot2::geom_rug()
169 | ggplot(df, aes(x, y)) +
170 |   geom_hdr() +
171 |   geom_hdr_rug(sides = "tr", outside = TRUE) +
172 |   coord_fixed(clip = "off")
173 | 
174 | # Can use same methods of density estimation as geom_hdr().
175 | # For data with constrained support, we suggest setting method = "histogram":
176 | ggplot(df, aes(x^2)) +
177 |  geom_histogram(bins = 30, boundary = 0) +
178 |  geom_hdr_rug(method = "histogram")
179 | 
180 | ggplot(df, aes(x^2, y^2)) +
181 |  geom_hdr(method = "histogram") +
182 |  geom_hdr_rug(method = "histogram") +
183 |  coord_fixed()
184 | 
185 | }
186 | \keyword{datasets}
187 | 


--------------------------------------------------------------------------------
/R/method_1d.R:
--------------------------------------------------------------------------------
  1 | # methods that return est pdf as closure  ---------------------------------
  2 | 
  3 | #' Univariate parametric normal HDR estimator
  4 | #'
  5 | #' Function used to specify univariate normal density estimator
  6 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`).
  7 | #'
  8 | #' For more details on the use and implementation of the `method_*_1d()` functions,
  9 | #' see `vignette("method", "ggdensity")`.
 10 | #'
 11 | #' @examples
 12 | #' # Normal estimators are useful when an assumption of normality is appropriate
 13 | #' df <- data.frame(x = rnorm(1e3))
 14 | #'
 15 | #' ggplot(df, aes(x)) +
 16 | #'   geom_hdr_rug(method = method_norm_1d()) +
 17 | #'   geom_density()
 18 | #'
 19 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
 20 | #' res <- get_hdr_1d(df$x, method = method_norm_1d())
 21 | #' str(res)
 22 | #'
 23 | #' @export
 24 | method_norm_1d <- function() {
 25 | 
 26 |   function(x) {
 27 | 
 28 |     mu_hat <- mean(x)
 29 |     sigma_hat <- sd(x)
 30 | 
 31 |     function(x) dnorm(x, mu_hat, sigma_hat)
 32 | 
 33 |   }
 34 | }
 35 | 
 36 | # methods that return closures that compute a grid ------------------------
 37 | 
 38 | #' Univariate kernel density HDR estimator
 39 | #'
 40 | #' Function used to specify univariate kernel density estimator
 41 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`).
 42 | #'
 43 | #' For more details on the use and implementation of the `method_*_1d()` functions,
 44 | #' see `vignette("method", "ggdensity")`.
 45 | #'
 46 | #' @inheritParams stats::density
 47 | #'
 48 | #' @examples
 49 | #' df <- data.frame(x = rnorm(1e3, sd = 3))
 50 | #'
 51 | #' ggplot(df, aes(x)) +
 52 | #'   geom_hdr_rug(method = method_kde_1d()) +
 53 | #'   geom_density()
 54 | #'
 55 | #' # Details of the KDE can be adjusted with arguments to `method_kde_1d()`
 56 | #' ggplot(df, aes(x)) +
 57 | #'   geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) +
 58 | #'   geom_density(adjust = 1/5)
 59 | #'
 60 | #' ggplot(df, aes(x)) +
 61 | #'   geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) +
 62 | #'   geom_density(kernel = "triangular")
 63 | #'
 64 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
 65 | #' res <- get_hdr_1d(df$x, method = method_kde_1d())
 66 | #' str(res)
 67 | #'
 68 | #' @export
 69 | method_kde_1d <- function(bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel) {
 70 | 
 71 |   function(x, n, range) {
 72 | 
 73 |     nx <- length(x)
 74 | 
 75 |     if (is.null(weights)) {
 76 |       weights <- rep(1 / nx, nx)
 77 |     } else {
 78 |       weights <- normalize(weights)
 79 |     }
 80 | 
 81 |     dens <- stats::density(
 82 |       x,
 83 |       bw = bw,
 84 |       adjust = adjust,
 85 |       kernel = kernel,
 86 |       weights = weights,
 87 |       window = window,
 88 |       n = n,
 89 |       from = range[1],
 90 |       to = range[2]
 91 |     )
 92 | 
 93 |     data.frame(
 94 |       x = dens$x,
 95 |       fhat = dens$y
 96 |     )
 97 | 
 98 |   }
 99 | }
100 | 
101 | #' Univariate histogram HDR estimator
102 | #'
103 | #' Function used to specify univariate histogram density estimator
104 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`).
105 | #'
106 | #' For more details on the use and implementation of the `method_*_1d()` functions,
107 | #' see `vignette("method", "ggdensity")`.
108 | #'
109 | #' @param bins Number of bins. Defaults to normal reference rule (Scott, pg 59).
110 | #'
111 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley.
112 | #'
113 | #' @examples
114 | #' # Histogram estimators can be useful when data has boundary constraints
115 | #' df <- data.frame(x = rexp(1e3))
116 | #'
117 | #' # Strip chart to visualize 1-d data
118 | #' p <- ggplot(df, aes(x)) +
119 | #'   geom_jitter(aes(y = 0), width = 0, height = 2) +
120 | #'   scale_y_continuous(name = NULL, breaks = NULL) +
121 | #'   coord_cartesian(ylim = c(-3, 3))
122 | #'
123 | #' p
124 | #'
125 | #' p + geom_hdr_rug(method = method_histogram_1d())
126 | #'
127 | #' # The resolution of the histogram estimator can be set via `bins`
128 | #' p + geom_hdr_rug(method = method_histogram_1d(bins = 5))
129 | #'
130 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
131 | #' res <- get_hdr_1d(df$x, method = method_histogram_1d())
132 | #' str(res)
133 | #'
134 | #' @export
135 | method_histogram_1d <- function(bins = NULL) {
136 | 
137 |   function(x, n, range) {
138 | 
139 |     nx <- length(x)
140 | 
141 |     # Default to normal reference rule (Scott p. 59)
142 |     if (is.null(bins)) {
143 |       hx <- 3.504 * stats::sd(x) * nx^(-1/3)
144 |       bins <- round((range[2] - range[1]) / hx)
145 |     }
146 | 
147 |     sx <- seq(range[1], range[2], length.out = bins + 1)
148 |     de_x <- sx[2] - sx[1]
149 |     midpts <- sx[-(bins+1)] + de_x/2
150 |     n <- as.numeric(table(cut(x, sx)))
151 | 
152 |     data.frame(
153 |       x = midpts,
154 |       fhat = normalize(n)
155 |     )
156 | 
157 |   }
158 | }
159 | 
160 | #' Univariate frequency polygon HDR estimator
161 | #'
162 | #' Function used to specify univariate frequency polygon density estimator
163 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`).
164 | #'
165 | #' For more details on the use and implementation of the `method_*_1d()` functions,
166 | #' see `vignette("method", "ggdensity")`.
167 | #'
168 | #' @inheritParams method_histogram_1d
169 | #'
170 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley.
171 | #'
172 | #' @examples
173 | #' df <- data.frame(x = rnorm(1e3))
174 | #'
175 | #' # Strip chart to visualize 1-d data
176 | #' p <- ggplot(df, aes(x)) +
177 | #'   geom_jitter(aes(y = 0), width = 0, height = 2) +
178 | #'   scale_y_continuous(name = NULL, breaks = NULL) +
179 | #'   coord_cartesian(ylim = c(-3, 3))
180 | #'
181 | #' p
182 | #'
183 | #' p + geom_hdr_rug(method = method_freqpoly_1d())
184 | #'
185 | #' # The resolution of the frequency polygon estimator can be set via `bins`
186 | #' p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100))
187 | #'
188 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs
189 | #' res <- get_hdr_1d(df$x, method = method_freqpoly_1d())
190 | #' str(res)
191 | #'
192 | #' @export
193 | method_freqpoly_1d <- function(bins = NULL) {
194 | 
195 |   function(x, n, range) {
196 | 
197 |     # Start with output from method_histogram
198 |     df <- method_histogram_1d(bins)(x, n, range)
199 | 
200 |     hx <- df$x[2] - df$x[1]
201 | 
202 |     # need to pad df from hist_marginal() w/ bins that have est prob of 0
203 |     # so that we can interpolate
204 |     df <- rbind(
205 | 
206 |       # add initial bin w/ est prob of 0
207 |       data.frame(
208 |         x = min(df$x) - hx,
209 |         fhat = 0
210 |       ),
211 | 
212 |       # include original histogram estimator
213 |       df,
214 | 
215 |       # add final bin w/ est prob of 0
216 |       data.frame(
217 |         x = max(df$x) + hx,
218 |         fhat = 0
219 |       )
220 | 
221 |     )
222 | 
223 |     sx <- seq(range[1], range[2], length.out = n)
224 | 
225 |     interpolate_fhat <- function(x) {
226 |       lower_x <- df$x[max(which(df$x < x))]
227 |       upper_x <- df$x[min(which(df$x >= x))]
228 | 
229 |       lower_fhat <- df$fhat[max(which(df$x < x))]
230 |       upper_fhat <- df$fhat[min(which(df$x >= x))]
231 | 
232 |       lower_fhat + (x - lower_x) * (upper_fhat - lower_fhat) / (upper_x - lower_x)
233 |     }
234 | 
235 |     dens <- vapply(sx, interpolate_fhat, numeric(1))
236 | 
237 |     data.frame(
238 |       x = sx,
239 |       fhat = dens
240 |     )
241 | 
242 |   }
243 | }
244 | 
245 | 
246 | 
247 | 


--------------------------------------------------------------------------------
/man/geom_hdr.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hdr.R, R/hdr_lines.R
  3 | \docType{data}
  4 | \name{geom_hdr}
  5 | \alias{geom_hdr}
  6 | \alias{stat_hdr}
  7 | \alias{StatHdr}
  8 | \alias{GeomHdr}
  9 | \alias{stat_hdr_lines}
 10 | \alias{StatHdrLines}
 11 | \alias{geom_hdr_lines}
 12 | \alias{GeomHdrLines}
 13 | \title{Highest density regions of a 2D density estimate}
 14 | \usage{
 15 | stat_hdr(
 16 |   mapping = NULL,
 17 |   data = NULL,
 18 |   geom = "hdr",
 19 |   position = "identity",
 20 |   ...,
 21 |   method = "kde",
 22 |   probs = c(0.99, 0.95, 0.8, 0.5),
 23 |   n = 100,
 24 |   xlim = NULL,
 25 |   ylim = NULL,
 26 |   na.rm = FALSE,
 27 |   show.legend = NA,
 28 |   inherit.aes = TRUE
 29 | )
 30 | 
 31 | geom_hdr(
 32 |   mapping = NULL,
 33 |   data = NULL,
 34 |   stat = "hdr",
 35 |   position = "identity",
 36 |   ...,
 37 |   na.rm = FALSE,
 38 |   show.legend = NA,
 39 |   inherit.aes = TRUE
 40 | )
 41 | }
 42 | \arguments{
 43 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
 44 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
 45 | at the top level of the plot. You must supply \code{mapping} if there is no plot
 46 | mapping.}
 47 | 
 48 | \item{data}{The data to be displayed in this layer. There are three
 49 | options:
 50 | 
 51 | If \code{NULL}, the default, the data is inherited from the plot
 52 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
 53 | 
 54 | A \code{data.frame}, or other object, will override the plot
 55 | data. All objects will be fortified to produce a data frame. See
 56 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
 57 | 
 58 | A \code{function} will be called with a single argument,
 59 | the plot data. The return value must be a \code{data.frame}, and
 60 | will be used as the layer data. A \code{function} can be created
 61 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
 62 | 
 63 | \item{geom}{The geometric object to use to display the data, either as a
 64 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the
 65 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})}
 66 | 
 67 | \item{position}{Position adjustment, either as a string naming the adjustment
 68 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a
 69 | position adjustment function. Use the latter if you need to change the
 70 | settings of the adjustment.}
 71 | 
 72 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are
 73 | often aesthetics, used to set an aesthetic to a fixed value, like
 74 | \code{colour = "red"} or \code{size = 3}. They may also be parameters
 75 | to the paired geom/stat.}
 76 | 
 77 | \item{method}{Density estimator to use, accepts character vector:
 78 | \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"mvnorm"}.
 79 | Alternatively accepts functions  which return closures corresponding to density estimates,
 80 | see \code{?get_hdr} or \code{vignette("method", "ggdensity")}.}
 81 | 
 82 | \item{probs}{Probabilities to compute highest density regions for.}
 83 | 
 84 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}.
 85 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.}
 86 | 
 87 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to
 88 | range of data.}
 89 | 
 90 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
 91 | a warning. If \code{TRUE}, missing values are silently removed.}
 92 | 
 93 | \item{show.legend}{logical. Should this layer be included in the legends?
 94 | \code{NA}, the default, includes if any aesthetics are mapped.
 95 | \code{FALSE} never includes, and \code{TRUE} always includes.
 96 | It can also be a named logical vector to finely select the aesthetics to
 97 | display.}
 98 | 
 99 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
100 | rather than combining with them. This is most useful for helper functions
101 | that define both data and aesthetics and shouldn't inherit behaviour from
102 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
103 | 
104 | \item{stat}{The statistical transformation to use on the data for this
105 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the
106 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than
107 | \code{"stat_count"})}
108 | }
109 | \description{
110 | Perform 2D density estimation, compute and plot the resulting highest density regions.
111 | \code{geom_hdr()} draws filled regions and \code{geom_hdr_lines()} draws lines outlining the regions.
112 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default.
113 | }
114 | \section{Aesthetics}{
115 |  \code{geom_hdr()} and \code{geom_hdr_lines()} understand the following aesthetics (required
116 | aesthetics are in bold):
117 | \itemize{
118 | \item \strong{x}
119 | \item \strong{y}
120 | \item alpha
121 | \item color
122 | \item fill (only \code{geom_hdr})
123 | \item group
124 | \item linetype
125 | \item linewidth
126 | \item subgroup
127 | }
128 | }
129 | 
130 | \section{Computed variables}{
131 | 
132 | 
133 | \describe{ \item{probs}{The probability associated with the highest density region, specified
134 | by \code{probs} argument.} }
135 | }
136 | 
137 | \examples{
138 | # Basic simulated data with bivariate normal data and various methods
139 | df <- data.frame(x = rnorm(1000), y = rnorm(1000))
140 | p <- ggplot(df, aes(x, y)) + coord_equal()
141 | 
142 | p + geom_hdr()
143 | p + geom_hdr(method = "mvnorm")
144 | p + geom_hdr(method = "freqpoly")
145 | # p + geom_hdr(method = "histogram")
146 | 
147 | # Adding point layers on top to visually assess region estimates
148 | pts <- geom_point(size = .2, color = "red")
149 | 
150 | p + geom_hdr() + pts
151 | p + geom_hdr(method = "mvnorm") + pts
152 | p + geom_hdr(method = "freqpoly") + pts
153 | # p + geom_hdr(method = "histogram") + pts
154 | 
155 | # Highest density region boundary lines
156 | p + geom_hdr_lines()
157 | p + geom_hdr_lines(method = "mvnorm")
158 | p + geom_hdr_lines(method = "freqpoly")
159 | # p + geom_hdr_lines(method = "histogram")
160 | 
161 | \dontrun{
162 | 
163 | # 2+ groups - mapping other aesthetics in the geom
164 | rdata <- function(n, n_groups = 3, radius = 3) {
165 |   list_of_dfs <- lapply(0:(n_groups-1), function(k) {
166 |     mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups))
167 |     m <- MASS::mvrnorm(n, radius*mu, diag(2))
168 |     structure(data.frame(m, as.character(k)), names = c("x", "y", "c"))
169 |   })
170 |   do.call("rbind", list_of_dfs)
171 | }
172 | 
173 | dfc <- rdata(1000, n_groups = 5)
174 | pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal()
175 | 
176 | pf + geom_hdr()
177 | pf + geom_hdr(method = "mvnorm")
178 | pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5)
179 | pf + geom_hdr(method = "histogram")
180 | pf + geom_hdr(method = "freqpoly")
181 | 
182 | pc <- ggplot(dfc, aes(x, y, color = c)) +
183 |  coord_equal() +
184 |  theme_minimal() +
185 |  theme(panel.grid.minor = element_blank())
186 | 
187 | pc + geom_hdr_lines()
188 | pc + geom_hdr_lines(method = "mvnorm")
189 | 
190 | 
191 | # Data with boundaries
192 | ggplot(df, aes(x^2)) + geom_histogram(bins = 30)
193 | ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0)
194 | ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram")
195 | 
196 | }
197 | 
198 | }
199 | \references{
200 | Scott, David W. Multivariate Density Estimation (2e), Wiley.
201 | }
202 | \keyword{datasets}
203 | 


--------------------------------------------------------------------------------
/R/hdr.R:
--------------------------------------------------------------------------------
  1 | #' Highest density regions of a 2D density estimate
  2 | #'
  3 | #' Perform 2D density estimation, compute and plot the resulting highest density regions.
  4 | #' `geom_hdr()` draws filled regions and `geom_hdr_lines()` draws lines outlining the regions.
  5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default.
  6 | #'
  7 | #' @section Aesthetics: `geom_hdr()` and `geom_hdr_lines()` understand the following aesthetics (required
  8 | #'   aesthetics are in bold):
  9 | #'
 10 | #'   - **x**
 11 | #'   - **y**
 12 | #'   - alpha
 13 | #'   - color
 14 | #'   - fill (only `geom_hdr`)
 15 | #'   - group
 16 | #'   - linetype
 17 | #'   - linewidth
 18 | #'   - subgroup
 19 | #'
 20 | #' @section Computed variables:
 21 | #'
 22 | #'   \describe{ \item{probs}{The probability associated with the highest density region, specified
 23 | #'   by `probs` argument.} }
 24 | #'
 25 | #' @inheritParams ggplot2::geom_path
 26 | #' @inheritParams ggplot2::stat_identity
 27 | #' @inheritParams ggplot2::stat_density2d
 28 | #' @param method Density estimator to use, accepts character vector:
 29 | #'   `"kde"`,`"histogram"`, `"freqpoly"`, or `"mvnorm"`.
 30 | #'   Alternatively accepts functions  which return closures corresponding to density estimates,
 31 | #'   see `?get_hdr` or `vignette("method", "ggdensity")`.
 32 | #' @param probs Probabilities to compute highest density regions for.
 33 | #' @param xlim,ylim Range to compute and draw regions. If `NULL`, defaults to
 34 | #'   range of data.
 35 | #' @param n Resolution of grid defined by `xlim` and `ylim`.
 36 | #'   Ignored if `method = "histogram"` or `method = "freqpoly"`.
 37 | #' @name geom_hdr
 38 | #' @rdname geom_hdr
 39 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley.
 40 | #'
 41 | #' @import ggplot2
 42 | #'
 43 | #' @examples
 44 | #' # Basic simulated data with bivariate normal data and various methods
 45 | #' df <- data.frame(x = rnorm(1000), y = rnorm(1000))
 46 | #' p <- ggplot(df, aes(x, y)) + coord_equal()
 47 | #'
 48 | #' p + geom_hdr()
 49 | #' p + geom_hdr(method = "mvnorm")
 50 | #' p + geom_hdr(method = "freqpoly")
 51 | #' # p + geom_hdr(method = "histogram")
 52 | #'
 53 | #' # Adding point layers on top to visually assess region estimates
 54 | #' pts <- geom_point(size = .2, color = "red")
 55 | #'
 56 | #' p + geom_hdr() + pts
 57 | #' p + geom_hdr(method = "mvnorm") + pts
 58 | #' p + geom_hdr(method = "freqpoly") + pts
 59 | #' # p + geom_hdr(method = "histogram") + pts
 60 | #'
 61 | #' # Highest density region boundary lines
 62 | #' p + geom_hdr_lines()
 63 | #' p + geom_hdr_lines(method = "mvnorm")
 64 | #' p + geom_hdr_lines(method = "freqpoly")
 65 | #' # p + geom_hdr_lines(method = "histogram")
 66 | #'
 67 | #' \dontrun{
 68 | #'
 69 | #' # 2+ groups - mapping other aesthetics in the geom
 70 | #' rdata <- function(n, n_groups = 3, radius = 3) {
 71 | #'   list_of_dfs <- lapply(0:(n_groups-1), function(k) {
 72 | #'     mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups))
 73 | #'     m <- MASS::mvrnorm(n, radius*mu, diag(2))
 74 | #'     structure(data.frame(m, as.character(k)), names = c("x", "y", "c"))
 75 | #'   })
 76 | #'   do.call("rbind", list_of_dfs)
 77 | #' }
 78 | #'
 79 | #' dfc <- rdata(1000, n_groups = 5)
 80 | #' pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal()
 81 | #'
 82 | #' pf + geom_hdr()
 83 | #' pf + geom_hdr(method = "mvnorm")
 84 | #' pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5)
 85 | #' pf + geom_hdr(method = "histogram")
 86 | #' pf + geom_hdr(method = "freqpoly")
 87 | #'
 88 | #' pc <- ggplot(dfc, aes(x, y, color = c)) +
 89 | #'  coord_equal() +
 90 | #'  theme_minimal() +
 91 | #'  theme(panel.grid.minor = element_blank())
 92 | #'
 93 | #' pc + geom_hdr_lines()
 94 | #' pc + geom_hdr_lines(method = "mvnorm")
 95 | #'
 96 | #'
 97 | #' # Data with boundaries
 98 | #' ggplot(df, aes(x^2)) + geom_histogram(bins = 30)
 99 | #' ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0)
100 | #' ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram")
101 | #'
102 | #' }
103 | #'
104 | NULL
105 | 
106 | 
107 | #' @rdname geom_hdr
108 | #' @export
109 | stat_hdr <- function(mapping = NULL, data = NULL,
110 |                      geom = "hdr", position = "identity",
111 |                      ...,
112 |                      method = "kde",
113 |                      probs = c(.99, .95, .8, .5),
114 |                      n = 100,
115 |                      xlim = NULL,
116 |                      ylim = NULL,
117 |                      na.rm = FALSE,
118 |                      show.legend = NA,
119 |                      inherit.aes = TRUE) {
120 |   layer(
121 |     data = data,
122 |     mapping = mapping,
123 |     stat = StatHdr,
124 |     geom = geom,
125 |     position = position,
126 |     show.legend = show.legend,
127 |     inherit.aes = inherit.aes,
128 |     params = list(
129 |       method = method,
130 |       probs = probs,
131 |       n = n,
132 |       xlim = xlim,
133 |       ylim = ylim,
134 |       na.rm = na.rm,
135 |       ...
136 |     )
137 |   )
138 | }
139 | 
140 | 
141 | 
142 | 
143 | #' @rdname geom_hdr
144 | #' @format NULL
145 | #' @usage NULL
146 | #' @importFrom scales percent
147 | #' @export
148 | StatHdr <- ggproto("StatHdr", Stat,
149 | 
150 |   required_aes = c("x", "y"),
151 |   default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)),
152 | 
153 |   output = "bands",
154 | 
155 |   compute_group = function(self, data, scales, na.rm = FALSE,
156 |                            method = "kde", probs = c(.99, .95, .8, .5),
157 |                            n = 100, xlim = NULL, ylim = NULL) {
158 | 
159 |     rangex <- xlim %||% scales$x$dimension()
160 |     rangey <- ylim %||% scales$y$dimension()
161 | 
162 |     # Only calculate HDR membership if we need to
163 |     need_membership <- (self$output == "points")
164 | 
165 |     res <- get_hdr(data, method, probs, n, rangex, rangey, hdr_membership = need_membership)
166 | 
167 |     res_to_df(res, probs, data$group[1], self$output)
168 | 
169 |   }
170 | )
171 | 
172 | # internal helper function to convert output of `get_hdr[_1d]()` into
173 | # what `GeomHdr*$draw_group()` methods need
174 | res_to_df <- function(res, probs, group, output) {
175 | 
176 |   probs <- fix_probs(probs)
177 | 
178 |   # Need z for xyz_to_isobands/lines()
179 |   res$df_est$z <- res$df_est$fhat
180 | 
181 |   if (output == "bands") {
182 | 
183 |     isobands <- xyz_to_isobands(res$df_est, res$breaks)
184 |     names(isobands) <- scales::percent_format(accuracy = 1)(probs)
185 |     df <- iso_to_polygon(isobands, group)
186 |     df$probs <- ordered(df$level, levels = names(isobands))
187 |     df$level <- NULL
188 | 
189 |   } else if (output == "lines") {
190 | 
191 |     isolines <- xyz_to_isolines(res$df_est, res$breaks)
192 |     names(isolines) <- scales::percent_format(accuracy = 1)(probs)
193 |     df <- iso_to_path(isolines, group)
194 |     df$probs <- ordered(df$level, levels = names(isolines))
195 |     df$level <- NULL
196 | 
197 |   } else if (output == "points") {
198 | 
199 |     df <- res$data
200 |     df$hdr_membership <- scales::percent_format(accuracy = 1)(df$hdr_membership)
201 |     df$probs <- ordered(df$hdr_membership, levels = scales::percent_format(accuracy = 1)(c(1, probs)))
202 |     df$hdr_membership <- NULL
203 | 
204 |   }
205 | 
206 |   df
207 | 
208 | }
209 | 
210 | 
211 | 
212 | #' @rdname geom_hdr
213 | #' @export
214 | geom_hdr <- function(mapping = NULL, data = NULL,
215 |                        stat = "hdr", position = "identity",
216 |                        ...,
217 |                        na.rm = FALSE,
218 |                        show.legend = NA,
219 |                        inherit.aes = TRUE) {
220 |   layer(
221 |     data = data,
222 |     mapping = mapping,
223 |     stat = stat,
224 |     geom = GeomHdr,
225 |     position = position,
226 |     show.legend = show.legend,
227 |     inherit.aes = inherit.aes,
228 |     params = list(
229 |       na.rm = na.rm,
230 |       ...
231 |     )
232 |   )
233 | }
234 | 
235 | 
236 | 
237 | #' @rdname geom_hdr
238 | #' @format NULL
239 | #' @usage NULL
240 | #' @export
241 | GeomHdr <- ggproto("GeomHdr", GeomPolygon)
242 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_hdr.R:
--------------------------------------------------------------------------------
  1 | test_that("structure of get_hdr() return value is as expected", {
  2 | 
  3 |   data <- data.frame(
  4 |     x = 1:10,
  5 |     y = rep(1:5, each = 2)
  6 |   )
  7 | 
  8 |   res <- get_hdr(data)
  9 | 
 10 |   # Checking the top level of res
 11 |   expect_type(res, "list")
 12 |   expect_equal(length(res), 3)
 13 |   expect_equal(names(res), c("df_est", "breaks", "data"))
 14 | 
 15 |   # Checking res$df_est:
 16 |   expect_type(res$df_est, "list")
 17 |   expect_equal(ncol(res$df_est), 5)
 18 |   expect_equal(colnames(res$df_est), c("x", "y", "fhat", "fhat_discretized", "hdr"))
 19 | 
 20 |   # Checking res$data
 21 |   expect_type(res$data, "list")
 22 |   expect_equal(ncol(res$data), 3)
 23 |   expect_equal(nrow(res$data), 10)
 24 |   expect_equal(colnames(res$data), c("x", "y", "hdr_membership"))
 25 | 
 26 |   # Checking res$breaks
 27 |   expect_type(res$breaks, "double")
 28 |   expect_equal(length(res$breaks), 5)
 29 |   expect_equal(names(res$breaks), c("99%", "95%", "80%", "50%", NA))
 30 | 
 31 | 
 32 |   # Now with non-default args -----------------------------------------
 33 |   res <- get_hdr(data, probs = c(.989, .878, .67, .43, .21), hdr_membership = FALSE)
 34 | 
 35 |   # Checking res$data
 36 |   expect_equal(ncol(res$data), 2)
 37 | 
 38 |   # Checking res$breaks
 39 |   expect_type(res$breaks, "double")
 40 |   expect_equal(length(res$breaks), 6)
 41 |   expect_equal(names(res$breaks), c("99%", "88%", "67%", "43%", "21%", NA))
 42 | 
 43 | })
 44 | 
 45 | test_that("`method` can be provided as a character vector or function", {
 46 | 
 47 |   data <- data.frame(
 48 |     x = 1:10,
 49 |     y = rep(1:5, each = 2)
 50 |   )
 51 | 
 52 |   expect_equal(get_hdr(data, "kde"), get_hdr(data, method_kde()))
 53 |   expect_equal(get_hdr(data, "mvnorm"), get_hdr(data, method_mvnorm()))
 54 |   expect_equal(get_hdr(data, "freqpoly"), get_hdr(data, method_freqpoly()))
 55 |   expect_equal(get_hdr(data, "histogram"), get_hdr(data, method_histogram()))
 56 | 
 57 | })
 58 | 
 59 | test_that("get_hdr() errors informatively if bad `method` argument", {
 60 | 
 61 |   data <- data.frame(
 62 |     x = 1:10,
 63 |     y = rep(1:5, each = 2)
 64 |   )
 65 | 
 66 |   expect_error(get_hdr(data, method = "not-a-method"), regexp = "Invalid method specified")
 67 |   expect_error(get_hdr(data, method = method_kde), regexp = "did you forget")
 68 | 
 69 | })
 70 | 
 71 | 
 72 | # # The data used for tests:
 73 | #
 74 | # set.seed(1)
 75 | # df <- data.frame(
 76 | #   x = rnorm(5e3),
 77 | #   y = rnorm(5e3)
 78 | # )
 79 | #
 80 | # write_rds(df, here::here("tests/testthat/fixtures/df_norm.rds"))
 81 | 
 82 | test_that("get_hdr(method = method_kde()) calculations are consistent", {
 83 | 
 84 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
 85 | 
 86 |   res <- get_hdr(data, method_kde())
 87 | 
 88 |   # fhat_discretized should be normalized to sum to 1
 89 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
 90 | 
 91 |   # By default, estimate is evaluated on the same range as original data
 92 |   expect_equal(range(res$df_est$x), range(data$x))
 93 |   expect_equal(range(res$df_est$y), range(data$y))
 94 | 
 95 |   # default grid is 100 x 100:
 96 |   expect_equal(nrow(res$df_est), 100 * 100)
 97 | 
 98 |   # Checksums:
 99 |   expect_equal(round(sum(res$df_est$fhat)), 185)
100 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.0083, 0.0303, 0.0731, Inf))
101 |   expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1))
102 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(858, 1149, 1597, 1771, 4625))
103 | 
104 | 
105 |   # Checking non-default args ------------------------
106 | 
107 |   res <- get_hdr(data, method_kde(adjust = .4), probs = c(.97, .85, .4, .1), n = c(100, 200), rangex = c(-3, 2), rangey = c(-1, 3))
108 |   # fhat_discretized should be normalized to sum to 1
109 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
110 | 
111 |   # Was the custom range used
112 |   expect_equal(range(res$df_est$x), c(-3, 2))
113 |   expect_equal(range(res$df_est$y), c(-1, 3))
114 | 
115 |   # default grid is 100 x 100:
116 |   expect_equal(nrow(res$df_est), 100 * 200)
117 | 
118 |   # Checksums:
119 |   expect_equal(round(sum(res$df_est$fhat)), 808)
120 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0105, 0.0352, 0.1036, 0.1522, Inf))
121 |   expect_equal(sort(unique(res$df_est$hdr)), c(.1, .4, .85, .97, 1))
122 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(495, 1923, 5584, 4524, 7474))
123 | 
124 | })
125 | 
126 | # TODO: above, for other methods
127 | 
128 | 
129 | 
130 | 
131 | test_that("get_hdr() works with custom function factory supplied to `method`", {
132 | 
133 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
134 | 
135 |   method_mvnorm_ind <- function() {
136 | 
137 |     function(data) {
138 | 
139 |       mean_x <- mean(data$x); s_x <- sd(data$x)
140 |       mean_y <- mean(data$y); s_y <- sd(data$y)
141 | 
142 |       function(x, y) dnorm(x, mean = mean_x, sd = s_x) * dnorm(y, mean = mean_y, sd = s_y)
143 | 
144 |     }
145 | 
146 |   }
147 | 
148 |   res <- get_hdr(data, method = method_mvnorm_ind())
149 | 
150 |   # fhat_discretized should be normalized to sum to 1
151 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
152 | 
153 |   # By default, estimate is evaluated on the same range as original data
154 |   expect_equal(range(res$df_est$x), range(data$x))
155 |   expect_equal(range(res$df_est$y), range(data$y))
156 | 
157 |   # default grid is 100 x 100:
158 |   expect_equal(nrow(res$df_est), 100 * 100)
159 | 
160 |   # Checksums:
161 |   expect_equal(round(sum(res$df_est$fhat)), 185)
162 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.0078, 0.031, 0.078, Inf))
163 |   expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1))
164 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(826, 1090, 1642, 1863, 4579))
165 | 
166 | })
167 | 
168 | 
169 | 
170 | test_that("get_hdr() works with custom function factory supplied to `method`", {
171 | 
172 |   data <- readRDS(test_path("fixtures", "df_norm.rds"))
173 | 
174 |   method_fixed_grid <- function() {
175 | 
176 |     function(data, n, rangex, rangey) {
177 | 
178 |       df_grid <- expand.grid(
179 |         x = seq(rangex[1], rangex[2], length.out = n),
180 |         y = seq(rangey[1], rangey[2], length.out = n)
181 |       )
182 | 
183 |       df_grid$fhat <- dnorm(df_grid$x) * dnorm(df_grid$y)
184 | 
185 |       df_grid
186 | 
187 |     }
188 | 
189 |   }
190 | 
191 |   res <- get_hdr(data, method = method_fixed_grid())
192 | 
193 |   # fhat_discretized should be normalized to sum to 1
194 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
195 | 
196 |   # By default, estimate is evaluated on the same range as original data
197 |   expect_equal(range(res$df_est$x), range(data$x))
198 |   expect_equal(range(res$df_est$y), range(data$y))
199 | 
200 |   # default grid is 100 x 100:
201 |   expect_equal(nrow(res$df_est), 100 * 100)
202 | 
203 |   # Checksums:
204 |   expect_equal(round(sum(res$df_est$fhat)), 185)
205 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.008, 0.0321, 0.0796, Inf))
206 |   expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1))
207 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(806, 1065, 1603, 1824, 4702))
208 | 
209 | })
210 | 
211 | 
212 | test_that("get_hdr() fails if `method != 'fun' and `data` isn't provided", {
213 | 
214 |   expect_error(get_hdr(method = method_kde()), regexp = ".data. must be provided")
215 | 
216 | })
217 | 
218 | test_that("fun argument of get_hdr() requires rangex/y", {
219 | 
220 |   expect_error(get_hdr(method = "fun", fun = function(x, y) dexp(x) * dexp(y)), regexp = ".rangey. must be provided")
221 | 
222 | })
223 | 
224 | 
225 | test_that("fun argument of get_hdr() works", {
226 | 
227 |   res <- get_hdr(method = "fun", fun = function(x, y) dexp(x) * dexp(y), rangex = c(0, 10), rangey = c(0, 10))
228 | 
229 |   # Structure of res is as expected
230 |   expect_type(res, "list")
231 |   expect_equal(length(res), 3)
232 |   expect_equal(names(res), c("df_est", "breaks", "data"))
233 | 
234 |   expect_null(res$data)
235 | 
236 |   # fhat_discretized should be normalized to sum to 1
237 |   expect_equal(sum(res$df_est$fhat_discretized), 1)
238 | 
239 |   expect_equal(range(res$df_est$x), c(0, 10))
240 |   expect_equal(range(res$df_est$y), c(0, 10))
241 | 
242 |   # default grid is 100 x 100:
243 |   expect_equal(nrow(res$df_est), 100 * 100)
244 | 
245 |   # Checksums:
246 |   expect_equal(round(sum(res$df_est$fhat)), 108)
247 |   expect_equal(as.numeric(round(res$breaks, 4)), c(0.0014, 0.0096, 0.0534, 0.1987, Inf))
248 |   expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1))
249 |   expect_equal(as.numeric(table(res$df_est$hdr)), c(145, 306, 669, 1045, 7835))
250 | })
251 | 
252 | 
253 | 
254 | 
255 | 
256 | 
257 | 
258 | 
259 | 
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 
267 | 
268 | 
269 | 
270 | 
271 | 
272 | 
273 | 
274 | 
275 | 
276 | 
277 | 
278 | 
279 | 
280 | 
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 


--------------------------------------------------------------------------------
/R/get_hdr_1d.R:
--------------------------------------------------------------------------------
  1 | #' Computing the highest density regions of a 1D density
  2 | #'
  3 | #' `get_hdr_1d` is used to estimate a 1-dimensional density and compute corresponding HDRs.
  4 | #' The estimated density and HDRs are represented in a discrete form as a grid, defined by arguments `range` and `n`.
  5 | #' `get_hdr_1d` is used internally by layer functions `stat_hdr_rug()` and `stat_hdr_rug_fun()`.
  6 | #'
  7 | #' @inheritParams get_hdr
  8 | #' @param method Either a character (`"kde"`, `"norm"`, `"histogram"`, `"freqpoly"`, or `"fun"`) or `method_*_1d()` function.
  9 | #'   See the "The `method` argument" section below for details.
 10 | #' @param x A vector of data
 11 | #' @param hdr_membership Should HDR membership of data points (`x`) be computed?
 12 | #' @param range Range of grid representing estimated density and HDRs.
 13 | #' @param n Resolution of grid representing estimated density and HDRs.
 14 | #' @param fun Optional, a probability density function, must be vectorized in its first argument.
 15 | #'   See the "The `fun` argument" section below for details.
 16 | #'
 17 | #' @section The `method` argument:
 18 | #' The density estimator used to estimate the HDRs is specified with the `method` argument.
 19 | #' The simplest way to specify an estimator is to provide a character value to `method`,
 20 | #' for example `method = "kde"` specifies a kernel density estimator.
 21 | #' However, this specification is limited to the default behavior of the estimator.
 22 | #'
 23 | #' Instead, it is possible to provide a function call, for example: `method = method_kde_1d()`.
 24 | #' This is slightly different from the function calls provided in `get_hdr()`, note the `_1d` suffix.
 25 | #' In many cases, these functions accept parameters governing the density estimation procedure.
 26 | #' Here, `method_kde_1d()` accepts several parameters related to the choice of kernel.
 27 | #' For details, see `?method_kde_1d`.
 28 | #' Every method of univariate density estimation implemented has such corresponding `method_*_1d()` function,
 29 | #' each with an associated help page.
 30 | #'
 31 | #' Note: `geom_hdr_rug()` and other layer functions also have `method` arguments which behave in the same way.
 32 | #' For more details on the use and implementation of the `method_*_1d()` functions,
 33 | #' see `vignette("method", "ggdensity")`.
 34 | #'
 35 | #' @section The `fun` argument:
 36 | #' If `method` is set to `"fun"`, `get_hdr_1d()` expects a univariate probability
 37 | #' density function to be specified with the `fun` argument.
 38 | #' It is required that `fun` be a function of at least one argument (`x`).
 39 | #' Beyond this first argument, `fun` can have arbitrarily many arguments;
 40 | #' these can be set in `get_hdr_1d()` as a named list via the `args` parameter.
 41 | #'
 42 | #' Note: `get_hdr_1d()` requires that `fun` be vectorized in `x`.
 43 | #' For an example of an appropriate choice of `fun`, see the final example below.
 44 | #'
 45 | #' @returns
 46 | #'
 47 | #' `get_hdr_1d` returns a list with elements `df_est` (`data.frame`), `breaks` (named `numeric`), and `data` (`data.frame`).
 48 | #'
 49 | #' * `df_est`: the estimated HDRs and density evaluated on the grid defined by `range` and `n`.
 50 | #' The column of estimated HDRs (`df_est$hdr`) is a numeric vector with values from `probs`.
 51 | #' The columns `df_est$fhat` and `df_est$fhat_discretized` correspond to the estimated density
 52 | #' on the original scale and rescaled to sum to 1, respectively.
 53 | #'
 54 | #' * `breaks`: the heights of the estimated density (`df_est$fhat`) corresponding to the HDRs specified by `probs`.
 55 | #' Will always have additional element `Inf` representing the cutoff for the 100% HDR.
 56 | #'
 57 | #' * `data`: the original data provided in the `data` argument.
 58 | #' If `hdr_membership` is set to `TRUE`, this includes a column (`data$hdr_membership`)
 59 | #' with the HDR corresponding to each data point.
 60 | #'
 61 | #' @examples
 62 | #' x <- rnorm(1e3)
 63 | #'
 64 | #' # Two ways to specify `method`
 65 | #' get_hdr_1d(x, method = "kde")
 66 | #' get_hdr_1d(x, method = method_kde_1d())
 67 | #'
 68 | #' \dontrun{
 69 | #'
 70 | #' # If parenthesis are omitted, `get_hdr_1d()` errors
 71 | #' get_hdr_1d(df, method = method_kde_1d)
 72 | #'
 73 | #' # If the `_1d` suffix is omitted, `get_hdr_1d()` errors
 74 | #' get_hdr_1d(x, method = method_kde())
 75 | #' }
 76 | #'
 77 | #' # Adjust estimator parameters with arguments to `method_kde_1d()`
 78 | #' get_hdr_1d(x, method = method_kde_1d(kernel = "triangular"))
 79 | #'
 80 | #' # Estimate different HDRs with `probs`
 81 | #' get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2))
 82 | #'
 83 | #' # Compute "population" HDRs of specified univariate pdf with `method = "fun"`
 84 | #' f <- function(x, sd = 1) dnorm(x, sd = sd)
 85 | #' get_hdr_1d(method = "fun", fun = f, range = c(-5, 5))
 86 | #' get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5))
 87 | #'
 88 | #'
 89 | #' @export
 90 | get_hdr_1d <- function(x = NULL, method = "kde", probs = c(.99, .95, .8, .5), n = 512, range = NULL, hdr_membership = TRUE, fun, args = list()) {
 91 | 
 92 |   # Deal with missing data argument
 93 |   if (is.null(x)) {
 94 |     if (!is.character(method) | (is.character(method) && method != "fun")) {
 95 |       stop('`x` must be provided unless `method = "fun"`')
 96 |     } else {
 97 |       if (is.null(range)) {
 98 |         stop('If `x` is unspecified, `range` must be provided when `method = "fun"`')
 99 |       }
100 |     }
101 |   }
102 | 
103 |   range <- range %||% range(x)
104 | 
105 |   probs <- fix_probs(probs)
106 | 
107 |   # Create df_est (estimated density evaluated on a grid) depending on specified method:
108 |   if (is.character(method) && method == "fun") {
109 | 
110 |     df_est <- f_est_1d(method = NULL, x = x, n, range = range, fun = fun, args = args)
111 | 
112 |   } else  {
113 | 
114 |     if (is.character(method)) {
115 | 
116 |       if (!method %in% c("kde", "norm", "histogram", "freqpoly")) stop("Invalid method specified")
117 | 
118 |       # If method is provided as a character, re-assign correct function output:
119 |       method <- switch(method,
120 |         "kde"       = method_kde_1d(),
121 |         "histogram" = method_histogram_1d(),
122 |         "freqpoly"  = method_freqpoly_1d(),
123 |         "norm"      = method_norm_1d()
124 |       )
125 | 
126 |     }
127 | 
128 |     # parse args of method to determine strategy of `method`
129 |     method_formals <- names(formals(method))
130 | 
131 |     # If `data` is the only argument to `method`, we know `method`
132 |     # is a function factory, returning a closure of pdf in terms of x, y:
133 |     if (length(method_formals) == 1 && method_formals %in% c("x", "y")) {
134 | 
135 |       df_est <- f_est_1d(method, x, n, range)
136 | 
137 |     # Otherwise `method` computes a grid for us, shortcutting
138 |     # representing pdf in terms of x, y:
139 |     } else if (length(method_formals) == 3 && method_formals[1] %in% c("x", "y") & all(method_formals[2:3] == c("n", "range"))) {
140 | 
141 |       df_est <- method(x, n, range)
142 | 
143 |     } else if ("data" %in% method_formals) {
144 | 
145 |       stop("Invalid `method` argument -- did you forget the `_1d()`?")
146 | 
147 |     } else {
148 | 
149 |       stop("Invalid `method` argument -- did you forget the `()`?")
150 | 
151 |     }
152 | 
153 |   }
154 | 
155 | 
156 |   # Manipulate df_est to get information about HDRs:
157 | 
158 |   # force estimate to integrate to 1
159 |   df_est$fhat_discretized <- normalize(df_est$fhat)
160 | 
161 |   # temporarily rescale df$fhat for stability
162 |   fhat_max <- max(df_est$fhat)
163 |   df_est$fhat <- df_est$fhat / fhat_max
164 | 
165 |   # find cutoffs (in terms of rescaled fhat)
166 |   breaks <- c(find_cutoff(df_est, probs), Inf)
167 | 
168 |   # find HDRs for points in the grid
169 |   df_est$hdr <- vapply(df_est$fhat, get_hdr_val, numeric(1), breaks, probs)
170 | 
171 |   # find hdr membership of points from data
172 |   if (!is.null(x) & hdr_membership) {
173 | 
174 |     data <- data.frame(x = x)
175 | 
176 |     if (hdr_membership) {
177 | 
178 |       hdr_membership <- vapply(x, get_hdr_membership_1d, numeric(1), df_est, breaks, probs)
179 | 
180 |       # create data frame w/ input data (x) + HDR membership
181 |       data$hdr_membership <- hdr_membership
182 | 
183 |     }
184 | 
185 |   } else {
186 | 
187 |     data <- NULL
188 | 
189 |   }
190 | 
191 |   # transforming df_est$fhat and breaks back to original scale:
192 |   df_est$fhat <- df_est$fhat * fhat_max
193 |   breaks <- breaks * fhat_max
194 | 
195 |   # Give breaks nicely formatted names, corresponding to HDRs:
196 |   names(breaks) <- scales::percent_format(accuracy = 1)(probs)
197 | 
198 |   # bundle everything together
199 |   list(
200 |     df_est = df_est,
201 |     breaks = breaks,
202 |     data = data
203 |   )
204 | 
205 | }
206 | 
207 | get_hdr_membership_1d <- function(x, df_est, breaks, probs) {
208 |   df_est$dist <- (x - df_est$x)^2
209 |   fhat <- df_est[which.min(df_est$dist), "fhat"]
210 | 
211 |   get_hdr_val(fhat, breaks, probs)
212 | }
213 | 
214 | # method is a function of data vector x
215 | # fun is a function of vector x -- the grid
216 | # Might need to be more careful w/ axis transformations here
217 | f_est_1d <- function(method, x, n, range, fun = NULL, args = list()) {
218 | 
219 |   # If fun isn't specified, method returns a closure
220 |   # representing closed form of density estimate
221 |   fun <- fun %||% method(x)
222 | 
223 |   # grid to evaluate fun
224 |   df <- data.frame(x = seq(range[1], range[2], length.out = n))
225 | 
226 |   # evaluate method on the grid, f required to be vectorized in x, y:
227 |   # (args is only non-empty if fun was specified)
228 |   df$fhat <- do.call(fun, c(quote(df$x), args))
229 | 
230 |   df
231 | 
232 | }
233 | 
234 | 
235 | 


--------------------------------------------------------------------------------
/R/get_hdr.R:
--------------------------------------------------------------------------------
  1 | #' Computing the highest density regions of a 2D density
  2 | #'
  3 | #' `get_hdr` is used to estimate a 2-dimensional density and compute
  4 | #' corresponding HDRs. The estimated density and HDRs are represented in a
  5 | #' discrete form as a grid, defined by arguments `rangex`, `rangey`, and `n`.
  6 | #' `get_hdr` is used internally by layer functions `stat_hdr()`,
  7 | #' `stat_hdr_points()`, `stat_hdr_fun()`, etc.
  8 | #'
  9 | #' @param method Either a character (`"kde"`, `"mvnorm"`, `"histogram"`,
 10 | #'   `"freqpoly"`, or `"fun"`) or `method_*()` function. See the "The `method`
 11 | #'   argument" section below for details.
 12 | #' @param data A data frame with columns `x` and `y`.
 13 | #' @param probs Probabilities to compute HDRs for.
 14 | #' @param rangex,rangey Range of grid representing estimated density and HDRs,
 15 | #'   along the x- and y-axes.
 16 | #' @param n Resolution of grid representing estimated density and HDRs.
 17 | #' @param hdr_membership Should HDR membership of data points (`data`) be
 18 | #'   computed? Defaults to `TRUE`, although it is computationally expensive for
 19 | #'   large data sets.
 20 | #' @param fun Optional, a joint probability density function, must be vectorized
 21 | #'   in its first two arguments. See the "The `fun` argument" section below for
 22 | #'   details.
 23 | #' @param args Optional, a list of arguments to be provided to `fun`.
 24 | #'
 25 | #' @section The `method` argument: The density estimator used to estimate the
 26 | #'   HDRs is specified with the `method` argument. The simplest way to specify
 27 | #'   an estimator is to provide a character value to `method`, for example
 28 | #'   `method = "kde"` specifies a kernel density estimator. However, this
 29 | #'   specification is limited to the default behavior of the estimator.
 30 | #'
 31 | #'   Instead, it is possible to provide a function call, for example: `method =
 32 | #'   method_kde()`. In many cases, these functions accept parameters governing
 33 | #'   the density estimation procedure. Here, `method_kde()` accepts parameters
 34 | #'   `h` and `adjust`, both related to the kernel's bandwidth. For details, see
 35 | #'   `?method_kde`. Every method of bivariate density estimation implemented has
 36 | #'   such corresponding `method_*()` function, each with an associated help
 37 | #'   page.
 38 | #'
 39 | #'   Note: `geom_hdr()` and other layer functions also have `method` arguments
 40 | #'   which behave in the same way. For more details on the use and
 41 | #'   implementation of the `method_*()` functions, see `vignette("method",
 42 | #'   "ggdensity")`.
 43 | #'
 44 | #' @section The `fun` argument: If `method` is set to `"fun"`, `get_hdr()`
 45 | #'   expects a bivariate probability density function to be specified with the
 46 | #'   `fun` argument. It is required that `fun` be a function of at least two
 47 | #'   arguments (`x` and `y`). Beyond these first two arguments, `fun` can have
 48 | #'   arbitrarily many arguments; these can be set in `get_hdr()` as a named list
 49 | #'   via the `args` parameter.
 50 | #'
 51 | #'   Note: `get_hdr()` requires that `fun` be vectorized in `x` and `y`. For an
 52 | #'   example of an appropriate choice of `fun`, see the final example below.
 53 | #'
 54 | #' @returns
 55 | #'
 56 | #' `get_hdr` returns a list with elements `df_est` (`data.frame`), `breaks`
 57 | #' (named `numeric`), and `data` (`data.frame`).
 58 | #'
 59 | #' * `df_est`: the estimated HDRs and density evaluated on the grid defined by `rangex`, `rangey`, and `n`.
 60 | #' The column of estimated HDRs (`df_est$hdr`) is a numeric vector with values
 61 | #' from `probs`. The columns `df_est$fhat` and `df_est$fhat_discretized`
 62 | #' correspond to the estimated density on the original scale and rescaled to sum
 63 | #' to 1, respectively.
 64 | #'
 65 | #' * `breaks`: the heights of the estimated density (`df_est$fhat`) corresponding to the HDRs specified by `probs`.
 66 | #' Will always have additional element `Inf` representing the cutoff for the
 67 | #' 100% HDR.
 68 | #'
 69 | #' * `data`: the original data provided in the `data` argument.
 70 | #' If `hdr_membership` is set to `TRUE`, this includes a column
 71 | #' (`data$hdr_membership`) with the HDR corresponding to each data point.
 72 | #'
 73 | #' @examples
 74 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
 75 | #'
 76 | #' # Two ways to specify `method`
 77 | #' get_hdr(df, method = "kde")
 78 | #' get_hdr(df, method = method_kde())
 79 | #'
 80 | #' \dontrun{
 81 | #'
 82 | #' # If parenthesis are omitted, `get_hdr()` errors
 83 | #' get_hdr(df, method = method_kde)
 84 | #' }
 85 | #'
 86 | #' # Estimate different HDRs with `probs`
 87 | #' get_hdr(df, method = method_kde(), probs = c(.975, .6, .2))
 88 | #'
 89 | #' # Adjust estimator parameters with arguments to `method_kde()`
 90 | #' get_hdr(df, method = method_kde(h = 1))
 91 | #'
 92 | #' # Parametric normal estimator of density
 93 | #' get_hdr(df, method = "mvnorm")
 94 | #' get_hdr(df, method = method_mvnorm())
 95 | #'
 96 | #' # Compute "population" HDRs of specified bivariate pdf with `method = "fun"`
 97 | #' f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y)
 98 | #'
 99 | #' get_hdr(
100 | #'   method = "fun", fun = f,
101 | #'   rangex = c(-5, 5), rangey = c(-5, 5)
102 | #'  )
103 | #'
104 | #' get_hdr(
105 | #'   method = "fun", fun = f,
106 | #'   rangex = c(-5, 5), rangey = c(-5, 5),
107 | #'   args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args`
108 | #' )
109 | #'
110 | #' @export
111 | get_hdr <- function(data = NULL, method = "kde", probs = c(.99, .95, .8, .5), n = 100, rangex = NULL, rangey = NULL, hdr_membership = TRUE, fun, args = list()) {
112 | 
113 |   # Deal with missing data argument
114 |   if (is.null(data)) {
115 |     if (!is.character(method) | (is.character(method) && method != "fun")) {
116 |       stop('`data` must be provided unless `method = "fun"`')
117 |     } else {
118 |       if (is.null(rangex) | is.null(rangey)) {
119 |         stop('If `data` is unspecified, `rangex` and `rangey` must be provided when `method = "fun"`')
120 |       }
121 |     }
122 |   }
123 | 
124 |   rangex <- rangex %||% range(data$x)
125 |   rangey <- rangey %||% range(data$y)
126 | 
127 |   probs <- fix_probs(probs)
128 | 
129 |   # Create df_est (estimated density evaluated on a grid) depending on specified method:
130 |   if (is.character(method) && method == "fun") {
131 | 
132 |     df_est <- f_est(method = NULL, n = n, rangex = rangex, rangey = rangey, fun = fun, args = args)
133 | 
134 |   } else  {
135 | 
136 |     if (is.character(method)) {
137 | 
138 |       if (!method %in% c("kde", "mvnorm", "histogram", "freqpoly")) stop("Invalid method specified")
139 | 
140 |       # If method is provided as a character, re-assign correct function output:
141 |       method <- switch(method,
142 |         "kde"       = method_kde(),
143 |         "histogram" = method_histogram(),
144 |         "freqpoly"  = method_freqpoly(),
145 |         "mvnorm"    = method_mvnorm()
146 |       )
147 | 
148 |     }
149 | 
150 |     # parse args of method to determine strategy of `method`
151 |     method_formals <- names(formals(method))
152 | 
153 |     # If `data` is the only argument to `method`, we know `method`
154 |     # is a function factory, returning a closure of pdf in terms of x, y:
155 |     if (length(method_formals) == 1 && method_formals == "data") {
156 | 
157 |       df_est <- f_est(method, data, n, rangex, rangey)
158 | 
159 |     # Otherwise `method` computes a grid for us, shortcutting
160 |     # representing pdf in terms of x, y:
161 |     } else if (length(method_formals) == 4 && all(method_formals == c("data", "n", "rangex", "rangey"))) {
162 | 
163 |       df_est <- method(data, n, rangex, rangey)
164 | 
165 |     } else {
166 | 
167 |       stop("Invalid `method` argument -- did you forget the `()`?")
168 | 
169 |     }
170 | 
171 |   }
172 | 
173 | 
174 |   # remove unneeded attributes
175 |   attr(df_est, "out.attrs") <- NULL
176 | 
177 |   # Manipulate df_est to get information about HDRs:
178 | 
179 |   # force estimate to integrate to 1
180 |   df_est$fhat_discretized <- normalize(df_est$fhat)
181 | 
182 |   # temporarily rescale df$fhat for stability
183 |   fhat_max <- max(df_est$fhat)
184 |   df_est$fhat <- df_est$fhat / fhat_max
185 | 
186 |   # find cutoffs (in terms of rescaled fhat)
187 |   breaks <- c(find_cutoff(df_est, probs), Inf)
188 | 
189 |   # find HDRs for points in the grid
190 |   df_est$hdr <- vapply(df_est$fhat, get_hdr_val, numeric(1), breaks, probs)
191 | 
192 |   # find hdr membership of points from data
193 |   if (!is.null(data) & hdr_membership) {
194 |     data$hdr_membership <- mapply(get_hdr_membership, data$x, data$y, MoreArgs = list(df_est, breaks, probs), SIMPLIFY = TRUE)
195 |   }
196 | 
197 |   # transforming df_est$fhat and breaks back to original scale:
198 |   df_est$fhat <- df_est$fhat * fhat_max
199 |   breaks <- breaks * fhat_max
200 | 
201 |   # Give breaks nicely formatted names, corresponding to HDRs:
202 |   names(breaks) <- scales::percent_format(accuracy = 1)(probs)
203 | 
204 |   # bundle everything together
205 |   list(
206 |     df_est = df_est,
207 |     breaks = breaks,
208 |     data = data
209 |   )
210 | 
211 | }
212 | 
213 | fix_probs <- function(probs) {
214 |   stopifnot("Probabilities must be between 0 and 1, exclusive" = all(probs > 0) & all(probs < 1))
215 | 
216 |   sort(probs, decreasing = TRUE)
217 | }
218 | 
219 | get_hdr_val <- function(fhat, breaks, probs) {
220 |   hdrs <- which(fhat >= breaks)
221 |   if (length(hdrs) == 0) return(1)
222 |   probs[max(hdrs)]
223 | }
224 | 
225 | get_hdr_membership <- function(x, y, df_est, breaks, probs) {
226 |   df_est$dist <- (x - df_est$x)^2 + (y - df_est$y)^2
227 |   fhat <- df_est[which.min(df_est$dist), "fhat"]
228 | 
229 |   get_hdr_val(fhat, breaks, probs)
230 | }
231 | 
232 | 
233 | # method is a function of data
234 | # fun is a function of vectors x, y
235 | f_est <- function(method, data, n, rangex, rangey, fun = NULL, args = list()) {
236 | 
237 |   # If `fun` isn't specified, method returns a closure
238 |   # representing closed form of density estimate
239 |   fun <- fun %||% method(data)
240 | 
241 |   # grid to evaluate fun
242 |   df <- expand.grid(
243 |     "x" = seq(rangex[1], rangex[2], length.out = n),
244 |     "y" = seq(rangey[1], rangey[2], length.out = n)
245 |   )
246 | 
247 |   # evaluate method on the grid, f required to be vectorized in x, y:
248 |   # (args is only non-empty if fun was specified)
249 |   df$fhat <- do.call(fun, c(quote(df$x), quote(df$y), args))
250 | 
251 |   df
252 | 
253 | }
254 | 
255 | 
256 | 


--------------------------------------------------------------------------------
/R/hdr_rug.R:
--------------------------------------------------------------------------------
  1 | #' Rug plots of marginal highest density region estimates
  2 | #'
  3 | #' Perform 1D density estimation, compute and plot the resulting highest density
  4 | #' regions in a way similar to [ggplot2::geom_rug()].
  5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default.
  6 | #'
  7 | #' @section Aesthetics: geom_hdr_rug understands the following aesthetics (required
  8 | #'   aesthetics are in bold):
  9 | #'
 10 | #'   - x
 11 | #'   - y
 12 | #'   - alpha
 13 | #'   - fill
 14 | #'   - group
 15 | #'   - subgroup
 16 | #'
 17 | #' @section Computed variables:
 18 | #'
 19 | #'   \describe{ \item{probs}{The probability of the highest density region, specified
 20 | #'   by `probs`, corresponding to each point.} }
 21 | #'
 22 | #' @inheritParams ggplot2::geom_rug
 23 | #' @inheritParams stat_hdr
 24 | #' @param method,method_y Density estimator(s) to use.
 25 | #'   By default `method` is used for both x- and y-axis.
 26 | #'   If specified, `method_y` will be used for y-axis.
 27 | #'   Accepts character vector: `"kde"`,`"histogram"`, `"freqpoly"`, or `"norm"`.
 28 | #'   Alternatively accepts functions  which return closures corresponding to density estimates,
 29 | #'   see `?get_hdr_1d` or `vignette("method", "ggdensity")`.
 30 | #' @name geom_hdr_rug
 31 | #' @rdname geom_hdr_rug
 32 | #'
 33 | #' @import ggplot2
 34 | #'
 35 | #' @examples
 36 | #' set.seed(1)
 37 | #' df <- data.frame(x = rnorm(100), y = rnorm(100))
 38 | #'
 39 | #' # Plot marginal HDRs for bivariate data
 40 | #' ggplot(df, aes(x, y)) +
 41 | #'   geom_point() +
 42 | #'   geom_hdr_rug() +
 43 | #'   coord_fixed()
 44 | #'
 45 | #' ggplot(df, aes(x, y)) +
 46 | #'   geom_hdr() +
 47 | #'   geom_hdr_rug() +
 48 | #'   coord_fixed()
 49 | #'
 50 | #' # Plot HDR for univariate data
 51 | #' ggplot(df, aes(x)) +
 52 | #'   geom_density() +
 53 | #'   geom_hdr_rug()
 54 | #'
 55 | #' ggplot(df, aes(y = y)) +
 56 | #'   geom_density() +
 57 | #'   geom_hdr_rug()
 58 | #'
 59 | #' # Specify location of marginal HDRs as in ggplot2::geom_rug()
 60 | #' ggplot(df, aes(x, y)) +
 61 | #'   geom_hdr() +
 62 | #'   geom_hdr_rug(sides = "tr", outside = TRUE) +
 63 | #'   coord_fixed(clip = "off")
 64 | #'
 65 | #' # Can use same methods of density estimation as geom_hdr().
 66 | #' # For data with constrained support, we suggest setting method = "histogram":
 67 | #' ggplot(df, aes(x^2)) +
 68 | #'  geom_histogram(bins = 30, boundary = 0) +
 69 | #'  geom_hdr_rug(method = "histogram")
 70 | #'
 71 | #' ggplot(df, aes(x^2, y^2)) +
 72 | #'  geom_hdr(method = "histogram") +
 73 | #'  geom_hdr_rug(method = "histogram") +
 74 | #'  coord_fixed()
 75 | #'
 76 | NULL
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | #' @rdname geom_hdr_rug
 84 | #' @export
 85 | stat_hdr_rug <- function(mapping = NULL, data = NULL,
 86 |                          geom = "hdr_rug", position = "identity",
 87 |                          ...,
 88 |                          method = "kde",
 89 |                          method_y = "kde",
 90 |                          probs = c(.99, .95, .8, .5),
 91 |                          xlim = NULL,
 92 |                          ylim = NULL,
 93 |                          n = 512,
 94 |                          na.rm = FALSE,
 95 |                          show.legend = TRUE,
 96 |                          inherit.aes = TRUE) {
 97 |   layer(
 98 |     data = data,
 99 |     mapping = mapping,
100 |     stat = StatHdrRug,
101 |     geom = geom,
102 |     position = position,
103 |     show.legend = show.legend,
104 |     inherit.aes = inherit.aes,
105 |     params = list(
106 |       method = method,
107 |       method_y = method_y,
108 |       probs = probs,
109 |       xlim = xlim,
110 |       ylim = ylim,
111 |       n = n,
112 |       na.rm = na.rm,
113 |       ...
114 |     )
115 |   )
116 | }
117 | 
118 | 
119 | 
120 | #' @rdname geom_hdr_rug
121 | #' @format NULL
122 | #' @usage NULL
123 | #' @export
124 | StatHdrRug <- ggproto("StatHdrRug", Stat,
125 | 
126 |   required_aes = c("x|y"),
127 |   default_aes = aes(alpha = after_stat(probs)),
128 | 
129 |   compute_group = function(data, scales, na.rm = FALSE,
130 |                            method = "kde", method_y = NULL,
131 |                            probs = c(.99, .95, .8, .5),
132 |                            xlim = NULL, ylim = NULL, n = 512) {
133 | 
134 |     # Recycle for both x, y
135 |     if (length(n) == 1) n <- rep(n, 2)
136 | 
137 |     # If no alternative method_y, use method
138 |     if (is.null(method_y)) method_y <- method
139 | 
140 | 
141 |     # Estimate marginal densities
142 | 
143 |     # Initialize dfs for x and y axes,
144 |     # in case only x or y are supplied:
145 |     df_x <- data.frame()
146 |     df_y <- data.frame()
147 | 
148 |     if (!is.null(data$x)) {
149 | 
150 |       rangex <- xlim %||% scales$x$dimension()
151 | 
152 |       res_x <- get_hdr_1d(data$x, method, probs, n[1], rangex, hdr_membership = FALSE)
153 | 
154 |       df_x <- res_to_df_1d(res_x, probs, data$group[1], output = "rug")
155 | 
156 |       # Needs correct name for ggplot2 internals
157 |       df_x$axis <- "x"
158 |       df_x$y <- NA
159 | 
160 |     }
161 | 
162 | 
163 |     if (!is.null(data$y)) {
164 | 
165 |       rangey <- ylim %||% scales$y$dimension()
166 | 
167 |       res_y <- get_hdr_1d(data$y, method_y, probs, n[2], rangey, hdr_membership = FALSE)
168 | 
169 |       df_y <- res_to_df_1d(res_y, probs, data$group[1], output = "rug")
170 | 
171 |       # Needs correct name for ggplot2 internals
172 |       df_y$axis <- "y"
173 |       df_y$y <- df_y$x
174 |       df_y$x <- NA
175 | 
176 |     }
177 | 
178 |     df <- rbind(df_x, df_y)
179 | 
180 |     # Need to remove extra col if only plotting x or y rug
181 |     if (is.null(data$x)) df$x <- NULL
182 |     if (is.null(data$y)) df$y <- NULL
183 | 
184 |     df
185 | 
186 |     }
187 |   )
188 | 
189 | 
190 | res_to_df_1d <- function(res, probs, group, output) {
191 | 
192 |   probs <- fix_probs(probs)
193 | 
194 |   if (output == "rug") {
195 | 
196 |     probs_formatted <- scales::percent_format(accuracy = 1)(probs)
197 | 
198 |     df <- res$df_est
199 | 
200 |     # alpha will be mapped to df$probs
201 |     df$probs <- scales::percent_format(accuracy = 1)(df$hdr)
202 |     df$probs <- ordered(df$probs, levels = probs_formatted)
203 |     df$hdr <- NULL
204 | 
205 |     # Discard 100% HDR if it's not in probs:
206 |     df <- df[!is.na(df$probs),]
207 | 
208 |   }
209 | 
210 |   df
211 | 
212 | }
213 | 
214 | 
215 | 
216 | #' @rdname geom_hdr_rug
217 | #' @export
218 | geom_hdr_rug <- function(mapping = NULL, data = NULL,
219 |                          stat = "hdr_rug", position = "identity",
220 |                          ...,
221 |                          outside = FALSE,
222 |                          sides = "bl",
223 |                          length = unit(0.03, "npc"),
224 |                          na.rm = FALSE,
225 |                          show.legend = TRUE,
226 |                          inherit.aes = TRUE) {
227 |   layer(
228 |     data = data,
229 |     mapping = mapping,
230 |     stat = stat,
231 |     geom = GeomHdrRug,
232 |     position = position,
233 |     show.legend = show.legend,
234 |     inherit.aes = inherit.aes,
235 |     params = list(
236 |       outside = outside,
237 |       sides = sides,
238 |       length = length,
239 |       na.rm = na.rm,
240 |       ...
241 |     )
242 |   )
243 | }
244 | 
245 | 
246 | #' @rdname geom_hdr_rug
247 | #' @format NULL
248 | #' @usage NULL
249 | #' @export
250 | GeomHdrRug <- ggproto("GeomHdrRug", Geom,
251 |    optional_aes = c("x", "y"),
252 | 
253 |    draw_panel = function(data, panel_params, coord, sides = "bl",
254 |                          outside = FALSE, length = unit(0.03, "npc")) {
255 | 
256 |      if (!inherits(length, "unit")) {
257 |        abort("'length' must be a 'unit' object.")
258 |      }
259 |      rugs <- list()
260 | 
261 |      # For coord_flip, coord$tranform does not flip the sides where to
262 |      # draw the rugs. We have to flip them.
263 |      if (inherits(coord, 'CoordFlip')) {
264 |        sides <- chartr('tblr', 'rlbt', sides)
265 |      }
266 | 
267 |      # move the rug to outside the main plot space
268 |      if (outside) length <- -length
269 | 
270 |      # Set up data frames for x and y:
271 |      data_x <- data[data$axis == "x",]
272 |      data_y <- data[data$axis == "y",]
273 | 
274 | 
275 |      if (nrow(data_x) > 0) {
276 | 
277 |        data_x <- coord$transform(data_x, panel_params)
278 |        data_x$width <- resolution(data_x$x, FALSE)
279 | 
280 |        gp_x <- grid::gpar(
281 |          col = alpha(data_x$fill, data_x$alpha),
282 |          fill = alpha(data_x$fill, data_x$alpha),
283 |          lwd = 0
284 |        )
285 | 
286 |        # set up x axis rug rasters
287 |        if (grepl("b", sides)) {
288 |          rugs$x_b <- grid::rectGrob(
289 |            x = unit(data_x$x, "native"),
290 |            y = unit(0, "npc"),
291 |            width = data_x$width,
292 |            height = length,
293 |            just = "bottom",
294 |            gp = gp_x
295 |          )
296 |        }
297 | 
298 |        if (grepl("t", sides)) {
299 |          rugs$x_t <- grid::rectGrob(
300 |            x = unit(data_x$x, "native"),
301 |            y = unit(1, "npc"),
302 |            width = data_x$width,
303 |            height = length,
304 |            just = "top",
305 |            gp = gp_x
306 |          )
307 |        }
308 |      }
309 | 
310 |      if (nrow(data_y) > 0) {
311 | 
312 |        data_y <- coord$transform(data_y, panel_params)
313 |        data_y$height <- resolution(data_y$y, FALSE)
314 | 
315 |        gp_y <- grid::gpar(
316 |          col = alpha(data_y$fill, data_y$alpha),
317 |          fill = alpha(data_y$fill, data_y$alpha),
318 |          lwd = 0
319 |        )
320 | 
321 | 
322 |        # set up y axis rug rasters
323 |        if (grepl("l", sides)) {
324 |          rugs$y_l <- grid::rectGrob(
325 |            x = unit(0, "npc"),
326 |            y = unit(data_y$y, "native"),
327 |            width = length,
328 |            height = data_y$height,
329 |            just = "left",
330 |            gp = gp_y
331 |          )
332 |        }
333 | 
334 |        if (grepl("r", sides)) {
335 |          rugs$y_r <- grid::rectGrob(
336 |            x = unit(1, "npc"),
337 |            y = unit(data_y$y, "native"),
338 |            width = length,
339 |            height = data_y$height,
340 |            just = "right",
341 |            gp = gp_y
342 |          )
343 |        }
344 | 
345 |      }
346 | 
347 |      grid::gTree(children = do.call(grid::gList, rugs))
348 | 
349 |    },
350 | 
351 |   default_aes = aes(fill = "grey20", alpha = NA),
352 | 
353 |   draw_key = draw_key_rect
354 | )
355 | 
356 | 
357 | 
358 | 
359 | 
360 | 


--------------------------------------------------------------------------------
/R/method.R:
--------------------------------------------------------------------------------
  1 | # methods that return est pdf as closure  ---------------------------------
  2 | 
  3 | #' Bivariate parametric normal HDR estimator
  4 | #'
  5 | #' Function used to specify bivariate normal density estimator
  6 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`).
  7 | #'
  8 | #' For more details on the use and implementation of the `method_*()` functions,
  9 | #' see `vignette("method", "ggdensity")`.
 10 | #'
 11 | #' @examples
 12 | #' # Normal estimator is useful when an assumption of normality is appropriate
 13 | #' set.seed(1)
 14 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
 15 | #'
 16 | #' ggplot(df, aes(x, y)) +
 17 | #'   geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) +
 18 | #'   geom_point(size = 1)
 19 | #'
 20 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs
 21 | #' res <- get_hdr(df, method = method_mvnorm())
 22 | #' str(res)
 23 | #'
 24 | #' @export
 25 | method_mvnorm <- function() {
 26 | 
 27 |   function(data) {
 28 | 
 29 |     data_matrix <- with(data, cbind(x, y))
 30 |     mu_hat <- colMeans(data_matrix)
 31 |     R <- chol(cov(data_matrix)) # R'R = crossprod(R) = S
 32 | 
 33 |     function(x, y) {
 34 |       X <- cbind(x, y)
 35 |       tmp <- backsolve(R, t(X) - mu_hat, transpose = TRUE)
 36 |       logretval <- -sum(log(diag(R))) - log(2 * pi) - 0.5 * colSums(tmp^2)
 37 |       exp( logretval )
 38 |     }
 39 | 
 40 |   }
 41 | 
 42 | }
 43 | 
 44 | # methods that return closures that compute a grid ------------------------
 45 | 
 46 | #' Bivariate kernel density HDR estimator
 47 | #'
 48 | #' Function used to specify bivariate kernel density estimator
 49 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`).
 50 | #'
 51 | #' For more details on the use and implementation of the `method_*()` functions,
 52 | #' see `vignette("method", "ggdensity")`.
 53 | #'
 54 | #' @inheritParams ggplot2::stat_density2d
 55 | #'
 56 | #' @examples
 57 | #' set.seed(1)
 58 | #' df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3))
 59 | #'
 60 | #' ggplot(df, aes(x, y)) +
 61 | #'   geom_hdr(method = method_kde()) +
 62 | #'   geom_point(size = 1)
 63 | #'
 64 | #' # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()`
 65 | #' ggplot(df, aes(x, y)) +
 66 | #'   geom_density_2d_filled() +
 67 | #'   geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) +
 68 | #'   theme(legend.position = "none")
 69 | #'
 70 | #' # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust`
 71 | #' ggplot(df, aes(x, y)) +
 72 | #'   geom_hdr(method = method_kde(h = 1)) +
 73 | #'   geom_point(size = 1)
 74 | #'
 75 | #' ggplot(df, aes(x, y)) +
 76 | #'   geom_hdr(method = method_kde(adjust = 1/2)) +
 77 | #'   geom_point(size = 1)
 78 | #'
 79 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs
 80 | #' res <- get_hdr(df, method = method_kde())
 81 | #' str(res)
 82 | #'
 83 | #' @export
 84 | method_kde <- function(h = NULL, adjust = c(1, 1)) {
 85 | 
 86 |   function(data, n, rangex, rangey) {
 87 | 
 88 |     if (is.null(h)) {
 89 |       h <- c(MASS::bandwidth.nrd(data$x), MASS::bandwidth.nrd(data$y))
 90 |     }
 91 | 
 92 |     h <- h * adjust
 93 | 
 94 |     kdeout <- MASS::kde2d(
 95 |       x = data$x, y = data$y, n = n, h = h,
 96 |       lims = c(rangex, rangey)
 97 |     )
 98 | 
 99 |     df <- with(kdeout, expand.grid("x" = x, "y" = y))
100 |     df$fhat <- as.vector(kdeout$z)
101 | 
102 |     df
103 | 
104 |   }
105 | }
106 | 
107 | #' Bivariate histogram HDR estimator
108 | #'
109 | #' Function used to specify bivariate histogram density estimator
110 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`).
111 | #'
112 | #' For more details on the use and implementation of the `method_*()` functions,
113 | #' see `vignette("method", "ggdensity")`.
114 | #'
115 | #' @param bins Number of bins along each axis.
116 | #'   Either a vector of length 2 or a scalar value which is recycled for both dimensions.
117 | #'   Defaults to normal reference rule (Scott, pg 87).
118 | #' @param smooth If `TRUE`, HDRs are smoothed by the marching squares algorithm.
119 | #' @param nudgex,nudgey Horizontal and vertical rules for choosing witness points when `smooth == TRUE`.
120 | #'   Accepts character vector: `"left"`, `"none"`, `"right"` (`nudgex`) or  `"down"`, `"none"`, `"up"` (`nudgey`).
121 | #'
122 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley.
123 | #'
124 | #' @examples
125 | #' \dontrun{
126 | #'
127 | #' # Histogram estimators can be useful when data has boundary constraints
128 | #' set.seed(1)
129 | #' df <- data.frame(x = rexp(1e3), y = rexp(1e3))
130 | #'
131 | #' ggplot(df, aes(x, y)) +
132 | #'   geom_hdr(method = method_histogram()) +
133 | #'   geom_point(size = 1)
134 | #'
135 | #' # The resolution of the histogram estimator can be set via `bins`
136 | #' ggplot(df, aes(x, y)) +
137 | #'   geom_hdr(method = method_histogram(bins = c(8, 25))) +
138 | #'   geom_point(size = 1)
139 | #'
140 | #' # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs
141 | #' ggplot(df, aes(x, y)) +
142 | #'   geom_hdr(method = method_histogram(smooth = TRUE)) +
143 | #'   geom_point(size = 1)
144 | #'
145 | #' # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly
146 | #' ggplot(df, aes(x, y)) +
147 | #'   geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) +
148 | #'   geom_point(size = 1)
149 | #'
150 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs
151 | #' res <- get_hdr(df, method = method_histogram())
152 | #' str(res)
153 | #' }
154 | #'
155 | #' @export
156 | method_histogram <- function(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none") {
157 | 
158 |   # n is an argument, but it is not used
159 |   function(data, n, rangex, rangey) {
160 | 
161 |     if (is.null(bins)) {
162 |       bins <- numeric(2)
163 | 
164 |       # define histogram mesh according to Scott p. 87
165 |       rho <- cor(data$x, data$y)
166 |       hx <- 3.504 * sd(data$x) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4)
167 |       hy <- 3.504 * sd(data$y) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4)
168 |       bins[1] <- round((rangex[2] - rangex[1]) / hx)
169 |       bins[2] <- round((rangey[2] - rangey[1]) / hy)
170 | 
171 |     } else if (length(bins == 1)) {
172 |       bins <- rep(bins, 2)
173 |     }
174 | 
175 |     xvals <- data$x
176 |     yvals <- data$y
177 | 
178 |     xbtwn <- (rangex[1] <= xvals & xvals <= rangex[2])
179 |     if (!all(xbtwn)) {
180 |       xvals <- xvals[xbtwn]
181 |       yvals <- yvals[xbtwn]
182 |     }
183 | 
184 |     ybtwn <- (rangey[1] <= yvals & yvals <= rangey[2])
185 |     if (!all(ybtwn)) {
186 |       xvals <- xvals[ybtwn]
187 |       yvals <- yvals[ybtwn]
188 |     }
189 | 
190 |     sx <- seq(rangex[1], rangex[2], length.out = bins[1] + 1)
191 |     sy <- seq(rangey[1], rangey[2], length.out = bins[2] + 1)
192 |     de_x <- sx[2] - sx[1]
193 |     de_y <- sy[2] - sy[1]
194 |     box_area <- de_x * de_y
195 | 
196 |     xbin_mdpts <- sx[-(bins[1]+1)] + de_x/2
197 |     ybin_mdpts <- sy[-(bins[2]+1)] + de_y/2
198 | 
199 |     xleft <- sx[-(bins[1]+1)]
200 |     xright <- sx[-1]
201 | 
202 |     ybottom <- sy[-(bins[2]+1)]
203 |     ytop <- sy[-1]
204 | 
205 | 
206 |     df_cuts <- data.frame("xbin" = cut(xvals, sx), "ybin" = cut(yvals, sy))
207 | 
208 |     df <- with(df_cuts, expand.grid("xbin" = levels(xbin), "ybin" = levels(ybin)))
209 |     df$n <- with(df_cuts, as.vector(table(xbin, ybin)))
210 | 
211 |     df$xbin_midpt <- xbin_mdpts[as.integer(df$xbin)]
212 |     df$ybin_midpt <- ybin_mdpts[as.integer(df$ybin)]
213 | 
214 |     df$xmin <- df$xbin_midpt - de_x/2
215 |     df$xmax <- df$xbin_midpt + de_x/2
216 |     df$de_x <- de_x
217 | 
218 |     df$ymin <- df$ybin_midpt - de_y/2
219 |     df$ymax <- df$ybin_midpt + de_y/2
220 |     df$de_y <- de_y
221 | 
222 |     df$fhat <- with(df, n / (sum(n) * box_area))
223 | 
224 | 
225 |     if (smooth) {
226 | 
227 |       if(nudgex == "left") df$x <- df$xmin
228 |       if(nudgex == "none") df$x <- df$xbin_midpt
229 |       if(nudgex == "right") df$x <- df$xmax
230 | 
231 |       if(nudgey == "down") df$y <- df$ymin
232 |       if(nudgey == "none") df$y <- df$ybin_midpt
233 |       if(nudgey == "up") df$y <- df$ymax
234 | 
235 |     } else {
236 | 
237 |       # No nudging if not smoothing
238 |       df$x <- df$xbin_midpt
239 |       df$y <- df$ybin_midpt
240 | 
241 |       # Evaluate histogram on a grid
242 |       # For xyz_to_iso* funs, need tightly packed values for good isobands/lines
243 |       # k*k points per histogram footprint
244 |       # Higher values of k -> better visuals, more computationally expensive
245 | 
246 |       # Currently determining k heuristically - not based on any theoretical results
247 |       # The necessary value of k seems to be O((bins[1]*bins[2])^(-1/3))
248 |       # found constant which yields k = 50 for bins[1]*bins[2] = 10^2
249 |       k <- if (bins[1] * bins[2] > 10^2) max(floor(225/((bins[1] * bins[2])^(1/3))), 5) else 50
250 | 
251 |       bbins <- bins * k
252 | 
253 |       ssx <- seq(rangex[1], rangex[2], length.out = bbins[1])
254 |       ssy <- seq(rangey[1], rangey[2], length.out = bbins[2])
255 | 
256 |       ddf <- expand.grid(x = ssx, y = ssy)
257 | 
258 |       # Need fhat repeated in very particular way for grid:
259 |       #   e.g.
260 |       #      k = 2
261 |       #      df$fhat = 1, 2,
262 |       #                3, 4
263 |       #     ddf$fhat = 1, 1, 2, 2,
264 |       #                1, 1, 2, 2,
265 |       #                3, 3, 4, 4,
266 |       #                3, 3, 4, 4
267 | 
268 |       # m <- matrix(df$fhat, nrow = bins[2], byrow = TRUE)
269 |       # ddf$fhat <- as.vector(kronecker(m, matrix(1, k, k)))
270 | 
271 |       fhat <- split(df$fhat, factor(rep(1:bins[2], each = bins[1]))) # split into rows
272 |       fhat <- lapply(fhat, function(x) rep(x, each = k)) # repeat within rows (horizontal)
273 |       fhat <- lapply(fhat, function(x) rep(x, times = k)) # repeat rows (vertical)
274 |       fhat <- unlist(fhat) # concatenate
275 |       ddf$fhat <- fhat
276 | 
277 |       df <- ddf
278 |     }
279 | 
280 |     df[c("x", "y", "fhat")]
281 | 
282 |   }
283 | }
284 | 
285 | #' Bivariate frequency polygon HDR estimator
286 | #'
287 | #' Function used to specify bivariate frequency polygon density estimator
288 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`).
289 | #'
290 | #' For more details on the use and implementation of the `method_*()` functions,
291 | #' see `vignette("method", "ggdensity")`.
292 | #'
293 | #' @inheritParams method_histogram
294 | #'
295 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley.
296 | #'
297 | #' @examples
298 | #' set.seed(1)
299 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3))
300 | #'
301 | #' ggplot(df, aes(x, y)) +
302 | #'   geom_hdr(method = method_freqpoly()) +
303 | #'   geom_point(size = 1)
304 | #'
305 | #' # The resolution of the frequency polygon estimator can be set via `bins`
306 | #' ggplot(df, aes(x, y)) +
307 | #'   geom_hdr(method = method_freqpoly(bins = c(8, 25))) +
308 | #'   geom_point(size = 1)
309 | #'
310 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs
311 | #' res <- get_hdr(df, method = method_freqpoly())
312 | #' str(res)
313 | #'
314 | #' @export
315 | method_freqpoly <- function(bins = NULL) {
316 | 
317 |   # n is an argument, but it is not used
318 |   function(data, n, rangex, rangey) {
319 | 
320 |     if (is.null(bins)) {
321 |       bins <- numeric(2)
322 | 
323 |       # define histogram mesh according to Scott p. 87
324 |       # TODO: fill in with rules for frequency polygons
325 |       rho <- cor(data$x, data$y)
326 |       hx <- 3.504 * sd(data$x) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4)
327 |       hy <- 3.504 * sd(data$y) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4)
328 |       bins[1] <- round((rangex[2] - rangex[1]) / hx)
329 |       bins[2] <- round((rangey[2] - rangey[1]) / hy)
330 | 
331 |     } else {
332 |       if (length(bins == 1)) bins <- rep(bins, 2)
333 |     }
334 | 
335 |     xvals <- data$x
336 |     yvals <- data$y
337 | 
338 |     xbtwn <- (rangex[1] <= xvals & xvals <= rangex[2])
339 |     if (!all(xbtwn)) {
340 |       xvals <- xvals[xbtwn]
341 |       yvals <- yvals[xbtwn]
342 |     }
343 | 
344 |     ybtwn <- (rangey[1] <= yvals & yvals <= rangey[2])
345 |     if (!all(ybtwn)) {
346 |       xvals <- xvals[ybtwn]
347 |       yvals <- yvals[ybtwn]
348 |     }
349 | 
350 | 
351 |     de_x <- (rangex[2] - rangex[1]) / bins[1]
352 |     de_y <- (rangey[2] - rangey[1]) / bins[2]
353 |     rangex[1] <- rangex[1] - de_x
354 |     rangex[2] <- rangex[2] + de_x
355 |     rangey[1] <- rangey[1] - de_y
356 |     rangey[2] <- rangey[2] + de_y
357 |     bins <- bins + 2
358 |     sx <- seq(rangex[1], rangex[2], length.out = bins[1] + 1)
359 |     sy <- seq(rangey[1], rangey[2], length.out = bins[2] + 1)
360 | 
361 | 
362 |     box_area <- de_x * de_y
363 | 
364 |     xbin_mdpts <- sx[-(bins[1]+1)] + de_x/2
365 |     ybin_mdpts <- sy[-(bins[2]+1)] + de_y/2
366 | 
367 |     xleft <- sx[-(bins[1]+1)]
368 |     xright <- sx[-1]
369 | 
370 |     ybottom <- sy[-(bins[2]+1)]
371 |     ytop <- sy[-1]
372 | 
373 | 
374 |     df_cuts <- data.frame("xbin" = cut(xvals, sx), "ybin" = cut(yvals, sy))
375 | 
376 |     df <- with(df_cuts, expand.grid("xbin" = levels(xbin), "ybin" = levels(ybin)))
377 |     df$n <- with(df_cuts, as.vector(table(xbin, ybin)))
378 | 
379 |     df$xbin_midpt <- xbin_mdpts[as.integer(df$xbin)]
380 |     df$ybin_midpt <- ybin_mdpts[as.integer(df$ybin)]
381 | 
382 |     df$xmin <- df$xbin_midpt - de_x/2
383 |     df$xmax <- df$xbin_midpt + de_x/2
384 |     df$de_x <- de_x
385 | 
386 |     df$ymin <- df$ybin_midpt - de_y/2
387 |     df$ymax <- df$ybin_midpt + de_y/2
388 |     df$de_y <- de_y
389 | 
390 |     df$fhat <- with(df, n / (sum(n) * box_area))
391 |     df$fhat_discretized <- normalize(df$fhat)
392 | 
393 |     grid <- expand.grid(
394 |       x = sx[2:bins[1]],
395 |       y = sy[2:bins[2]]
396 |     )
397 | 
398 |     x_midpts <- unique(df$xbin_midpt)
399 |     y_midpts <- unique(df$ybin_midpt)
400 | 
401 |     find_A <- function(coords) {
402 |       x <- coords[[1]]
403 |       y <- coords[[2]]
404 | 
405 |       row <- data.frame(
406 |         x1 = max(x_midpts[x_midpts - x < 0]),
407 |         x2 = min(x_midpts[x_midpts - x >= 0]),
408 |         y1 = max(y_midpts[y_midpts - y < 0]),
409 |         y2 = min(y_midpts[y_midpts - y >= 0])
410 |       )
411 | 
412 |       row$fQ11 <- df[df$xbin_midpt == row$x1 & df$ybin_midpt == row$y1, "fhat"]
413 |       row$fQ21 <- df[df$xbin_midpt == row$x2 & df$ybin_midpt == row$y1, "fhat"]
414 |       row$fQ12 <- df[df$xbin_midpt == row$x1 & df$ybin_midpt == row$y2, "fhat"]
415 |       row$fQ22 <- df[df$xbin_midpt == row$x2 & df$ybin_midpt == row$y2, "fhat"]
416 | 
417 |       xy_mat <- with(row, matrix(c(
418 |         x2 * y2, -x2 * y1, -x1 * y2, x1 * y1,
419 |         -y2, y1, y2, -y1,
420 |         -x2, x2, x1, -x1,
421 |         1, -1, -1, 1
422 |       ), nrow = 4, byrow = TRUE))
423 | 
424 |       A <- with(row,
425 |         1 / ((x2 - x1) * (y2 - y1)) * xy_mat %*% c(fQ11, fQ12, fQ21, fQ22)
426 |       )
427 | 
428 |       row$a00 <- A[1]
429 |       row$a10 <- A[2]
430 |       row$a01 <- A[3]
431 |       row$a11 <- A[4]
432 | 
433 |       row
434 |     }
435 | 
436 | 
437 |     A_list <- apply(grid, 1, find_A, simplify = FALSE)
438 |     df_A <- do.call(rbind, A_list)
439 | 
440 |     coeffs_to_surface <- function(row, k) {
441 |       sx <- seq(row[["x1"]], row[["x2"]], length.out = k)[-k]
442 |       sy <- seq(row[["y1"]], row[["y2"]], length.out = k)[-k]
443 | 
444 |       fit <- function(x, y) row[["a00"]] + row[["a10"]] * x + row[["a01"]] * y + row[["a11"]] * x * y
445 | 
446 |       df <- expand.grid(x = sx, y = sy)
447 |       df$fhat <- fit(df$x, df$y)
448 | 
449 |       df
450 |     }
451 | 
452 | 
453 |     # Currently determining k heuristically - not based on any theoretical results
454 |     # The necessary value of k seems to be O((bins[1]*bins[2])^(-1/4))
455 |     k <- if (bins[1] * bins[2] > 10^2) max(floor(30/((bins[1] * bins[2])^(1/4))), 3) else 10
456 | 
457 |     surface_list <- apply(df_A, 1, coeffs_to_surface, k, simplify = FALSE)
458 |     df <- do.call(rbind, surface_list)
459 | 
460 |     df[c("x","y","fhat")]
461 | 
462 |   }
463 | }
464 | 
465 | 


--------------------------------------------------------------------------------